From 099f53cb50e45ef617a9f1d63ceec799e489418b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 8 Apr 2009 14:28:37 -0700 Subject: async_tx: rename zero_sum to val 'zero_sum' does not properly describe the operation of generating parity and checking that it validates against an existing buffer. Change the name of the operation to 'val' (for 'validate'). This is in anticipation of the p+q case where it is a requirement to identify the target parity buffers separately from the source buffers, because the target parity buffers will not have corresponding pq coefficients. Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 5fc2ef8d97fa..513150d8c25b 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -117,7 +117,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, dma_async_tx_callback cb_fn, void *cb_fn_param); struct dma_async_tx_descriptor * -async_xor_zero_sum(struct page *dest, struct page **src_list, +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, u32 *result, enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 2e2aa3df170c..6768727d00d7 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -55,8 +55,8 @@ enum dma_transaction_type { DMA_PQ_XOR, DMA_DUAL_XOR, DMA_PQ_UPDATE, - DMA_ZERO_SUM, - DMA_PQ_ZERO_SUM, + DMA_XOR_VAL, + DMA_PQ_VAL, DMA_MEMSET, DMA_MEMCPY_CRC32C, DMA_INTERRUPT, @@ -214,7 +214,7 @@ struct dma_async_tx_descriptor { * @device_free_chan_resources: release DMA channel's resources * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation - * @device_prep_dma_zero_sum: prepares a zero_sum operation + * @device_prep_dma_xor_val: prepares a xor validation operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -243,7 +243,7 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor)( struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags); - struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( + struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, u32 *result, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( -- cgit v1.2.3 From 88ba2aa586c874681c072101287e15d40de7e6e2 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Apr 2009 16:16:18 -0700 Subject: async_tx: kill ASYNC_TX_DEP_ACK flag In support of inter-channel chaining async_tx utilizes an ack flag to gate whether a dependent operation can be chained to another. While the flag is not set the chain can be considered open for appending. Setting the ack flag closes the chain and flags the descriptor for garbage collection. The ASYNC_TX_DEP_ACK flag essentially means "close the chain after adding this dependency". Since each operation can only have one child the api now implicitly sets the ack flag at dependency submission time. 
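A minimal sketch of the resulting convention (illustration only, not part of the patch; the buffer variables, complete_fn and ctx are placeholders, and the pre-struct prototypes in effect at this point in the series are assumed):

    struct dma_async_tx_descriptor *tx;

    /* first operation: leave the descriptor unacked so that a
     * dependent operation can still be attached to it */
    tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len,
                   ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL);

    /* passing 'tx' as the dependency acknowledges it implicitly;
     * only the final operation in the chain sets ASYNC_TX_ACK */
    tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len,
                      ASYNC_TX_ACK, tx, complete_fn, ctx);

    async_tx_issue_pending_all();
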
This removes an unnecessary management burden from clients of the api. [ Impact: clean up and enforce one dependency per operation ] Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 9 ++++----- crypto/async_tx/async_memcpy.c | 2 +- crypto/async_tx/async_memset.c | 2 +- crypto/async_tx/async_tx.c | 4 ++-- crypto/async_tx/async_xor.c | 6 ++---- drivers/md/raid5.c | 25 +++++++++++-------------- include/linux/async_tx.h | 4 +--- 7 files changed, 22 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 4af12180d191..76feda8541dc 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -80,8 +80,8 @@ acknowledged by the application before the offload engine driver is allowed to recycle (or free) the descriptor. A descriptor can be acked by one of the following methods: 1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted -2/ setting the ASYNC_TX_DEP_ACK flag to acknowledge the parent - descriptor of a new operation. +2/ submitting an unacknowledged descriptor as a dependency to another + async_tx call will implicitly set the acknowledged state. 3/ calling async_tx_ack() on the descriptor. 3.4 When does the operation execute? @@ -136,10 +136,9 @@ int run_xor_copy_xor(struct page **xor_srcs, tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL); - tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, tx, NULL, NULL); tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, - ASYNC_TX_XOR_DROP_DST | ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, + ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx, complete_xor_copy_xor, NULL); async_tx_issue_pending_all(); diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index ddccfb01c416..7117ec6f1b74 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -35,7 +35,7 @@ * @src: src page * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, + * @flags: ASYNC_TX_ACK * @depend_tx: memcpy depends on the result of this transaction * @cb_fn: function to call when the memcpy completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index 5b5eb99bb244..b2f133885b7f 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -35,7 +35,7 @@ * @val: fill value * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: memset depends on the result of this transaction * @cb_fn: function to call when the memcpy completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 06eb6cc09fef..3766bc3d7d89 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -223,7 +223,7 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, if (flags & ASYNC_TX_ACK) async_tx_ack(tx); - if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) + if (depend_tx) async_tx_ack(depend_tx); } EXPORT_SYMBOL_GPL(async_tx_submit); @@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(async_tx_submit); /** * async_trigger_callback - schedules the 
callback function to be run after * any dependent operations have been completed. - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: 'callback' requires the completion of this transaction * @cb_fn: function to call after depend_tx completes * @cb_param: parameter to pass to the callback routine diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index e0580b0ea533..3cc5dc763b54 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -105,7 +105,6 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, _cb_param); depend_tx = tx; - flags |= ASYNC_TX_DEP_ACK; if (src_cnt > xor_src_cnt) { /* drop completed sources */ @@ -168,8 +167,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes - * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, - * ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ASYNC_TX_ACK * @depend_tx: xor depends on the result of this transaction. * @cb_fn: function to call when the xor completes * @cb_param: parameter to pass to the callback routine @@ -230,7 +228,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero - * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK + * @flags: ASYNC_TX_ACK * @depend_tx: xor depends on the result of this transaction. * @cb_fn: function to call when the xor completes * @cb_param: parameter to pass to the callback routine diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f8d2d35ed298..0ef5362c8d02 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -525,14 +525,12 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + b_offset, clen, 0, + tx, NULL, NULL); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, - ASYNC_TX_DEP_ACK, - tx, NULL, NULL); + page_offset, clen, 0, + tx, NULL, NULL); } if (clen < len) /* hit end of page */ break; @@ -615,8 +613,7 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, - ops_complete_biofill, sh); + async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh); } static void ops_complete_compute5(void *stripe_head_ref) @@ -701,8 +698,8 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) } tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh); return tx; } @@ -809,7 +806,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST * for the synchronous xor case */ - flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | + flags = ASYNC_TX_ACK | (prexor ? 
ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); atomic_inc(&sh->count); @@ -858,7 +855,7 @@ static void ops_run_check(struct stripe_head *sh) &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, + tx = async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_check, sh); } @@ -2687,8 +2684,8 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, /* place all the copies on one channel */ tx = async_memcpy(sh2->dev[dd_idx].page, - sh->dev[i].page, 0, 0, STRIPE_SIZE, - ASYNC_TX_DEP_ACK, tx, NULL, NULL); + sh->dev[i].page, 0, 0, STRIPE_SIZE, + 0, tx, NULL, NULL); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 513150d8c25b..9f14cd540cd2 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -58,13 +58,11 @@ struct dma_chan_ref { * array. * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a * dependency chain - * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. */ enum async_tx_flags { ASYNC_TX_XOR_ZERO_DST = (1 << 0), ASYNC_TX_XOR_DROP_DST = (1 << 1), - ASYNC_TX_ACK = (1 << 3), - ASYNC_TX_DEP_ACK = (1 << 4), + ASYNC_TX_ACK = (1 << 2), }; #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From a08abd8ca890a377521d65d493d174bebcaf694b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 3 Jun 2009 11:43:59 -0700 Subject: async_tx: structify submission arguments, add scribble Prepare the api for the arrival of a new parameter, 'scribble'. This will allow callers to identify scratchpad memory for dma address or page address conversions. As this adds yet another parameter, take this opportunity to convert the common submission parameters (flags, dependency, callback, and callback argument) into an object that is passed by reference. Also, take this opportunity to fix up the kerneldoc and add notes about the relevant ASYNC_TX_* flags for each routine. [ Impact: moves api pass-by-value parameters to a pass-by-reference struct ] Signed-off-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 6 +- crypto/async_tx/async_memcpy.c | 26 +++---- crypto/async_tx/async_memset.c | 25 +++---- crypto/async_tx/async_tx.c | 51 +++++++------- crypto/async_tx/async_xor.c | 123 +++++++++++++++++----------------- drivers/md/raid5.c | 59 +++++++++------- include/linux/async_tx.h | 84 ++++++++++++++--------- 7 files changed, 200 insertions(+), 174 deletions(-) (limited to 'include') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 76feda8541dc..dfe0475f7919 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -54,11 +54,7 @@ features surfaced as a result: 3.1 General format of the API: struct dma_async_tx_descriptor * -async_(, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *dependency, - dma_async_tx_callback callback_routine, - void *callback_parameter); +async_(, struct async_submit ctl *submit) 3.2 Supported operations: memcpy - memory copy between a source and a destination buffer diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index 7117ec6f1b74..89e05556f3df 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -33,28 +33,28 @@ * async_memcpy - attempt to copy memory with a dma engine. 
* @dest: destination page * @src: src page - * @offset: offset in pages to start transaction + * @dest_offset: offset into 'dest' to start transaction + * @src_offset: offset into 'src' to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK - * @depend_tx: memcpy depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMCPY, &dest, 1, &src, 1, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; if (device) { dma_addr_t dma_dest, dma_src; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; + dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; dma_dest = dma_map_page(device->dev, dest, dest_offset, len, DMA_FROM_DEVICE); @@ -67,13 +67,13 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { void *dest_buf, *src_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; src_buf = kmap_atomic(src, KM_USER1) + src_offset; @@ -83,7 +83,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, kunmap_atomic(dest_buf, KM_USER0); kunmap_atomic(src_buf, KM_USER1); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index b2f133885b7f..c14437238f4c 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -35,26 +35,23 @@ * @val: fill value * @offset: offset in pages to start transaction * @len: length in bytes - * @flags: ASYNC_TX_ACK - * @depend_tx: memset depends on the result of this transaction - * @cb_fn: function to call when the memcpy completes - * @cb_param: parameter to pass to the callback routine + * + * honored flags: ASYNC_TX_ACK */ struct dma_async_tx_descriptor * -async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_memset(struct page *dest, int val, unsigned int offset, size_t len, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_MEMSET, &dest, 1, NULL, 0, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; if (device) { dma_addr_t dma_dest; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; + dma_prep_flags = submit->cb_fn ? 
DMA_PREP_INTERRUPT : 0; dma_dest = dma_map_page(device->dev, dest, offset, len, DMA_FROM_DEVICE); @@ -64,19 +61,19 @@ async_memset(struct page *dest, int val, unsigned int offset, if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { /* run the memset synchronously */ void *dest_buf; pr_debug("%s: (sync) len: %zu\n", __func__, len); - dest_buf = (void *) (((char *) page_address(dest)) + offset); + dest_buf = page_address(dest) + offset; /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); memset(dest_buf, val, len); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 3766bc3d7d89..802a5ce437d9 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -45,13 +45,15 @@ static void __exit async_tx_exit(void) /** * __async_tx_find_channel - find a channel to carry out the operation or let * the transaction execute synchronously - * @depend_tx: transaction dependency + * @submit: transaction dependency and submission modifiers * @tx_type: transaction type */ struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type) +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type) { + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + /* see if we can keep the chain on one channel */ if (depend_tx && dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) @@ -144,13 +146,14 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, /** - * submit_disposition - while holding depend_tx->lock we must avoid submitting - * new operations to prevent a circular locking dependency with - * drivers that already hold a channel lock when calling - * async_tx_run_dependencies. + * submit_disposition - flags for routing an incoming operation * @ASYNC_TX_SUBMITTED: we were able to append the new operation under the lock * @ASYNC_TX_CHANNEL_SWITCH: when the lock is dropped schedule a channel switch * @ASYNC_TX_DIRECT_SUBMIT: when the lock is dropped submit directly + * + * while holding depend_tx->lock we must avoid submitting new operations + * to prevent a circular locking dependency with drivers that already + * hold a channel lock when calling async_tx_run_dependencies. 
*/ enum submit_disposition { ASYNC_TX_SUBMITTED, @@ -160,11 +163,12 @@ enum submit_disposition { void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + struct async_submit_ctl *submit) { - tx->callback = cb_fn; - tx->callback_param = cb_param; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; + + tx->callback = submit->cb_fn; + tx->callback_param = submit->cb_param; if (depend_tx) { enum submit_disposition s; @@ -220,7 +224,7 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, tx->tx_submit(tx); } - if (flags & ASYNC_TX_ACK) + if (submit->flags & ASYNC_TX_ACK) async_tx_ack(tx); if (depend_tx) @@ -229,21 +233,20 @@ async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, EXPORT_SYMBOL_GPL(async_tx_submit); /** - * async_trigger_callback - schedules the callback function to be run after - * any dependent operations have been completed. - * @flags: ASYNC_TX_ACK - * @depend_tx: 'callback' requires the completion of this transaction - * @cb_fn: function to call after depend_tx completes - * @cb_param: parameter to pass to the callback routine + * async_trigger_callback - schedules the callback function to be run + * @submit: submission and completion parameters + * + * honored flags: ASYNC_TX_ACK + * + * The callback is run after any dependent operations have completed. */ struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_trigger_callback(struct async_submit_ctl *submit) { struct dma_chan *chan; struct dma_device *device; struct dma_async_tx_descriptor *tx; + struct dma_async_tx_descriptor *depend_tx = submit->depend_tx; if (depend_tx) { chan = depend_tx->chan; @@ -262,14 +265,14 @@ async_trigger_callback(enum async_tx_flags flags, if (tx) { pr_debug("%s: (async)\n", __func__); - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { pr_debug("%s: (sync)\n", __func__); /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } return tx; diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 3cc5dc763b54..691fa98a18c4 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -34,18 +34,16 @@ static __async_inline struct dma_async_tx_descriptor * do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, unsigned int offset, int src_cnt, size_t len, - enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + struct async_submit_ctl *submit) { struct dma_device *dma = chan->device; dma_addr_t *dma_src = (dma_addr_t *) src_list; struct dma_async_tx_descriptor *tx = NULL; int src_off = 0; int i; - dma_async_tx_callback _cb_fn; - void *_cb_param; - enum async_tx_flags async_flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *cb_param_orig = submit->cb_param; + enum async_tx_flags flags_orig = submit->flags; enum dma_ctrl_flags dma_flags; int xor_src_cnt; dma_addr_t dma_dest; @@ -63,7 +61,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, } while (src_cnt) { - async_flags = flags; + submit->flags = flags_orig; 
dma_flags = 0; xor_src_cnt = min(src_cnt, dma->max_xor); /* if we are submitting additional xors, leave the chain open, @@ -71,15 +69,15 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, * buffer mapped */ if (src_cnt > xor_src_cnt) { - async_flags &= ~ASYNC_TX_ACK; + submit->flags &= ~ASYNC_TX_ACK; dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; - _cb_fn = NULL; - _cb_param = NULL; + submit->cb_fn = NULL; + submit->cb_param = NULL; } else { - _cb_fn = cb_fn; - _cb_param = cb_param; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; } - if (_cb_fn) + if (submit->cb_fn) dma_flags |= DMA_PREP_INTERRUPT; /* Since we have clobbered the src_list we are committed @@ -90,7 +88,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, xor_src_cnt, len, dma_flags); if (unlikely(!tx)) - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); /* spin wait for the preceeding transactions to complete */ while (unlikely(!tx)) { @@ -101,10 +99,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, dma_flags); } - async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, - _cb_param); - - depend_tx = tx; + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; if (src_cnt > xor_src_cnt) { /* drop completed sources */ @@ -123,8 +119,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, static void do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { int i; int xor_src_cnt; @@ -139,7 +134,7 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, /* set destination address */ dest_buf = page_address(dest) + offset; - if (flags & ASYNC_TX_XOR_ZERO_DST) + if (submit->flags & ASYNC_TX_XOR_ZERO_DST) memset(dest_buf, 0, len); while (src_cnt > 0) { @@ -152,33 +147,35 @@ do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, src_off += xor_src_cnt; } - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); } /** * async_xor - attempt to xor a set of blocks with a dma engine. - * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST - * flag must be set to not include dest data in the calculation. The - * assumption with dma eninges is that they only use the destination - * buffer as a source when it is explicity specified in the source list. * @dest: destination page - * @src_list: array of source pages (if the dest is also a source it must be - * at index zero). The contents of this array may be overwritten. - * @offset: offset in pages to start transaction + * @src_list: array of source pages + * @offset: common src/dst offset to start transaction * @src_cnt: number of source pages * @len: length in bytes - * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, ASYNC_TX_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK, ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST + * + * xor_blocks always uses the dest as a source so the + * ASYNC_TX_XOR_ZERO_DST flag must be set to not include dest data in + * the calculation. 
The assumption with dma eninges is that they only + * use the destination buffer as a source when it is explicity specified + * in the source list. + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * async_xor(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, &dest, 1, src_list, src_cnt, len); BUG_ON(src_cnt <= 1); @@ -188,7 +185,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, pr_debug("%s (async): len: %zu\n", __func__, len); return do_async_xor(chan, dest, src_list, offset, src_cnt, len, - flags, depend_tx, cb_fn, cb_param); + submit); } else { /* run the xor synchronously */ pr_debug("%s (sync): len: %zu\n", __func__, len); @@ -196,16 +193,15 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, /* in the sync case the dest is an implied source * (assumes the dest is the first source) */ - if (flags & ASYNC_TX_XOR_DROP_DST) { + if (submit->flags & ASYNC_TX_XOR_DROP_DST) { src_cnt--; src_list++; } /* wait for any prerequisite operations */ - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); - do_sync_xor(dest, src_list, offset, src_cnt, len, - flags, cb_fn, cb_param); + do_sync_xor(dest, src_list, offset, src_cnt, len, submit); return NULL; } @@ -222,25 +218,25 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) /** * async_xor_val - attempt a xor parity check with a dma engine. * @dest: destination page used if the xor is performed synchronously - * @src_list: array of source pages. The dest page must be listed as a source - * at index zero. The contents of this array may be overwritten. + * @src_list: array of source pages * @offset: offset in pages to start transaction * @src_cnt: number of source pages * @len: length in bytes * @result: 0 if sum == 0 else non-zero - * @flags: ASYNC_TX_ACK - * @depend_tx: xor depends on the result of this transaction. - * @cb_fn: function to call when the xor completes - * @cb_param: parameter to pass to the callback routine + * @submit: submission / completion modifiers + * + * honored flags: ASYNC_TX_ACK + * + * src_list note: if the dest is also a source it must be at index zero. + * The contents of this array will be overwritten if a scribble region + * is not specified. */ struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, u32 *result, + struct async_submit_ctl *submit) { - struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR_VAL, + struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, &dest, 1, src_list, src_cnt, len); struct dma_device *device = chan ? 
chan->device : NULL; @@ -250,11 +246,12 @@ async_xor_val(struct page *dest, struct page **src_list, if (device && src_cnt <= device->max_xor) { dma_addr_t *dma_src = (dma_addr_t *) src_list; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; + unsigned long dma_prep_flags; int i; pr_debug("%s: (async) len: %zu\n", __func__, len); + dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; for (i = 0; i < src_cnt; i++) dma_src[i] = dma_map_page(device->dev, src_list[i], offset, len, DMA_TO_DEVICE); @@ -263,7 +260,7 @@ async_xor_val(struct page *dest, struct page **src_list, len, result, dma_prep_flags); if (unlikely(!tx)) { - async_tx_quiesce(&depend_tx); + async_tx_quiesce(&submit->depend_tx); while (!tx) { dma_async_issue_pending(chan); @@ -273,23 +270,23 @@ async_xor_val(struct page *dest, struct page **src_list, } } - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + async_tx_submit(chan, tx, submit); } else { - unsigned long xor_flags = flags; + enum async_tx_flags flags_orig = submit->flags; pr_debug("%s: (sync) len: %zu\n", __func__, len); - xor_flags |= ASYNC_TX_XOR_DROP_DST; - xor_flags &= ~ASYNC_TX_ACK; + submit->flags |= ASYNC_TX_XOR_DROP_DST; + submit->flags &= ~ASYNC_TX_ACK; - tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, - depend_tx, NULL, NULL); + tx = async_xor(dest, src_list, offset, src_cnt, len, submit); async_tx_quiesce(&tx); *result = page_is_zero(dest, offset, len) ? 0 : 1; - async_tx_sync_epilog(cb_fn, cb_param); + async_tx_sync_epilog(submit); + submit->flags = flags_orig; } return tx; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0ef5362c8d02..e1920f23579f 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -499,11 +499,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, struct page *bio_page; int i; int page_offset; + struct async_submit_ctl submit; if (bio->bi_sector >= sector) page_offset = (signed)(bio->bi_sector - sector) * 512; else page_offset = (signed)(sector - bio->bi_sector) * -512; + + init_async_submit(&submit, 0, tx, NULL, NULL, NULL); bio_for_each_segment(bvl, bio, i) { int len = bio_iovec_idx(bio, i)->bv_len; int clen; @@ -525,13 +528,14 @@ async_copy_data(int frombio, struct bio *bio, struct page *page, bio_page = bio_iovec_idx(bio, i)->bv_page; if (frombio) tx = async_memcpy(page, bio_page, page_offset, - b_offset, clen, 0, - tx, NULL, NULL); + b_offset, clen, &submit); else tx = async_memcpy(bio_page, page, b_offset, - page_offset, clen, 0, - tx, NULL, NULL); + page_offset, clen, &submit); } + /* chain the operations */ + submit.depend_tx = tx; + if (clen < len) /* hit end of page */ break; page_offset += len; @@ -590,6 +594,7 @@ static void ops_run_biofill(struct stripe_head *sh) { struct dma_async_tx_descriptor *tx = NULL; raid5_conf_t *conf = sh->raid_conf; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu\n", __func__, @@ -613,7 +618,8 @@ static void ops_run_biofill(struct stripe_head *sh) } atomic_inc(&sh->count); - async_trigger_callback(ASYNC_TX_ACK, tx, ops_complete_biofill, sh); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_biofill, sh, NULL); + async_trigger_callback(&submit); } static void ops_complete_compute5(void *stripe_head_ref) @@ -645,6 +651,7 @@ static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) struct page *xor_dest = tgt->page; int count = 0; struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; int i; pr_debug("%s: stripe %llu block: %d\n", @@ -657,13 +664,12 @@ 
static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) atomic_inc(&sh->count); + init_async_submit(&submit, ASYNC_TX_XOR_ZERO_DST, NULL, + ops_complete_compute5, sh, NULL); if (unlikely(count == 1)) - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - 0, NULL, ops_complete_compute5, sh); + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_XOR_ZERO_DST, NULL, - ops_complete_compute5, sh); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } @@ -683,6 +689,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) int disks = sh->disks; struct page *xor_srcs[disks]; int count = 0, pd_idx = sh->pd_idx, i; + struct async_submit_ctl submit; /* existing parity data subtracted */ struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; @@ -697,9 +704,9 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) xor_srcs[count++] = dev->page; } - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - ASYNC_TX_XOR_DROP_DST, tx, - ops_complete_prexor, sh); + init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, tx, + ops_complete_prexor, sh, NULL); + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); return tx; } @@ -772,7 +779,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) /* kernel stack size limits the total number of disks */ int disks = sh->disks; struct page *xor_srcs[disks]; - + struct async_submit_ctl submit; int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest; int prexor = 0; @@ -811,13 +818,11 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) atomic_inc(&sh->count); - if (unlikely(count == 1)) { - flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); - tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); - } else - tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - flags, tx, ops_complete_postxor, sh); + init_async_submit(&submit, flags, tx, ops_complete_postxor, sh, NULL); + if (unlikely(count == 1)) + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, &submit); + else + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, &submit); } static void ops_complete_check(void *stripe_head_ref) @@ -838,6 +843,7 @@ static void ops_run_check(struct stripe_head *sh) int disks = sh->disks; struct page *xor_srcs[disks]; struct dma_async_tx_descriptor *tx; + struct async_submit_ctl submit; int count = 0, pd_idx = sh->pd_idx, i; struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; @@ -851,12 +857,13 @@ static void ops_run_check(struct stripe_head *sh) xor_srcs[count++] = dev->page; } + init_async_submit(&submit, 0, NULL, NULL, NULL, NULL); tx = async_xor_val(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, - &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); + &sh->ops.zero_sum_result, &submit); atomic_inc(&sh->count); - tx = async_trigger_callback(ASYNC_TX_ACK, tx, - ops_complete_check, sh); + init_async_submit(&submit, ASYNC_TX_ACK, tx, ops_complete_check, sh, NULL); + tx = async_trigger_callback(&submit); } static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) @@ -2664,6 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, if (i != sh->pd_idx && i != sh->qd_idx) { int dd_idx, j; struct stripe_head *sh2; + struct async_submit_ctl submit; sector_t bn = 
compute_blocknr(sh, i, 1); sector_t s = raid5_compute_sector(conf, bn, 0, @@ -2683,9 +2691,10 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, } /* place all the copies on one channel */ + init_async_submit(&submit, 0, tx, NULL, NULL, NULL); tx = async_memcpy(sh2->dev[dd_idx].page, sh->dev[i].page, 0, 0, STRIPE_SIZE, - 0, tx, NULL, NULL); + &submit); set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 9f14cd540cd2..00cfb637ddf2 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -65,6 +65,22 @@ enum async_tx_flags { ASYNC_TX_ACK = (1 << 2), }; +/** + * struct async_submit_ctl - async_tx submission/completion modifiers + * @flags: submission modifiers + * @depend_tx: parent dependency of the current operation being submitted + * @cb_fn: callback routine to run at operation completion + * @cb_param: parameter for the callback routine + * @scribble: caller provided space for dma/page address conversions + */ +struct async_submit_ctl { + enum async_tx_flags flags; + struct dma_async_tx_descriptor *depend_tx; + dma_async_tx_callback cb_fn; + void *cb_param; + void *scribble; +}; + #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL @@ -73,8 +89,8 @@ enum async_tx_flags { #define async_tx_find_channel(dep, type, dst, dst_count, src, src_count, len) \ __async_tx_find_channel(dep, type) struct dma_chan * -__async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type); +__async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type); #endif /* CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL */ #else static inline void async_tx_issue_pending_all(void) @@ -83,9 +99,10 @@ static inline void async_tx_issue_pending_all(void) } static inline struct dma_chan * -async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, - enum dma_transaction_type tx_type, struct page **dst, int dst_count, - struct page **src, int src_count, size_t len) +async_tx_find_channel(struct async_submit_ctl *submit, + enum dma_transaction_type tx_type, struct page **dst, + int dst_count, struct page **src, int src_count, + size_t len) { return NULL; } @@ -97,46 +114,53 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, * @cb_fn_param: parameter to pass to the callback routine */ static inline void -async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) +async_tx_sync_epilog(struct async_submit_ctl *submit) +{ + if (submit->cb_fn) + submit->cb_fn(submit->cb_param); +} + +typedef union { + unsigned long addr; + struct page *page; + dma_addr_t dma; +} addr_conv_t; + +static inline void +init_async_submit(struct async_submit_ctl *args, enum async_tx_flags flags, + struct dma_async_tx_descriptor *tx, + dma_async_tx_callback cb_fn, void *cb_param, + addr_conv_t *scribble) { - if (cb_fn) - cb_fn(cb_fn_param); + args->flags = flags; + args->depend_tx = tx; + args->cb_fn = cb_fn; + args->cb_param = cb_param; + args->scribble = scribble; } -void -async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +void async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_xor(struct 
page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + int src_cnt, size_t len, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * -async_xor_val(struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, - u32 *result, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, + int src_cnt, size_t len, u32 *result, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, - unsigned int src_offset, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + unsigned int src_offset, size_t len, + struct async_submit_ctl *submit); struct dma_async_tx_descriptor * async_memset(struct page *dest, int val, unsigned int offset, - size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); + size_t len, struct async_submit_ctl *submit); -struct dma_async_tx_descriptor * -async_trigger_callback(enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param); +struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From 627ad9fd0733f0a31a266ff98a4a933eee710f0b Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 20 Jun 2009 23:21:41 -0400 Subject: ext4: Fix type warning on 64-bit platforms in tracing events header Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index acf4cc9cd36d..b456fb0a3c57 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -34,7 +34,8 @@ TRACE_EVENT(ext4_free_inode, TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu", jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->mode, - __entry->uid, __entry->gid, __entry->blocks) + __entry->uid, __entry->gid, + (unsigned long long) __entry->blocks) ); TRACE_EVENT(ext4_request_inode, -- cgit v1.2.3 From 0dc8335aa3e59f1adbb0fce7c9c0b342146cd036 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 7 May 2009 15:09:33 +0200 Subject: oprofile: remove irq_flags in struct op_entry This became obsolete with this commit: 304cc6a ring_buffer: remove unused flags parameter, fix Signed-off-by: Robert Richter --- include/linux/oprofile.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index 1d9518bc4c58..dbbe2dbc4418 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -171,7 +171,6 @@ struct op_sample; struct op_entry { struct ring_buffer_event *event; struct op_sample *sample; - unsigned long irq_flags; unsigned long size; unsigned long *data; }; -- cgit v1.2.3 From 51563a0e5650d0d76539625388d72d62b34c726e Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 3 Jun 2009 20:54:56 +0200 Subject: x86/oprofile: introduce oprofile_add_data64() The IBS implemention writes 64 bit register 
values to the cpu buffer by writing two 32 values using oprofile_add_data(). This patch introduces oprofile_add_data64() to write a single 64 bit value to the buffer. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 27 +++++++++------------------ drivers/oprofile/cpu_buffer.c | 15 +++++++++++++++ include/linux/oprofile.h | 1 + 3 files changed, 25 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 6493ef7ae9ad..cc930467575d 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -140,13 +140,10 @@ op_amd_handle_ibs(struct pt_regs * const regs, rdmsrl(MSR_AMD64_IBSFETCHLINAD, val); oprofile_write_reserve(&entry, regs, val, IBS_FETCH_CODE, IBS_FETCH_SIZE); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); - oprofile_add_data(&entry, (u32)ctl); - oprofile_add_data(&entry, (u32)(ctl >> 32)); + oprofile_add_data64(&entry, val); + oprofile_add_data64(&entry, ctl); rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ @@ -162,23 +159,17 @@ op_amd_handle_ibs(struct pt_regs * const regs, rdmsrl(MSR_AMD64_IBSOPRIP, val); oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE, IBS_OP_SIZE); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA2, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSOPDATA3, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCLINAD, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); rdmsrl(MSR_AMD64_IBSDCPHYSAD, val); - oprofile_add_data(&entry, (u32)val); - oprofile_add_data(&entry, (u32)(val >> 32)); + oprofile_add_data64(&entry, val); oprofile_write_commit(&entry); /* reenable the IRQ */ diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 50640cc5eef2..a7aae24f2889 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -406,6 +406,21 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val) return op_cpu_buffer_add_data(entry, val); } +int oprofile_add_data64(struct op_entry *entry, u64 val) +{ + if (!entry->event) + return 0; + if (op_cpu_buffer_get_size(entry) < 2) + /* + * the function returns 0 to indicate a too small + * buffer, even if there is some space left + */ + return 0; + if (!op_cpu_buffer_add_data(entry, (u32)val)) + return 0; + return op_cpu_buffer_add_data(entry, (u32)(val >> 32)); +} + int oprofile_write_commit(struct op_entry *entry) { if (!entry->event) diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index dbbe2dbc4418..d68d2ed94f15 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -179,6 +179,7 @@ void oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs, unsigned long pc, int code, int size); int oprofile_add_data(struct op_entry *entry, unsigned long val); +int 
oprofile_add_data64(struct op_entry *entry, u64 val); int oprofile_write_commit(struct op_entry *entry); #endif /* OPROFILE_H */ -- cgit v1.2.3 From a635f9dd83f3382577f4544a96df12356e951a40 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 12 Jun 2009 15:20:55 +0200 Subject: HID: use debugfs for report dumping descriptor It is a little bit inconvenient for people who have some non-standard HID hardware (usually violating the HID specification) to have to recompile kernel with CONFIG_HID_DEBUG to be able to see kernel's perspective of the HID report descriptor and observe the parsed events. Plus the messages are then mixed up inconveniently with the rest of the dmesg stuff. This patch implements /sys/kernel/debug/hid//rdesc file, which represents the kernel's view of report descriptor (both the raw report descriptor data and parsed contents). With all the device-specific debug data being available through debugfs, there is no need for keeping CONFIG_HID_DEBUG, as the 'debug' parameter to the hid module will now only output only driver-specific debugging options, which has absolutely minimal memory footprint, just a few error messages and one global flag (hid_debug). We use the current set of output formatting functions. The ones that need to be used both for one-shot rdesc seq_file and also for continuous flow of data (individual reports, as being sent by the device) distinguish according to the passed seq_file parameter, and if it is NULL, it still output to kernel ringbuffer, otherwise the corresponding seq_file is used for output. The format of the output is preserved. Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 15 --- drivers/hid/Makefile | 5 +- drivers/hid/hid-core.c | 13 ++- drivers/hid/hid-debug.c | 227 +++++++++++++++++++++++++++++------------- drivers/hid/hid-input.c | 13 +-- drivers/hid/usbhid/hid-core.c | 8 +- include/linux/hid-debug.h | 32 +++--- include/linux/hid.h | 4 + 8 files changed, 195 insertions(+), 122 deletions(-) (limited to 'include') diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 7e67dcb3d4f6..05950a783560 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -31,21 +31,6 @@ config HID If unsure, say Y. -config HID_DEBUG - bool "HID debugging support" - default y - depends on HID - ---help--- - This option lets the HID layer output diagnostics about its internal - state, resolve HID usages, dump HID fields, etc. Individual HID drivers - use this debugging facility to output information about individual HID - devices, etc. - - This feature is useful for those who are either debugging the HID parser - or any HID hardware device. - - If unsure, say Y. 
- config HIDRAW bool "/dev/hidraw raw HID device support" depends on HID diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile index 1f7cb0fd4505..cf3687d1ed63 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -3,9 +3,12 @@ # hid-objs := hid-core.o hid-input.o +ifdef CONFIG_DEBUG_FS + hid-objs += hid-debug.o +endif + obj-$(CONFIG_HID) += hid.o -hid-$(CONFIG_HID_DEBUG) += hid-debug.o hid-$(CONFIG_HIDRAW) += hidraw.o hid-logitech-objs := hid-lg.o diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 8551693d645f..d4317db85b54 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -44,12 +44,10 @@ #define DRIVER_DESC "HID core driver" #define DRIVER_LICENSE "GPL" -#ifdef CONFIG_HID_DEBUG int hid_debug = 0; module_param_named(debug, hid_debug, int, 0600); MODULE_PARM_DESC(debug, "HID debugging (0=off, 1=probing info, 2=continuous data dumping)"); EXPORT_SYMBOL_GPL(hid_debug); -#endif /* * Register a new report for a device. @@ -987,7 +985,6 @@ int hid_set_field(struct hid_field *field, unsigned offset, __s32 value) if (offset >= field->report_count) { dbg_hid("offset (%d) exceeds report_count (%d)\n", offset, field->report_count); - hid_dump_field(field, 8); return -1; } if (field->logical_minimum < 0) { @@ -1721,6 +1718,8 @@ int hid_add_device(struct hid_device *hdev) if (!ret) hdev->status |= HID_STAT_ADDED; + hid_debug_register(hdev, dev_name(&hdev->dev)); + return ret; } EXPORT_SYMBOL_GPL(hid_add_device); @@ -1768,6 +1767,7 @@ static void hid_remove_device(struct hid_device *hdev) { if (hdev->status & HID_STAT_ADDED) { device_del(&hdev->dev); + hid_debug_unregister(hdev); hdev->status &= ~HID_STAT_ADDED; } } @@ -1843,6 +1843,10 @@ static int __init hid_init(void) { int ret; + if (hid_debug) + printk(KERN_WARNING "HID: hid_debug parameter has been deprecated. " + "Debugging data are now provided via debugfs\n"); + ret = bus_register(&hid_bus_type); if (ret) { printk(KERN_ERR "HID: can't register hid bus\n"); @@ -1853,6 +1857,8 @@ static int __init hid_init(void) if (ret) goto err_bus; + hid_debug_init(); + return 0; err_bus: bus_unregister(&hid_bus_type); @@ -1862,6 +1868,7 @@ err: static void __exit hid_exit(void) { + hid_debug_exit(); hidraw_exit(); bus_unregister(&hid_bus_type); } diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 47ac1a7d66e1..067e173aa3e4 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -1,9 +1,9 @@ /* * (c) 1999 Andreas Gal * (c) 2000-2001 Vojtech Pavlik - * (c) 2007 Jiri Kosina + * (c) 2007-2009 Jiri Kosina * - * Some debug stuff for the HID parser. 
+ * HID debugging support */ /* @@ -26,9 +26,13 @@ * Vojtech Pavlik, Simunkova 1594, Prague 8, 182 00 Czech Republic */ +#include +#include #include #include +static struct dentry *hid_debug_root; + struct hid_usage_entry { unsigned page; unsigned usage; @@ -331,72 +335,83 @@ static const struct hid_usage_entry hid_usage_table[] = { { 0, 0, NULL } }; -static void resolv_usage_page(unsigned page) { +static void resolv_usage_page(unsigned page, struct seq_file *f) { const struct hid_usage_entry *p; for (p = hid_usage_table; p->description; p++) if (p->page == page) { - printk("%s", p->description); + if (!f) + printk("%s", p->description); + else + seq_printf(f, "%s", p->description); return; } - printk("%04x", page); + if (!f) + printk("%04x", page); + else + seq_printf(f, "%04x", page); } -void hid_resolv_usage(unsigned usage) { +void hid_resolv_usage(unsigned usage, struct seq_file *f) { const struct hid_usage_entry *p; - if (!hid_debug) - return; - - resolv_usage_page(usage >> 16); - printk("."); + resolv_usage_page(usage >> 16, f); + if (!f) + printk("."); + else + seq_printf(f, "."); for (p = hid_usage_table; p->description; p++) if (p->page == (usage >> 16)) { for(++p; p->description && p->usage != 0; p++) if (p->usage == (usage & 0xffff)) { - printk("%s", p->description); + if (!f) + printk("%s", p->description); + else + seq_printf(f, + "%s", + p->description); return; } break; } - printk("%04x", usage & 0xffff); + if (!f) + printk("%04x", usage & 0xffff); + else + seq_printf(f, "%04x", usage & 0xffff); } EXPORT_SYMBOL_GPL(hid_resolv_usage); -static void tab(int n) { - printk(KERN_DEBUG "%*s", n, ""); +static void tab(int n, struct seq_file *f) { + seq_printf(f, "%*s", n, ""); } -void hid_dump_field(struct hid_field *field, int n) { +void hid_dump_field(struct hid_field *field, int n, struct seq_file *f) { int j; - if (!hid_debug) - return; - if (field->physical) { - tab(n); - printk("Physical("); - hid_resolv_usage(field->physical); printk(")\n"); + tab(n, f); + seq_printf(f, "Physical("); + hid_resolv_usage(field->physical, f); seq_printf(f, ")\n"); } if (field->logical) { - tab(n); - printk("Logical("); - hid_resolv_usage(field->logical); printk(")\n"); + tab(n, f); + seq_printf(f, "Logical("); + hid_resolv_usage(field->logical, f); seq_printf(f, ")\n"); } - tab(n); printk("Usage(%d)\n", field->maxusage); + tab(n, f); seq_printf(f, "Usage(%d)\n", field->maxusage); for (j = 0; j < field->maxusage; j++) { - tab(n+2); hid_resolv_usage(field->usage[j].hid); printk("\n"); + tab(n+2, f); hid_resolv_usage(field->usage[j].hid, f); seq_printf(f, "\n"); } if (field->logical_minimum != field->logical_maximum) { - tab(n); printk("Logical Minimum(%d)\n", field->logical_minimum); - tab(n); printk("Logical Maximum(%d)\n", field->logical_maximum); + tab(n, f); seq_printf(f, "Logical Minimum(%d)\n", field->logical_minimum); + tab(n, f); seq_printf(f, "Logical Maximum(%d)\n", field->logical_maximum); } if (field->physical_minimum != field->physical_maximum) { - tab(n); printk("Physical Minimum(%d)\n", field->physical_minimum); - tab(n); printk("Physical Maximum(%d)\n", field->physical_maximum); + tab(n, f); seq_printf(f, "Physical Minimum(%d)\n", field->physical_minimum); + tab(n, f); seq_printf(f, "Physical Maximum(%d)\n", field->physical_maximum); } if (field->unit_exponent) { - tab(n); printk("Unit Exponent(%d)\n", field->unit_exponent); + tab(n, f); seq_printf(f, "Unit Exponent(%d)\n", field->unit_exponent); } if (field->unit) { static const char *systems[5] = { "None", "SI Linear", "SI 
Rotation", "English Linear", "English Rotation" }; @@ -417,77 +432,75 @@ void hid_dump_field(struct hid_field *field, int n) { data >>= 4; if(sys > 4) { - tab(n); printk("Unit(Invalid)\n"); + tab(n, f); seq_printf(f, "Unit(Invalid)\n"); } else { int earlier_unit = 0; - tab(n); printk("Unit(%s : ", systems[sys]); + tab(n, f); seq_printf(f, "Unit(%s : ", systems[sys]); for (i=1 ; i>= 4; if (nibble != 0) { if(earlier_unit++ > 0) - printk("*"); - printk("%s", units[sys][i]); + seq_printf(f, "*"); + seq_printf(f, "%s", units[sys][i]); if(nibble != 1) { /* This is a _signed_ nibble(!) */ int val = nibble & 0x7; if(nibble & 0x08) val = -((0x7 & ~val) +1); - printk("^%d", val); + seq_printf(f, "^%d", val); } } } - printk(")\n"); + seq_printf(f, ")\n"); } } - tab(n); printk("Report Size(%u)\n", field->report_size); - tab(n); printk("Report Count(%u)\n", field->report_count); - tab(n); printk("Report Offset(%u)\n", field->report_offset); + tab(n, f); seq_printf(f, "Report Size(%u)\n", field->report_size); + tab(n, f); seq_printf(f, "Report Count(%u)\n", field->report_count); + tab(n, f); seq_printf(f, "Report Offset(%u)\n", field->report_offset); - tab(n); printk("Flags( "); + tab(n, f); seq_printf(f, "Flags( "); j = field->flags; - printk("%s", HID_MAIN_ITEM_CONSTANT & j ? "Constant " : ""); - printk("%s", HID_MAIN_ITEM_VARIABLE & j ? "Variable " : "Array "); - printk("%s", HID_MAIN_ITEM_RELATIVE & j ? "Relative " : "Absolute "); - printk("%s", HID_MAIN_ITEM_WRAP & j ? "Wrap " : ""); - printk("%s", HID_MAIN_ITEM_NONLINEAR & j ? "NonLinear " : ""); - printk("%s", HID_MAIN_ITEM_NO_PREFERRED & j ? "NoPreferredState " : ""); - printk("%s", HID_MAIN_ITEM_NULL_STATE & j ? "NullState " : ""); - printk("%s", HID_MAIN_ITEM_VOLATILE & j ? "Volatile " : ""); - printk("%s", HID_MAIN_ITEM_BUFFERED_BYTE & j ? "BufferedByte " : ""); - printk(")\n"); + seq_printf(f, "%s", HID_MAIN_ITEM_CONSTANT & j ? "Constant " : ""); + seq_printf(f, "%s", HID_MAIN_ITEM_VARIABLE & j ? "Variable " : "Array "); + seq_printf(f, "%s", HID_MAIN_ITEM_RELATIVE & j ? "Relative " : "Absolute "); + seq_printf(f, "%s", HID_MAIN_ITEM_WRAP & j ? "Wrap " : ""); + seq_printf(f, "%s", HID_MAIN_ITEM_NONLINEAR & j ? "NonLinear " : ""); + seq_printf(f, "%s", HID_MAIN_ITEM_NO_PREFERRED & j ? "NoPreferredState " : ""); + seq_printf(f, "%s", HID_MAIN_ITEM_NULL_STATE & j ? "NullState " : ""); + seq_printf(f, "%s", HID_MAIN_ITEM_VOLATILE & j ? "Volatile " : ""); + seq_printf(f, "%s", HID_MAIN_ITEM_BUFFERED_BYTE & j ? 
"BufferedByte " : ""); + seq_printf(f, ")\n"); } EXPORT_SYMBOL_GPL(hid_dump_field); -void hid_dump_device(struct hid_device *device) { +void hid_dump_device(struct hid_device *device, struct seq_file *f) +{ struct hid_report_enum *report_enum; struct hid_report *report; struct list_head *list; unsigned i,k; static const char *table[] = {"INPUT", "OUTPUT", "FEATURE"}; - if (!hid_debug) - return; - for (i = 0; i < HID_REPORT_TYPES; i++) { report_enum = device->report_enum + i; list = report_enum->report_list.next; while (list != &report_enum->report_list) { report = (struct hid_report *) list; - tab(2); - printk("%s", table[i]); + tab(2, f); + seq_printf(f, "%s", table[i]); if (report->id) - printk("(%d)", report->id); - printk("[%s]", table[report->type]); - printk("\n"); + seq_printf(f, "(%d)", report->id); + seq_printf(f, "[%s]", table[report->type]); + seq_printf(f, "\n"); for (k = 0; k < report->maxfield; k++) { - tab(4); - printk("Field(%d)\n", k); - hid_dump_field(report->field[k], 6); + tab(4, f); + seq_printf(f, "Field(%d)\n", k); + hid_dump_field(report->field[k], 6, f); } list = list->next; } @@ -500,7 +513,7 @@ void hid_dump_input(struct hid_usage *usage, __s32 value) { return; printk(KERN_DEBUG "hid-debug: input "); - hid_resolv_usage(usage->hid); + hid_resolv_usage(usage->hid, NULL); printk(" = %d\n", value); } EXPORT_SYMBOL_GPL(hid_dump_input); @@ -767,12 +780,84 @@ static const char **names[EV_MAX + 1] = { [EV_SND] = sounds, [EV_REP] = repeats, }; -void hid_resolv_event(__u8 type, __u16 code) { - - if (!hid_debug) - return; +void hid_resolv_event(__u8 type, __u16 code, struct seq_file *f) { - printk("%s.%s", events[type] ? events[type] : "?", + seq_printf(f, "%s.%s", events[type] ? events[type] : "?", names[type] ? (names[type][code] ? 
names[type][code] : "?") : "?"); } -EXPORT_SYMBOL_GPL(hid_resolv_event); + +void hid_dump_input_mapping(struct hid_device *hid, struct seq_file *f) +{ + int i, j, k; + struct hid_report *report; + struct hid_usage *usage; + + for (k = HID_INPUT_REPORT; k <= HID_OUTPUT_REPORT; k++) { + list_for_each_entry(report, &hid->report_enum[k].report_list, list) { + for (i = 0; i < report->maxfield; i++) { + for ( j = 0; j < report->field[i]->maxusage; j++) { + usage = report->field[i]->usage + j; + hid_resolv_usage(usage->hid, f); + seq_printf(f, " ---> "); + hid_resolv_event(usage->type, usage->code, f); + seq_printf(f, "\n"); + } + } + } + } + +} + +static int hid_debug_rdesc_show(struct seq_file *f, void *p) +{ + struct hid_device *hdev = f->private; + int i; + + /* dump HID report descriptor */ + for (i = 0; i < hdev->rsize; i++) + seq_printf(f, "%02x ", hdev->rdesc[i]); + seq_printf(f, "\n\n"); + + /* dump parsed data and input mappings */ + hid_dump_device(hdev, f); + seq_printf(f, "\n"); + hid_dump_input_mapping(hdev, f); + + return 0; +} + +static int hid_debug_rdesc_open(struct inode *inode, struct file *file) +{ + return single_open(file, hid_debug_rdesc_show, inode->i_private); +} + +static const struct file_operations hid_debug_rdesc_fops = { + .open = hid_debug_rdesc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void hid_debug_register(struct hid_device *hdev, const char *name) +{ + hdev->debug_dir = debugfs_create_dir(name, hid_debug_root); + hdev->debug_rdesc = debugfs_create_file("rdesc", 0400, + hdev->debug_dir, hdev, &hid_debug_rdesc_fops); +} + +void hid_debug_unregister(struct hid_device *hdev) +{ + debugfs_remove(hdev->debug_rdesc); + debugfs_remove(hdev->debug_dir); +} + +void hid_debug_init(void) +{ + hid_debug_root = debugfs_create_dir("hid", NULL); +} + +void hid_debug_exit(void) +{ + debugfs_remove_recursive(hid_debug_root); +} + diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 7f183b7147e1..5862b0f3b55d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -159,17 +159,12 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel field->hidinput = hidinput; - dbg_hid("Mapping: "); - hid_resolv_usage(usage->hid); - dbg_hid_line(" ---> "); - if (field->flags & HID_MAIN_ITEM_CONSTANT) goto ignore; /* only LED usages are supported in output fields */ if (field->report_type == HID_OUTPUT_REPORT && (usage->hid & HID_USAGE_PAGE) != HID_UP_LED) { - dbg_hid_line(" [non-LED output field] "); goto ignore; } @@ -561,15 +556,9 @@ mapped: set_bit(MSC_SCAN, input->mscbit); } - hid_resolv_event(usage->type, usage->code); - - dbg_hid_line("\n"); - - return; - ignore: - dbg_hid_line("IGNORED\n"); return; + } void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index ac8049b5f1e9..708aa52d0753 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -4,8 +4,8 @@ * Copyright (c) 1999 Andreas Gal * Copyright (c) 2000-2005 Vojtech Pavlik * Copyright (c) 2005 Michael Haboustak for Concept2, Inc - * Copyright (c) 2006-2008 Jiri Kosina * Copyright (c) 2007-2008 Oliver Neukum + * Copyright (c) 2006-2009 Jiri Kosina */ /* @@ -886,11 +886,6 @@ static int usbhid_parse(struct hid_device *hid) goto err; } - dbg_hid("report descriptor (size %u, read %d) = ", rsize, n); - for (n = 0; n < rsize; n++) - dbg_hid_line(" %02x", (unsigned char) rdesc[n]); - 
dbg_hid_line("\n"); - ret = hid_parse_report(hid, rdesc, rsize); kfree(rdesc); if (ret) { @@ -1005,7 +1000,6 @@ static int usbhid_start(struct hid_device *hid) usbhid->urbctrl->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP | URB_NO_SETUP_DMA_MAP); usbhid_init_reports(hid); - hid_dump_device(hid); set_bit(HID_STARTED, &usbhid->iofl); diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h index 50d568ec178a..516e12c33235 100644 --- a/include/linux/hid-debug.h +++ b/include/linux/hid-debug.h @@ -2,7 +2,7 @@ #define __HID_DEBUG_H /* - * Copyright (c) 2007 Jiri Kosina + * Copyright (c) 2007-2009 Jiri Kosina */ /* @@ -22,24 +22,30 @@ * */ -#ifdef CONFIG_HID_DEBUG +#ifdef CONFIG_DEBUG_FS void hid_dump_input(struct hid_usage *, __s32); -void hid_dump_device(struct hid_device *); -void hid_dump_field(struct hid_field *, int); -void hid_resolv_usage(unsigned); -void hid_resolv_event(__u8, __u16); +void hid_dump_device(struct hid_device *, struct seq_file *); +void hid_dump_field(struct hid_field *, int, struct seq_file *); +void hid_resolv_usage(unsigned, struct seq_file *); +void hid_debug_register(struct hid_device *, const char *); +void hid_debug_unregister(struct hid_device *); +void hid_debug_init(void); +void hid_debug_exit(void); #else -#define hid_dump_input(a,b) do { } while (0) -#define hid_dump_device(c) do { } while (0) -#define hid_dump_field(a,b) do { } while (0) -#define hid_resolv_usage(a) do { } while (0) -#define hid_resolv_event(a,b) do { } while (0) - -#endif /* CONFIG_HID_DEBUG */ +#define hid_dump_input(a,b) do { } while (0) +#define hid_dump_device(c) do { } while (0) +#define hid_dump_field(a,b) do { } while (0) +#define hid_resolv_usage(a) do { } while (0) +#define hid_resolv_event(a,b) do { } while (0) +#define hid_debug_register(a, b) do { } while (0) +#define hid_debug_unregister(a) do { } while (0) +#define hid_debug_init() do { } while (0) +#define hid_debug_exit() do { } while (0) +#endif #endif diff --git a/include/linux/hid.h b/include/linux/hid.h index a72876e43589..da09ab140ef1 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -451,6 +451,10 @@ struct hid_device { /* device report descriptor */ char phys[64]; /* Device physical location */ char uniq[64]; /* Device unique identifier (serial #) */ + /* debugfs */ + struct dentry *debug_dir; + struct dentry *debug_rdesc; + void *driver_data; /* temporary hid_ff handling (until moved to the drivers) */ -- cgit v1.2.3 From cd667ce24796700e1a0e6e7528efc61c96ff832e Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 12 Jun 2009 15:20:57 +0200 Subject: HID: use debugfs for events/reports dumping This is a followup patch to the one implemeting rdesc representation in debugfs rather than being dependent on compile-time CONFIG_HID_DEBUG setting. The API of the appropriate formatting functions is slightly modified -- if they are passed seq_file pointer, the one-shot output for 'rdesc' file mode is used, and therefore the message is formatted into the corresponding seq_file immediately. Otherwise the called function allocated a new buffer, formats the text into the buffer and returns the pointer to it, so that it can be queued into the ring-buffer of the processess blocked waiting on input on 'events' file in debugfs. 'debug' parameter to the 'hid' module is now used solely for the prupose of inetrnal driver state debugging (parser, transport, etc). 
Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 42 ++++++-- drivers/hid/hid-debug.c | 239 ++++++++++++++++++++++++++++++++++++++++++---- include/linux/hid-debug.h | 18 +++- include/linux/hid.h | 26 ++--- 4 files changed, 276 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index d4317db85b54..449bd747d116 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -46,7 +46,7 @@ int hid_debug = 0; module_param_named(debug, hid_debug, int, 0600); -MODULE_PARM_DESC(debug, "HID debugging (0=off, 1=probing info, 2=continuous data dumping)"); +MODULE_PARM_DESC(debug, "toggle HID debugging messages"); EXPORT_SYMBOL_GPL(hid_debug); /* @@ -859,7 +859,7 @@ static void hid_process_event(struct hid_device *hid, struct hid_field *field, struct hid_driver *hdrv = hid->driver; int ret; - hid_dump_input(usage, value); + hid_dump_input(hid, usage, value); if (hdrv && hdrv->event && hid_match_usage(hid, usage)) { ret = hdrv->event(hid, field, usage, value); @@ -981,7 +981,7 @@ int hid_set_field(struct hid_field *field, unsigned offset, __s32 value) { unsigned size = field->report_size; - hid_dump_input(field->usage + offset, value); + hid_dump_input(field->report->device, field->usage + offset, value); if (offset >= field->report_count) { dbg_hid("offset (%d) exceeds report_count (%d)\n", offset, field->report_count); @@ -1075,6 +1075,7 @@ int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int i struct hid_report_enum *report_enum = hid->report_enum + type; struct hid_driver *hdrv = hid->driver; struct hid_report *report; + char *buf; unsigned int i; int ret; @@ -1086,18 +1087,36 @@ int hid_input_report(struct hid_device *hid, int type, u8 *data, int size, int i return -1; } - dbg_hid("report (size %u) (%snumbered)\n", size, report_enum->numbered ? "" : "un"); + buf = kmalloc(sizeof(char) * HID_DEBUG_BUFSIZE, + interrupt ? GFP_ATOMIC : GFP_KERNEL); + + if (!buf) { + report = hid_get_report(report_enum, data); + goto nomem; + } + + snprintf(buf, HID_DEBUG_BUFSIZE - 1, + "\nreport (size %u) (%snumbered)\n", size, report_enum->numbered ? "" : "un"); + hid_debug_event(hid, buf); report = hid_get_report(report_enum, data); if (!report) return -1; /* dump the report */ - dbg_hid("report %d (size %u) = ", report->id, size); - for (i = 0; i < size; i++) - dbg_hid_line(" %02x", data[i]); - dbg_hid_line("\n"); + snprintf(buf, HID_DEBUG_BUFSIZE - 1, + "report %d (size %u) = ", report->id, size); + hid_debug_event(hid, buf); + for (i = 0; i < size; i++) { + snprintf(buf, HID_DEBUG_BUFSIZE - 1, + " %02x", data[i]); + hid_debug_event(hid, buf); + } + hid_debug_event(hid, "\n"); + + kfree(buf); +nomem: if (hdrv && hdrv->raw_event && hid_match_report(hid, report)) { ret = hdrv->raw_event(hid, report, data, size); if (ret != 0) @@ -1756,6 +1775,9 @@ struct hid_device *hid_allocate_device(void) for (i = 0; i < HID_REPORT_TYPES; i++) INIT_LIST_HEAD(&hdev->report_enum[i].report_list); + init_waitqueue_head(&hdev->debug_wait); + INIT_LIST_HEAD(&hdev->debug_list); + return hdev; err: put_device(&hdev->dev); @@ -1844,8 +1866,8 @@ static int __init hid_init(void) int ret; if (hid_debug) - printk(KERN_WARNING "HID: hid_debug parameter has been deprecated. 
" - "Debugging data are now provided via debugfs\n"); + printk(KERN_WARNING "HID: hid_debug is now used solely for parser and driver debugging.\n" + "HID: debugfs is now used for inspecting the device (report descriptor, reports)\n"); ret = bus_register(&hid_bus_type); if (ret) { diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 067e173aa3e4..a331a1821e85 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -28,6 +28,10 @@ #include #include +#include +#include +#include + #include #include @@ -335,49 +339,86 @@ static const struct hid_usage_entry hid_usage_table[] = { { 0, 0, NULL } }; -static void resolv_usage_page(unsigned page, struct seq_file *f) { +/* Either output directly into simple seq_file, or (if f == NULL) + * allocate a separate buffer that will then be passed to the 'events' + * ringbuffer. + * + * This is because these functions can be called both for "one-shot" + * "rdesc" while resolving, or for blocking "events". + * + * This holds both for resolv_usage_page() and hid_resolv_usage(). + */ +static char *resolv_usage_page(unsigned page, struct seq_file *f) { const struct hid_usage_entry *p; + char *buf = NULL; + + if (!f) { + buf = kzalloc(sizeof(char) * HID_DEBUG_BUFSIZE, GFP_ATOMIC); + if (!buf) + return ERR_PTR(-ENOMEM); + } for (p = hid_usage_table; p->description; p++) if (p->page == page) { - if (!f) - printk("%s", p->description); - else + if (!f) { + snprintf(buf, HID_DEBUG_BUFSIZE, "%s", + p->description); + return buf; + } + else { seq_printf(f, "%s", p->description); - return; + return NULL; + } } if (!f) - printk("%04x", page); + snprintf(buf, HID_DEBUG_BUFSIZE, "%04x", page); else seq_printf(f, "%04x", page); + return buf; } -void hid_resolv_usage(unsigned usage, struct seq_file *f) { +char *hid_resolv_usage(unsigned usage, struct seq_file *f) { const struct hid_usage_entry *p; + char *buf = NULL; + int len = 0; - resolv_usage_page(usage >> 16, f); - if (!f) - printk("."); - else + buf = resolv_usage_page(usage >> 16, f); + if (IS_ERR(buf)) { + printk(KERN_ERR "error allocating HID debug buffer\n"); + return NULL; + } + + + if (!f) { + len = strlen(buf); + snprintf(buf+len, max(0, HID_DEBUG_BUFSIZE - len), "."); + len++; + } + else { seq_printf(f, "."); + } for (p = hid_usage_table; p->description; p++) if (p->page == (usage >> 16)) { for(++p; p->description && p->usage != 0; p++) if (p->usage == (usage & 0xffff)) { if (!f) - printk("%s", p->description); + snprintf(buf + len, + max(0,HID_DEBUG_BUFSIZE - len - 1), + "%s", p->description); else seq_printf(f, "%s", p->description); - return; + return buf; } break; } if (!f) - printk("%04x", usage & 0xffff); + snprintf(buf + len, max(0, HID_DEBUG_BUFSIZE - len - 1), + "%04x", usage & 0xffff); else seq_printf(f, "%04x", usage & 0xffff); + return buf; } EXPORT_SYMBOL_GPL(hid_resolv_usage); @@ -508,13 +549,37 @@ void hid_dump_device(struct hid_device *device, struct seq_file *f) } EXPORT_SYMBOL_GPL(hid_dump_device); -void hid_dump_input(struct hid_usage *usage, __s32 value) { - if (hid_debug < 2) +/* enqueue string to 'events' ring buffer */ +void hid_debug_event(struct hid_device *hdev, char *buf) +{ + int i; + struct hid_debug_list *list; + + list_for_each_entry(list, &hdev->debug_list, node) { + for (i = 0; i <= strlen(buf); i++) + list->hid_debug_buf[(list->tail + i) % (HID_DEBUG_BUFSIZE - 1)] = + buf[i]; + list->tail = (list->tail + i) % (HID_DEBUG_BUFSIZE - 1); + } +} +EXPORT_SYMBOL_GPL(hid_debug_event); + +void hid_dump_input(struct hid_device *hdev, struct hid_usage *usage, 
__s32 value) +{ + char *buf; + int len; + + buf = hid_resolv_usage(usage->hid, NULL); + if (!buf) return; + len = strlen(buf); + snprintf(buf + len, HID_DEBUG_BUFSIZE - len - 1, " = %d\n", value); + + hid_debug_event(hdev, buf); + + kfree(buf); + wake_up_interruptible(&hdev->debug_wait); - printk(KERN_DEBUG "hid-debug: input "); - hid_resolv_usage(usage->hid, NULL); - printk(" = %d\n", value); } EXPORT_SYMBOL_GPL(hid_dump_input); @@ -808,6 +873,7 @@ void hid_dump_input_mapping(struct hid_device *hid, struct seq_file *f) } + static int hid_debug_rdesc_show(struct seq_file *f, void *p) { struct hid_device *hdev = f->private; @@ -831,6 +897,126 @@ static int hid_debug_rdesc_open(struct inode *inode, struct file *file) return single_open(file, hid_debug_rdesc_show, inode->i_private); } +static int hid_debug_events_open(struct inode *inode, struct file *file) +{ + int err = 0; + struct hid_debug_list *list; + + if (!(list = kzalloc(sizeof(struct hid_debug_list), GFP_KERNEL))) { + err = -ENOMEM; + goto out; + } + + if (!(list->hid_debug_buf = kzalloc(sizeof(char) * HID_DEBUG_BUFSIZE, GFP_KERNEL))) { + err = -ENOMEM; + goto out; + } + list->hdev = (struct hid_device *) inode->i_private; + file->private_data = list; + mutex_init(&list->read_mutex); + + list_add_tail(&list->node, &list->hdev->debug_list); + +out: + return err; +} + +static ssize_t hid_debug_events_read(struct file *file, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct hid_debug_list *list = file->private_data; + int ret = 0, len; + DECLARE_WAITQUEUE(wait, current); + + while (ret == 0) { + mutex_lock(&list->read_mutex); + if (list->head == list->tail) { + add_wait_queue(&list->hdev->debug_wait, &wait); + set_current_state(TASK_INTERRUPTIBLE); + + while (list->head == list->tail) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + + if (!list->hdev || !list->hdev->debug) { + ret = -EIO; + break; + } + + /* allow O_NONBLOCK from other threads */ + mutex_unlock(&list->read_mutex); + schedule(); + mutex_lock(&list->read_mutex); + set_current_state(TASK_INTERRUPTIBLE); + } + + set_current_state(TASK_RUNNING); + remove_wait_queue(&list->hdev->debug_wait, &wait); + } + + if (ret) + goto out; + + /* pass the ringbuffer contents to userspace */ +copy_rest: + if (list->tail == list->head) + goto out; + if (list->tail > list->head) { + len = list->tail - list->head; + + if (copy_to_user(buffer + ret, &list->hid_debug_buf[list->head], len)) { + ret = -EFAULT; + goto out; + } + ret += len; + list->head += len; + } else { + len = HID_DEBUG_BUFSIZE - list->head; + + if (copy_to_user(buffer, &list->hid_debug_buf[list->head], len)) { + ret = -EFAULT; + goto out; + } + list->head = 0; + ret += len; + goto copy_rest; + } + + } +out: + mutex_unlock(&list->read_mutex); + return ret; +} + +static unsigned int hid_debug_events_poll(struct file *file, poll_table *wait) +{ + struct hid_debug_list *list = file->private_data; + + poll_wait(file, &list->hdev->debug_wait, wait); + if (list->head != list->tail) + return POLLIN | POLLRDNORM; + if (!list->hdev->debug) + return POLLERR | POLLHUP; + return 0; +} + +static int hid_debug_events_release(struct inode *inode, struct file *file) +{ + struct hid_debug_list *list = file->private_data; + + list_del(&list->node); + kfree(list->hid_debug_buf); + kfree(list); + + return 0; +} + static const struct file_operations hid_debug_rdesc_fops = { .open = hid_debug_rdesc_open, .read = seq_read, @@ -838,16 +1024,31 @@ 
static const struct file_operations hid_debug_rdesc_fops = { .release = single_release, }; +static const struct file_operations hid_debug_events_fops = { + .owner = THIS_MODULE, + .open = hid_debug_events_open, + .read = hid_debug_events_read, + .poll = hid_debug_events_poll, + .release = hid_debug_events_release, +}; + + void hid_debug_register(struct hid_device *hdev, const char *name) { hdev->debug_dir = debugfs_create_dir(name, hid_debug_root); hdev->debug_rdesc = debugfs_create_file("rdesc", 0400, hdev->debug_dir, hdev, &hid_debug_rdesc_fops); + hdev->debug_events = debugfs_create_file("events", 0400, + hdev->debug_dir, hdev, &hid_debug_events_fops); + hdev->debug = 1; } void hid_debug_unregister(struct hid_device *hdev) { + hdev->debug = 0; + wake_up_interruptible(&hdev->debug_wait); debugfs_remove(hdev->debug_rdesc); + debugfs_remove(hdev->debug_events); debugfs_remove(hdev->debug_dir); } diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h index 516e12c33235..ec08ac1ad687 100644 --- a/include/linux/hid-debug.h +++ b/include/linux/hid-debug.h @@ -24,14 +24,27 @@ #ifdef CONFIG_DEBUG_FS -void hid_dump_input(struct hid_usage *, __s32); +void hid_dump_input(struct hid_device *, struct hid_usage *, __s32); void hid_dump_device(struct hid_device *, struct seq_file *); void hid_dump_field(struct hid_field *, int, struct seq_file *); -void hid_resolv_usage(unsigned, struct seq_file *); +char *hid_resolv_usage(unsigned, struct seq_file *); void hid_debug_register(struct hid_device *, const char *); void hid_debug_unregister(struct hid_device *); void hid_debug_init(void); void hid_debug_exit(void); +void hid_debug_event(struct hid_device *, char *); + +#define HID_DEBUG_BUFSIZE 512 + +struct hid_debug_list { + char *hid_debug_buf; + int head; + int tail; + struct fasync_struct *fasync; + struct hid_device *hdev; + struct list_head node; + struct mutex read_mutex; +}; #else @@ -44,6 +57,7 @@ void hid_debug_exit(void); #define hid_debug_unregister(a) do { } while (0) #define hid_debug_init() do { } while (0) #define hid_debug_exit() do { } while (0) +#define hid_debug_event(a,b) do { } while (0) #endif diff --git a/include/linux/hid.h b/include/linux/hid.h index da09ab140ef1..60fa52913f89 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -451,10 +451,6 @@ struct hid_device { /* device report descriptor */ char phys[64]; /* Device physical location */ char uniq[64]; /* Device unique identifier (serial #) */ - /* debugfs */ - struct dentry *debug_dir; - struct dentry *debug_rdesc; - void *driver_data; /* temporary hid_ff handling (until moved to the drivers) */ @@ -468,6 +464,14 @@ struct hid_device { /* device report descriptor */ /* handler for raw output data, used by hidraw */ int (*hid_output_raw_report) (struct hid_device *, __u8 *, size_t); + + /* debugging support via debugfs */ + unsigned short debug; + struct dentry *debug_dir; + struct dentry *debug_rdesc; + struct dentry *debug_events; + struct list_head debug_list; + wait_queue_head_t debug_wait; }; static inline void *hid_get_drvdata(struct hid_device *hdev) @@ -625,9 +629,7 @@ struct hid_ll_driver { /* HID core API */ -#ifdef CONFIG_HID_DEBUG extern int hid_debug; -#endif extern int hid_add_device(struct hid_device *); extern void hid_destroy_device(struct hid_device *); @@ -783,21 +785,9 @@ int hid_pidff_init(struct hid_device *hid); #define hid_pidff_init NULL #endif -#ifdef CONFIG_HID_DEBUG #define dbg_hid(format, arg...) 
if (hid_debug) \ printk(KERN_DEBUG "%s: " format ,\ __FILE__ , ## arg) -#define dbg_hid_line(format, arg...) if (hid_debug) \ - printk(format, ## arg) -#else -static inline int __attribute__((format(printf, 1, 2))) -dbg_hid(const char *fmt, ...) -{ - return 0; -} -#define dbg_hid_line dbg_hid -#endif /* HID_DEBUG */ - #define err_hid(format, arg...) printk(KERN_ERR "%s: " format "\n" , \ __FILE__ , ## arg) #endif /* HID_FF */ -- cgit v1.2.3 From 43ce1d23b43330634507a049b55c36e91d27282e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 14 Jun 2009 17:58:45 -0400 Subject: ext4: Fix mmap/truncate race when blocksize < pagesize && !nodellaoc This patch fixes the mmap/truncate race that was fixed for delayed allocation by merging ext4_{journalled,normal,da}_writepage() into ext4_writepage(). Signed-off-by: Aneesh Kumar K.V Acked-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/inode.c | 234 +++++++++++--------------------------------- include/trace/events/ext4.h | 45 +-------- 2 files changed, 58 insertions(+), 221 deletions(-) (limited to 'include') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1275f34589c7..97c48b5b0578 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -47,6 +47,10 @@ #define MPAGE_DA_EXTENT_TAIL 0x01 +static int __ext4_journalled_writepage(struct page *page, + struct writeback_control *wbc, + unsigned int len); + static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { @@ -2392,7 +2396,7 @@ static int __mpage_da_writepage(struct page *page, * We need to try to allocate * unmapped blocks in the same page. * Otherwise we won't make progress - * with the page in ext4_da_writepage + * with the page in ext4_writepage */ if (ext4_bh_delay_or_unwritten(NULL, bh)) { mpage_add_bh_to_extent(mpd, logical, @@ -2519,13 +2523,47 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock, } /* + * Note that we don't need to start a transaction unless we're journaling data + * because we should have holes filled from ext4_page_mkwrite(). We even don't + * need to file the inode to the transaction's list in ordered mode because if + * we are writing back data added by write(), the inode is already there and if + * we are writing back data modified via mmap(), noone guarantees in which + * transaction the data will hit the disk. In case we are journaling data, we + * cannot start transaction directly because transaction start ranks above page + * lock so we have to do some magic. + * * This function can get called via... * - ext4_da_writepages after taking page lock (have journal handle) * - journal_submit_inode_data_buffers (no journal handle) * - shrink_page_list via pdflush (no journal handle) * - grab_page_cache when doing write_begin (have journal handle) + * + * We don't do any block allocation in this function. If we have page with + * multiple blocks we need to write those buffer_heads that are mapped. This + * is important for mmaped based write. So if we do with blocksize 1K + * truncate(f, 1024); + * a = mmap(f, 0, 4096); + * a[0] = 'a'; + * truncate(f, 4096); + * we have in the page first buffer_head mapped via page_mkwrite call back + * but other bufer_heads would be unmapped but dirty(dirty done via the + * do_wp_page). So writepage should write the first block. If we modify + * the mmap area beyond 1024 we will again get a page_fault and the + * page_mkwrite callback will do the block allocation and mark the + * buffer_heads mapped. 
+ * + * We redirty the page if we have any buffer_heads that is either delay or + * unwritten in the page. + * + * We can get recursively called as show below. + * + * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> + * ext4_writepage() + * + * But since we don't do any block allocation we should not deadlock. + * Page also have the dirty flag cleared so we don't get recurive page_lock. */ -static int ext4_da_writepage(struct page *page, +static int ext4_writepage(struct page *page, struct writeback_control *wbc) { int ret = 0; @@ -2534,7 +2572,7 @@ static int ext4_da_writepage(struct page *page, struct buffer_head *page_bufs; struct inode *inode = page->mapping->host; - trace_ext4_da_writepage(inode, page); + trace_ext4_writepage(inode, page); size = i_size_read(inode); if (page->index == size >> PAGE_CACHE_SHIFT) len = size & ~PAGE_CACHE_MASK; @@ -2596,6 +2634,15 @@ static int ext4_da_writepage(struct page *page, block_commit_write(page, 0, len); } + if (PageChecked(page) && ext4_should_journal_data(inode)) { + /* + * It's mmapped pagecache. Add buffers and journal it. There + * doesn't seem much point in redirtying the page here. + */ + ClearPageChecked(page); + return __ext4_journalled_writepage(page, wbc, len); + } + if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) ret = nobh_writepage(page, noalloc_get_block_write, wbc); else @@ -3135,112 +3182,10 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) return 0; } -/* - * Note that we don't need to start a transaction unless we're journaling data - * because we should have holes filled from ext4_page_mkwrite(). We even don't - * need to file the inode to the transaction's list in ordered mode because if - * we are writing back data added by write(), the inode is already there and if - * we are writing back data modified via mmap(), noone guarantees in which - * transaction the data will hit the disk. In case we are journaling data, we - * cannot start transaction directly because transaction start ranks above page - * lock so we have to do some magic. - * - * In all journaling modes block_write_full_page() will start the I/O. - * - * Problem: - * - * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> - * ext4_writepage() - * - * Similar for: - * - * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... - * - * Same applies to ext4_get_block(). We will deadlock on various things like - * lock_journal and i_data_sem - * - * Setting PF_MEMALLOC here doesn't work - too many internal memory - * allocations fail. - * - * 16May01: If we're reentered then journal_current_handle() will be - * non-zero. We simply *return*. - * - * 1 July 2001: @@@ FIXME: - * In journalled data mode, a data buffer may be metadata against the - * current transaction. But the same file is part of a shared mapping - * and someone does a writepage() on it. - * - * We will move the buffer onto the async_data list, but *after* it has - * been dirtied. So there's a small window where we have dirty data on - * BJ_Metadata. - * - * Note that this only applies to the last partial page in the file. The - * bit which block_write_full_page() uses prepare/commit for. (That's - * broken code anyway: it's wrong for msync()). - * - * It's a rare case: affects the final partial page, for journalled data - * where the file is subject to bith write() and writepage() in the same - * transction. To fix it we'll need a custom block_write_full_page(). 
- * We'll probably need that anyway for journalling writepage() output. - * - * We don't honour synchronous mounts for writepage(). That would be - * disastrous. Any write() or metadata operation will sync the fs for - * us. - * - */ -static int __ext4_normal_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - - if (test_opt(inode->i_sb, NOBH)) - return nobh_writepage(page, noalloc_get_block_write, wbc); - else - return block_write_full_page(page, noalloc_get_block_write, - wbc); -} - -static int ext4_normal_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - loff_t size = i_size_read(inode); - loff_t len; - - trace_ext4_normal_writepage(inode, page); - J_ASSERT(PageLocked(page)); - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; - else - len = PAGE_CACHE_SIZE; - - if (page_has_buffers(page)) { - /* if page has buffers it should all be mapped - * and allocated. If there are not buffers attached - * to the page we know the page is dirty but it lost - * buffers. That means that at some moment in time - * after write_begin() / write_end() has been called - * all buffers have been clean and thus they must have been - * written at least once. So they are all mapped and we can - * happily proceed with mapping them and writing the page. - */ - BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, - ext4_bh_delay_or_unwritten)); - } - - if (!ext4_journal_current_handle()) - return __ext4_normal_writepage(page, wbc); - - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; -} - static int __ext4_journalled_writepage(struct page *page, - struct writeback_control *wbc) + struct writeback_control *wbc, + unsigned int len) { - loff_t size; - unsigned int len; struct address_space *mapping = page->mapping; struct inode *inode = mapping->host; struct buffer_head *page_bufs; @@ -3248,16 +3193,8 @@ static int __ext4_journalled_writepage(struct page *page, int ret = 0; int err; - size = i_size_read(inode); - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; - else - len = PAGE_CACHE_SIZE; - ret = block_prepare_write(page, 0, len, noalloc_get_block_write); - if (ret != 0) - goto out_unlock; - page_bufs = page_buffers(page); + BUG_ON(!page_bufs); walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one); /* As soon as we unlock the page, it can go away, but we have * references to buffers so we are safe */ @@ -3282,67 +3219,10 @@ static int __ext4_journalled_writepage(struct page *page, walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; - goto out; - -out_unlock: - unlock_page(page); out: return ret; } -static int ext4_journalled_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - loff_t size = i_size_read(inode); - loff_t len; - - trace_ext4_journalled_writepage(inode, page); - J_ASSERT(PageLocked(page)); - if (page->index == size >> PAGE_CACHE_SHIFT) - len = size & ~PAGE_CACHE_MASK; - else - len = PAGE_CACHE_SIZE; - - if (page_has_buffers(page)) { - /* if page has buffers it should all be mapped - * and allocated. If there are not buffers attached - * to the page we know the page is dirty but it lost - * buffers. 
That means that at some moment in time - * after write_begin() / write_end() has been called - * all buffers have been clean and thus they must have been - * written at least once. So they are all mapped and we can - * happily proceed with mapping them and writing the page. - */ - BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, - ext4_bh_delay_or_unwritten)); - } - - if (ext4_journal_current_handle()) - goto no_write; - - if (PageChecked(page)) { - /* - * It's mmapped pagecache. Add buffers and journal it. There - * doesn't seem much point in redirtying the page here. - */ - ClearPageChecked(page); - return __ext4_journalled_writepage(page, wbc); - } else { - /* - * It may be a page full of checkpoint-mode buffers. We don't - * really know unless we go poke around in the buffer_heads. - * But block_write_full_page will do the right thing. - */ - return block_write_full_page(page, noalloc_get_block_write, - wbc); - } -no_write: - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; -} - static int ext4_readpage(struct file *file, struct page *page) { return mpage_readpage(page, ext4_get_block); @@ -3489,7 +3369,7 @@ static int ext4_journalled_set_page_dirty(struct page *page) static const struct address_space_operations ext4_ordered_aops = { .readpage = ext4_readpage, .readpages = ext4_readpages, - .writepage = ext4_normal_writepage, + .writepage = ext4_writepage, .sync_page = block_sync_page, .write_begin = ext4_write_begin, .write_end = ext4_ordered_write_end, @@ -3504,7 +3384,7 @@ static const struct address_space_operations ext4_ordered_aops = { static const struct address_space_operations ext4_writeback_aops = { .readpage = ext4_readpage, .readpages = ext4_readpages, - .writepage = ext4_normal_writepage, + .writepage = ext4_writepage, .sync_page = block_sync_page, .write_begin = ext4_write_begin, .write_end = ext4_writeback_write_end, @@ -3519,7 +3399,7 @@ static const struct address_space_operations ext4_writeback_aops = { static const struct address_space_operations ext4_journalled_aops = { .readpage = ext4_readpage, .readpages = ext4_readpages, - .writepage = ext4_journalled_writepage, + .writepage = ext4_writepage, .sync_page = block_sync_page, .write_begin = ext4_write_begin, .write_end = ext4_journalled_write_end, @@ -3533,7 +3413,7 @@ static const struct address_space_operations ext4_journalled_aops = { static const struct address_space_operations ext4_da_aops = { .readpage = ext4_readpage, .readpages = ext4_readpages, - .writepage = ext4_da_writepage, + .writepage = ext4_writepage, .writepages = ext4_da_writepages, .sync_page = block_sync_page, .write_begin = ext4_da_write_begin, diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index b456fb0a3c57..dfbc9b0edc88 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -190,7 +190,7 @@ TRACE_EVENT(ext4_journalled_write_end, __entry->copied) ); -TRACE_EVENT(ext4_da_writepage, +TRACE_EVENT(ext4_writepage, TP_PROTO(struct inode *inode, struct page *page), TP_ARGS(inode, page), @@ -342,49 +342,6 @@ TRACE_EVENT(ext4_da_write_end, __entry->copied) ); -TRACE_EVENT(ext4_normal_writepage, - TP_PROTO(struct inode *inode, struct page *page), - - TP_ARGS(inode, page), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( pgoff_t, index ) - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->index = page->index; - ), - - TP_printk("dev %s ino %lu page_index %lu", - 
jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index) -); - -TRACE_EVENT(ext4_journalled_writepage, - TP_PROTO(struct inode *inode, struct page *page), - - TP_ARGS(inode, page), - - TP_STRUCT__entry( - __field( dev_t, dev ) - __field( ino_t, ino ) - __field( pgoff_t, index ) - - ), - - TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; - __entry->ino = inode->i_ino; - __entry->index = page->index; - ), - - TP_printk("dev %s ino %lu page_index %lu", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index) -); - TRACE_EVENT(ext4_discard_blocks, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), -- cgit v1.2.3 From ca94c442535a44d508c99a77e54f21a59f4fc462 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 15 Jun 2009 17:17:47 +0200 Subject: sched: Introduce SCHED_RESET_ON_FORK scheduling policy flag This patch introduces a new flag SCHED_RESET_ON_FORK which can be passed to the kernel via sched_setscheduler(), ORed in the policy parameter. If set this will make sure that when the process forks a) the scheduling priority is reset to DEFAULT_PRIO if it was higher and b) the scheduling policy is reset to SCHED_NORMAL if it was either SCHED_FIFO or SCHED_RR. Why have this? Currently, if a process is real-time scheduled this will 'leak' to all its child processes. For security reasons it is often (always?) a good idea to make sure that if a process acquires RT scheduling this is confined to this process and only this process. More specifically this makes the per-process resource limit RLIMIT_RTTIME useful for security purposes, because it makes it impossible to use a fork bomb to circumvent the per-process RLIMIT_RTTIME accounting. This feature is also useful for tools like 'renice' which can then change the nice level of a process without having this spill to all its child processes. Why expose this via sched_setscheduler() and not other syscalls such as prctl() or sched_setparam()? prctl() does not take a pid parameter. Due to that it would be impossible to modify this flag for other processes than the current one. The struct passed to sched_setparam() can unfortunately not be extended without breaking compatibility, since sched_setparam() lacks a size parameter. How to use this from userspace? 
In your RT program simply replace this: sched_setscheduler(pid, SCHED_FIFO, ¶m); by this: sched_setscheduler(pid, SCHED_FIFO|SCHED_RESET_ON_FORK, ¶m); Signed-off-by: Lennart Poettering Acked-by: Peter Zijlstra LKML-Reference: <20090615152714.GA29092@tango.0pointer.de> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 ++++++ kernel/sched.c | 49 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4896fdfec913..d4a2c6662f7d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -38,6 +38,8 @@ #define SCHED_BATCH 3 /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 +/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ +#define SCHED_RESET_ON_FORK 0x40000000 #ifdef __KERNEL__ @@ -1209,6 +1211,10 @@ struct task_struct { unsigned did_exec:1; unsigned in_execve:1; /* Tell the LSMs that the process is doing an * execve */ + + /* Revert to default priority/policy when forking */ + unsigned sched_reset_on_fork:1; + pid_t pid; pid_t tgid; diff --git a/kernel/sched.c b/kernel/sched.c index 8ec9d13140be..32e6ede85255 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2613,12 +2613,28 @@ void sched_fork(struct task_struct *p, int clone_flags) set_task_cpu(p, cpu); /* - * Make sure we do not leak PI boosting priority to the child: + * Revert to default priority/policy on fork if requested. Make sure we + * do not leak PI boosting priority to the child. */ - p->prio = current->normal_prio; + if (current->sched_reset_on_fork && + (p->policy == SCHED_FIFO || p->policy == SCHED_RR)) + p->policy = SCHED_NORMAL; + + if (current->sched_reset_on_fork && + (current->normal_prio < DEFAULT_PRIO)) + p->prio = DEFAULT_PRIO; + else + p->prio = current->normal_prio; + if (!rt_prio(p->prio)) p->sched_class = &fair_sched_class; + /* + * We don't need the reset flag anymore after the fork. 
It has + * fulfilled its duty: + */ + p->sched_reset_on_fork = 0; + #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); @@ -6094,17 +6110,25 @@ static int __sched_setscheduler(struct task_struct *p, int policy, unsigned long flags; const struct sched_class *prev_class = p->sched_class; struct rq *rq; + int reset_on_fork; /* may grab non-irq protected spin_locks */ BUG_ON(in_interrupt()); recheck: /* double check policy once rq lock held */ - if (policy < 0) + if (policy < 0) { + reset_on_fork = p->sched_reset_on_fork; policy = oldpolicy = p->policy; - else if (policy != SCHED_FIFO && policy != SCHED_RR && - policy != SCHED_NORMAL && policy != SCHED_BATCH && - policy != SCHED_IDLE) - return -EINVAL; + } else { + reset_on_fork = !!(policy & SCHED_RESET_ON_FORK); + policy &= ~SCHED_RESET_ON_FORK; + + if (policy != SCHED_FIFO && policy != SCHED_RR && + policy != SCHED_NORMAL && policy != SCHED_BATCH && + policy != SCHED_IDLE) + return -EINVAL; + } + /* * Valid priorities for SCHED_FIFO and SCHED_RR are * 1..MAX_USER_RT_PRIO-1, valid priority for SCHED_NORMAL, @@ -6148,6 +6172,10 @@ recheck: /* can't change other user's priorities */ if (!check_same_owner(p)) return -EPERM; + + /* Normal users shall not reset the sched_reset_on_fork flag */ + if (p->sched_reset_on_fork && !reset_on_fork) + return -EPERM; } if (user) { @@ -6191,6 +6219,8 @@ recheck: if (running) p->sched_class->put_prev_task(rq, p); + p->sched_reset_on_fork = reset_on_fork; + oldprio = p->prio; __setscheduler(rq, p, policy, param->sched_priority); @@ -6307,14 +6337,15 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) if (p) { retval = security_task_getscheduler(p); if (!retval) - retval = p->policy; + retval = p->policy + | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); } read_unlock(&tasklist_lock); return retval; } /** - * sys_sched_getscheduler - get the RT priority of a thread + * sys_sched_getparam - get the RT priority of a thread * @pid: the pid in question. * @param: structure containing the RT priority. */ -- cgit v1.2.3 From 085f30654175a91c28d2b66b9ea6cceab627fed0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Jun 2009 13:57:07 +0200 Subject: ALSA: Add new TLV types for dBwith min/max Add new types for TLV dB scale specified with min/max values instead of min/step since the resolution can't match always with the one a device provides. For example, usb audio devices give 1/256 dB resolution while ALSA TLV is based on 1/100 dB resolution. The new min/max types have less problems because the possible rounding error happens only at min/max. 
Signed-off-by: Takashi Iwai --- include/sound/tlv.h | 14 ++++++++++++++ sound/core/vmaster.c | 8 ++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/tlv.h b/include/sound/tlv.h index d136ea2181ed..9fd5b19ccf5c 100644 --- a/include/sound/tlv.h +++ b/include/sound/tlv.h @@ -35,6 +35,8 @@ #define SNDRV_CTL_TLVT_DB_SCALE 1 /* dB scale */ #define SNDRV_CTL_TLVT_DB_LINEAR 2 /* linear volume */ #define SNDRV_CTL_TLVT_DB_RANGE 3 /* dB range container */ +#define SNDRV_CTL_TLVT_DB_MINMAX 4 /* dB scale with min/max */ +#define SNDRV_CTL_TLVT_DB_MINMAX_MUTE 5 /* dB scale with min/max with mute */ #define TLV_DB_SCALE_ITEM(min, step, mute) \ SNDRV_CTL_TLVT_DB_SCALE, 2 * sizeof(unsigned int), \ @@ -42,6 +44,18 @@ #define DECLARE_TLV_DB_SCALE(name, min, step, mute) \ unsigned int name[] = { TLV_DB_SCALE_ITEM(min, step, mute) } +/* dB scale specified with min/max values instead of step */ +#define TLV_DB_MINMAX_ITEM(min_dB, max_dB) \ + SNDRV_CTL_TLVT_DB_MINMAX, 2 * sizeof(unsigned int), \ + (min_dB), (max_dB) +#define TLV_DB_MINMAX_MUTE_ITEM(min_dB, max_dB) \ + SNDRV_CTL_TLVT_DB_MINMAX_MUTE, 2 * sizeof(unsigned int), \ + (min_dB), (max_dB) +#define DECLARE_TLV_DB_MINMAX(name, min_dB, max_dB) \ + unsigned int name[] = { TLV_DB_MINMAX_ITEM(min_dB, max_dB) } +#define DECLARE_TLV_DB_MINMAX_MUTE(name, min_dB, max_dB) \ + unsigned int name[] = { TLV_DB_MINMAX_MUTE_ITEM(min_dB, max_dB) } + /* linear volume between min_dB and max_dB (.01dB unit) */ #define TLV_DB_LINEAR_ITEM(min_dB, max_dB) \ SNDRV_CTL_TLVT_DB_LINEAR, 2 * sizeof(unsigned int), \ diff --git a/sound/core/vmaster.c b/sound/core/vmaster.c index 257624bd1997..3b9b550109cb 100644 --- a/sound/core/vmaster.c +++ b/sound/core/vmaster.c @@ -353,7 +353,8 @@ static void master_free(struct snd_kcontrol *kcontrol) * * The optional argument @tlv can be used to specify the TLV information * for dB scale of the master control. It should be a single element - * with #SNDRV_CTL_TLVT_DB_SCALE type, and should be the max 0dB. + * with #SNDRV_CTL_TLVT_DB_SCALE, #SNDRV_CTL_TLV_DB_MINMAX or + * #SNDRV_CTL_TLVT_DB_MINMAX_MUTE type, and should be the max 0dB. */ struct snd_kcontrol *snd_ctl_make_virtual_master(char *name, const unsigned int *tlv) @@ -384,7 +385,10 @@ struct snd_kcontrol *snd_ctl_make_virtual_master(char *name, kctl->private_free = master_free; /* additional (constant) TLV read */ - if (tlv && tlv[0] == SNDRV_CTL_TLVT_DB_SCALE) { + if (tlv && + (tlv[0] == SNDRV_CTL_TLVT_DB_SCALE || + tlv[0] == SNDRV_CTL_TLVT_DB_MINMAX || + tlv[0] == SNDRV_CTL_TLVT_DB_MINMAX_MUTE)) { kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ; memcpy(master->tlv, tlv, sizeof(master->tlv)); kctl->tlv.p = master->tlv; -- cgit v1.2.3 From 5b739ef8a4e8cf5201d21abff897e292c232477b Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 18 Jun 2009 19:50:21 +0800 Subject: random: Add optional continuous repetition test to entropy store based rngs FIPS-140 requires that all random number generators implement continuous self tests in which each extracted block of data is compared against the last block for repetition. The ansi_cprng implements such a test, but it would be nice if the hw rng's did the same thing. Obviously its not something thats always needed, but it seems like it would be a nice feature to have on occasion. I've written the below patch which allows individual entropy stores to be flagged as desiring a continuous test to be run on them as is extracted. 
By default this option is off, but is enabled in the event that fips mode is selected during bootup. Signed-off-by: Neil Horman Acked-by: Matt Mackall Signed-off-by: Herbert Xu --- crypto/internal.h | 7 +------ drivers/char/random.c | 14 ++++++++++++++ include/linux/fips.h | 10 ++++++++++ 3 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 include/linux/fips.h (limited to 'include') diff --git a/crypto/internal.h b/crypto/internal.h index 113579a82dff..95baaea21fbc 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -25,12 +25,7 @@ #include #include #include - -#ifdef CONFIG_CRYPTO_FIPS -extern int fips_enabled; -#else -#define fips_enabled 0 -#endif +#include /* Crypto notification events. */ enum { diff --git a/drivers/char/random.c b/drivers/char/random.c index 8c7444857a4b..d8a9255e1a3f 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -240,6 +240,7 @@ #include #include #include +#include #ifdef CONFIG_GENERIC_HARDIRQS # include @@ -413,6 +414,7 @@ struct entropy_store { unsigned add_ptr; int entropy_count; int input_rotate; + __u8 *last_data; }; static __u32 input_pool_data[INPUT_POOL_WORDS]; @@ -852,12 +854,21 @@ static ssize_t extract_entropy(struct entropy_store *r, void *buf, { ssize_t ret = 0, i; __u8 tmp[EXTRACT_SIZE]; + unsigned long flags; xfer_secondary_pool(r, nbytes); nbytes = account(r, nbytes, min, reserved); while (nbytes) { extract_buf(r, tmp); + + if (r->last_data) { + spin_lock_irqsave(&r->lock, flags); + if (!memcmp(tmp, r->last_data, EXTRACT_SIZE)) + panic("Hardware RNG duplicated output!\n"); + memcpy(r->last_data, tmp, EXTRACT_SIZE); + spin_unlock_irqrestore(&r->lock, flags); + } i = min_t(int, nbytes, EXTRACT_SIZE); memcpy(buf, tmp, i); nbytes -= i; @@ -940,6 +951,9 @@ static void init_std_data(struct entropy_store *r) now = ktime_get_real(); mix_pool_bytes(r, &now, sizeof(now)); mix_pool_bytes(r, utsname(), sizeof(*(utsname()))); + /* Enable continuous test in fips mode */ + if (fips_enabled) + r->last_data = kmalloc(EXTRACT_SIZE, GFP_KERNEL); } static int rand_initialize(void) diff --git a/include/linux/fips.h b/include/linux/fips.h new file mode 100644 index 000000000000..f8fb07b0b6b8 --- /dev/null +++ b/include/linux/fips.h @@ -0,0 +1,10 @@ +#ifndef _FIPS_H +#define _FIPS_H + +#ifdef CONFIG_CRYPTO_FIPS +extern int fips_enabled; +#else +#define fips_enabled 0 +#endif + +#endif -- cgit v1.2.3 From f29ac756a40d0f1bb07d682ea521e7b666ff06d5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 19 Jun 2009 18:27:26 +0200 Subject: perf_counter: Optimize perf_swcounter_event() Similar to tracepoints, use an enable variable to reduce overhead when unused. Only look for a counter of a particular event type when we know there is at least one in the system. 
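The same gating pattern, reduced to a self-contained userspace sketch (illustrative only; the names below are hypothetical and not the kernel API): keep a per-event enable count, bump it when a counter is created, drop it on destroy, and make the fast path a single atomic read:

#include <stdatomic.h>
#include <stdio.h>

#define EVENT_TYPES 8

static atomic_int event_enabled[EVENT_TYPES];

static void event_slowpath(int type, long val)
{
	printf("event %d = %ld\n", type, val);	/* stand-in for the real work */
}

static inline void swcounter_event(int type, long val)
{
	/* fast path: one atomic read when no counter of this type exists */
	if (atomic_load(&event_enabled[type]))
		event_slowpath(type, val);
}

int main(void)
{
	swcounter_event(3, 42);			/* dropped, nothing enabled */
	atomic_fetch_add(&event_enabled[3], 1);	/* counter init */
	swcounter_event(3, 42);			/* dispatched */
	atomic_fetch_sub(&event_enabled[3], 1);	/* counter destroy */
	return 0;
}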
Signed-off-by: Peter Zijlstra LKML-Reference: Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 11 ++++++++++- kernel/perf_counter.c | 18 +++++++++++++++--- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 89698d8aba5c..e7213e46cf9c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -669,7 +669,16 @@ static inline int is_software_counter(struct perf_counter *counter) (counter->attr.type != PERF_TYPE_HW_CACHE); } -extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); +extern atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; + +extern void __perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); + +static inline void +perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) +{ + if (atomic_read(&perf_swcounter_enabled[event])) + __perf_swcounter_event(event, nr, nmi, regs, addr); +} extern void __perf_counter_mmap(struct vm_area_struct *vma); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 1a933a221ea4..7515c7695428 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3317,8 +3317,8 @@ out: put_cpu_var(perf_cpu_context); } -void -perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) +void __perf_swcounter_event(u32 event, u64 nr, int nmi, + struct pt_regs *regs, u64 addr) { struct perf_sample_data data = { .regs = regs, @@ -3509,9 +3509,19 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) } #endif +atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX]; + +static void sw_perf_counter_destroy(struct perf_counter *counter) +{ + u64 event = counter->attr.config; + + atomic_dec(&perf_swcounter_enabled[event]); +} + static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) { const struct pmu *pmu = NULL; + u64 event = counter->attr.config; /* * Software counters (currently) can't in general distinguish @@ -3520,7 +3530,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) * to be kernel events, and page faults are never hypervisor * events. */ - switch (counter->attr.config) { + switch (event) { case PERF_COUNT_SW_CPU_CLOCK: pmu = &perf_ops_cpu_clock; @@ -3541,6 +3551,8 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) case PERF_COUNT_SW_PAGE_FAULTS_MAJ: case PERF_COUNT_SW_CONTEXT_SWITCHES: case PERF_COUNT_SW_CPU_MIGRATIONS: + atomic_inc(&perf_swcounter_enabled[event]); + counter->destroy = sw_perf_counter_destroy; pmu = &perf_ops_generic; break; } -- cgit v1.2.3 From 92722b1bb1ebcba767f9c6ee499992ee33367268 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 11 Jun 2009 14:17:48 +0100 Subject: leds: Further document parameters for blink_set() The documentation for the parameters of blink_set() was a bit hard to find so put some where I'd expected to find it. 
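As a sketch of the contract this comment documents (not part of the patch; the "foo" names and the period limit are hypothetical, and the "(0, 0) means pick a default" handling mirrors the LP3944 driver added later in this series):

#include <linux/errno.h>
#include <linux/leds.h>

#define FOO_BLINK_PERIOD_MAX_MS	1600	/* hypothetical hardware limit */

/* hypothetical helper that programs the blink hardware (not shown) */
static int foo_hw_program_blink(struct led_classdev *led_cdev,
				unsigned long on_ms, unsigned long off_ms);

static int foo_blink_set(struct led_classdev *led_cdev,
			 unsigned long *delay_on, unsigned long *delay_off)
{
	/* delays are in milliseconds; (0, 0) asks the driver for a default */
	if (*delay_on == 0 && *delay_off == 0) {
		*delay_on = 500;	/* report the chosen 1 Hz default */
		*delay_off = 500;	/* back to the caller */
	}

	if (*delay_on + *delay_off > FOO_BLINK_PERIOD_MAX_MS)
		return -EINVAL;

	return foo_hw_program_blink(led_cdev, *delay_on, *delay_off);
}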
Signed-off-by: Mark Brown Signed-off-by: Richard Purdie --- include/linux/leds.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index 376fe07732ea..c7f0b148df06 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -45,7 +45,9 @@ struct led_classdev { /* Get LED brightness level */ enum led_brightness (*brightness_get)(struct led_classdev *led_cdev); - /* Activate hardware accelerated blink */ + /* Activate hardware accelerated blink, delays are in + * miliseconds and if none is provided then a sensible default + * should be chosen. */ int (*blink_set)(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off); -- cgit v1.2.3 From 5054d39e327f76df022163a2ebd02e444c5d65f9 Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Fri, 19 Jun 2009 13:55:42 +0200 Subject: leds: LED driver for National Semiconductor LP3944 Funlight Chip LEDs driver for National Semiconductor LP3944 Funlight Chip http://www.national.com/pf/LP/LP3944.html This helper chip can drive up to 8 leds, with two programmable DIM modes; it could even be used as a gpio expander but this driver assumes it is used as a led controller. The DIM modes are used to set _blink_ patterns for leds, the pattern is specified supplying two parameters: - period: from 0s to 1.6s - duty cycle: percentage of the period the led is on, from 0 to 100 LP3944 can be found on Motorola A910 smartphone, where it drives the rgb leds, the camera flash light and the displays backlights. Signed-off-by: Antonio Ospite Signed-off-by: Richard Purdie --- Documentation/leds-lp3944.txt | 50 +++++ drivers/leds/Kconfig | 11 + drivers/leds/Makefile | 1 + drivers/leds/leds-lp3944.c | 466 ++++++++++++++++++++++++++++++++++++++++++ include/linux/leds-lp3944.h | 53 +++++ 5 files changed, 581 insertions(+) create mode 100644 Documentation/leds-lp3944.txt create mode 100644 drivers/leds/leds-lp3944.c create mode 100644 include/linux/leds-lp3944.h (limited to 'include') diff --git a/Documentation/leds-lp3944.txt b/Documentation/leds-lp3944.txt new file mode 100644 index 000000000000..c6eda18b15ef --- /dev/null +++ b/Documentation/leds-lp3944.txt @@ -0,0 +1,50 @@ +Kernel driver lp3944 +==================== + + * National Semiconductor LP3944 Fun-light Chip + Prefix: 'lp3944' + Addresses scanned: None (see the Notes section below) + Datasheet: Publicly available at the National Semiconductor website + http://www.national.com/pf/LP/LP3944.html + +Authors: + Antonio Ospite + + +Description +----------- +The LP3944 is a helper chip that can drive up to 8 leds, with two programmable +DIM modes; it could even be used as a gpio expander but this driver assumes it +is used as a led controller. + +The DIM modes are used to set _blink_ patterns for leds, the pattern is +specified supplying two parameters: + - period: from 0s to 1.6s + - duty cycle: percentage of the period the led is on, from 0 to 100 + +Setting a led in DIM0 or DIM1 mode makes it blink according to the pattern. +See the datasheet for details. + +LP3944 can be found on Motorola A910 smartphone, where it drives the rgb +leds, the camera flash light and the lcds power. + + +Notes +----- +The chip is used mainly in embedded contexts, so this driver expects it is +registered using the i2c_board_info mechanism. 
+ +To register the chip at address 0x60 on adapter 0, set the platform data +according to include/linux/leds-lp3944.h, set the i2c board info: + + static struct i2c_board_info __initdata a910_i2c_board_info[] = { + { + I2C_BOARD_INFO("lp3944", 0x60), + .platform_data = &a910_lp3944_leds, + }, + }; + +and register it in the platform init function + + i2c_register_board_info(0, a910_i2c_board_info, + ARRAY_SIZE(a910_i2c_board_info)); diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index cfcd6bf831c9..7c8e7122aaa9 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -146,6 +146,17 @@ config LEDS_GPIO_OF of_platform devices. For instance, LEDs which are listed in a "dts" file. +config LEDS_LP3944 + tristate "LED Support for N.S. LP3944 (Fun Light) I2C chip" + depends on LEDS_CLASS && I2C + help + This option enables support for LEDs connected to the National + Semiconductor LP3944 Lighting Management Unit (LMU) also known as + Fun Light Chip. + + To compile this driver as a module, choose M here: the + module will be called leds-lp3944. + config LEDS_CLEVO_MAIL tristate "Mail LED on Clevo notebook" depends on LEDS_CLASS && X86 && SERIO_I8042 && DMI diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index 2d41c4dcf92f..e8cdcf77a4c3 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_LEDS_COBALT_RAQ) += leds-cobalt-raq.o obj-$(CONFIG_LEDS_SUNFIRE) += leds-sunfire.o obj-$(CONFIG_LEDS_PCA9532) += leds-pca9532.o obj-$(CONFIG_LEDS_GPIO) += leds-gpio.o +obj-$(CONFIG_LEDS_LP3944) += leds-lp3944.o obj-$(CONFIG_LEDS_CLEVO_MAIL) += leds-clevo-mail.o obj-$(CONFIG_LEDS_HP6XX) += leds-hp6xx.o obj-$(CONFIG_LEDS_FSG) += leds-fsg.o diff --git a/drivers/leds/leds-lp3944.c b/drivers/leds/leds-lp3944.c new file mode 100644 index 000000000000..5946208ba26e --- /dev/null +++ b/drivers/leds/leds-lp3944.c @@ -0,0 +1,466 @@ +/* + * leds-lp3944.c - driver for National Semiconductor LP3944 Funlight Chip + * + * Copyright (C) 2009 Antonio Ospite + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +/* + * I2C driver for National Semiconductor LP3944 Funlight Chip + * http://www.national.com/pf/LP/LP3944.html + * + * This helper chip can drive up to 8 leds, with two programmable DIM modes; + * it could even be used as a gpio expander but this driver assumes it is used + * as a led controller. + * + * The DIM modes are used to set _blink_ patterns for leds, the pattern is + * specified supplying two parameters: + * - period: from 0s to 1.6s + * - duty cycle: percentage of the period the led is on, from 0 to 100 + * + * LP3944 can be found on Motorola A910 smartphone, where it drives the rgb + * leds, the camera flash light and the displays backlights. 
+ */ + +#include +#include +#include +#include +#include +#include + +/* Read Only Registers */ +#define LP3944_REG_INPUT1 0x00 /* LEDs 0-7 InputRegister (Read Only) */ +#define LP3944_REG_REGISTER1 0x01 /* None (Read Only) */ + +#define LP3944_REG_PSC0 0x02 /* Frequency Prescaler 0 (R/W) */ +#define LP3944_REG_PWM0 0x03 /* PWM Register 0 (R/W) */ +#define LP3944_REG_PSC1 0x04 /* Frequency Prescaler 1 (R/W) */ +#define LP3944_REG_PWM1 0x05 /* PWM Register 1 (R/W) */ +#define LP3944_REG_LS0 0x06 /* LEDs 0-3 Selector (R/W) */ +#define LP3944_REG_LS1 0x07 /* LEDs 4-7 Selector (R/W) */ + +/* These registers are not used to control leds in LP3944, they can store + * arbitrary values which the chip will ignore. + */ +#define LP3944_REG_REGISTER8 0x08 +#define LP3944_REG_REGISTER9 0x09 + +#define LP3944_DIM0 0 +#define LP3944_DIM1 1 + +/* period in ms */ +#define LP3944_PERIOD_MIN 0 +#define LP3944_PERIOD_MAX 1600 + +/* duty cycle is a percentage */ +#define LP3944_DUTY_CYCLE_MIN 0 +#define LP3944_DUTY_CYCLE_MAX 100 + +#define ldev_to_led(c) container_of(c, struct lp3944_led_data, ldev) + +/* Saved data */ +struct lp3944_led_data { + u8 id; + enum lp3944_type type; + enum lp3944_status status; + struct led_classdev ldev; + struct i2c_client *client; + struct work_struct work; +}; + +struct lp3944_data { + struct mutex lock; + struct i2c_client *client; + struct lp3944_led_data leds[LP3944_LEDS_MAX]; +}; + +static int lp3944_reg_read(struct i2c_client *client, u8 reg, u8 *value) +{ + int tmp; + + tmp = i2c_smbus_read_byte_data(client, reg); + if (tmp < 0) + return -EINVAL; + + *value = tmp; + + return 0; +} + +static int lp3944_reg_write(struct i2c_client *client, u8 reg, u8 value) +{ + return i2c_smbus_write_byte_data(client, reg, value); +} + +/** + * Set the period for DIM status + * + * @client: the i2c client + * @dim: either LP3944_DIM0 or LP3944_DIM1 + * @period: period of a blink, that is a on/off cycle, expressed in ms. 
+ */ +static int lp3944_dim_set_period(struct i2c_client *client, u8 dim, u16 period) +{ + u8 psc_reg; + u8 psc_value; + int err; + + if (dim == LP3944_DIM0) + psc_reg = LP3944_REG_PSC0; + else if (dim == LP3944_DIM1) + psc_reg = LP3944_REG_PSC1; + else + return -EINVAL; + + /* Convert period to Prescaler value */ + if (period > LP3944_PERIOD_MAX) + return -EINVAL; + + psc_value = (period * 255) / LP3944_PERIOD_MAX; + + err = lp3944_reg_write(client, psc_reg, psc_value); + + return err; +} + +/** + * Set the duty cycle for DIM status + * + * @client: the i2c client + * @dim: either LP3944_DIM0 or LP3944_DIM1 + * @duty_cycle: percentage of a period during which a led is ON + */ +static int lp3944_dim_set_dutycycle(struct i2c_client *client, u8 dim, + u8 duty_cycle) +{ + u8 pwm_reg; + u8 pwm_value; + int err; + + if (dim == LP3944_DIM0) + pwm_reg = LP3944_REG_PWM0; + else if (dim == LP3944_DIM1) + pwm_reg = LP3944_REG_PWM1; + else + return -EINVAL; + + /* Convert duty cycle to PWM value */ + if (duty_cycle > LP3944_DUTY_CYCLE_MAX) + return -EINVAL; + + pwm_value = (duty_cycle * 255) / LP3944_DUTY_CYCLE_MAX; + + err = lp3944_reg_write(client, pwm_reg, pwm_value); + + return err; +} + +/** + * Set the led status + * + * @led: a lp3944_led_data structure + * @status: one of LP3944_LED_STATUS_OFF + * LP3944_LED_STATUS_ON + * LP3944_LED_STATUS_DIM0 + * LP3944_LED_STATUS_DIM1 + */ +static int lp3944_led_set(struct lp3944_led_data *led, u8 status) +{ + struct lp3944_data *data = i2c_get_clientdata(led->client); + u8 id = led->id; + u8 reg; + u8 val = 0; + int err; + + dev_dbg(&led->client->dev, "%s: %s, status before normalization:%d\n", + __func__, led->ldev.name, status); + + switch (id) { + case LP3944_LED0: + case LP3944_LED1: + case LP3944_LED2: + case LP3944_LED3: + reg = LP3944_REG_LS0; + break; + case LP3944_LED4: + case LP3944_LED5: + case LP3944_LED6: + case LP3944_LED7: + id -= LP3944_LED4; + reg = LP3944_REG_LS1; + break; + default: + return -EINVAL; + } + + if (status > LP3944_LED_STATUS_DIM1) + return -EINVAL; + + /* invert only 0 and 1, leave unchanged the other values, + * remember we are abusing status to set blink patterns + */ + if (led->type == LP3944_LED_TYPE_LED_INVERTED && status < 2) + status = 1 - status; + + mutex_lock(&data->lock); + lp3944_reg_read(led->client, reg, &val); + + val &= ~(LP3944_LED_STATUS_MASK << (id << 1)); + val |= (status << (id << 1)); + + dev_dbg(&led->client->dev, "%s: %s, reg:%d id:%d status:%d val:%#x\n", + __func__, led->ldev.name, reg, id, status, val); + + /* set led status */ + err = lp3944_reg_write(led->client, reg, val); + mutex_unlock(&data->lock); + + return err; +} + +static int lp3944_led_set_blink(struct led_classdev *led_cdev, + unsigned long *delay_on, + unsigned long *delay_off) +{ + struct lp3944_led_data *led = ldev_to_led(led_cdev); + u16 period; + u8 duty_cycle; + int err; + + /* units are in ms */ + if (*delay_on + *delay_off > LP3944_PERIOD_MAX) + return -EINVAL; + + if (*delay_on == 0 && *delay_off == 0) { + /* Special case: the leds subsystem requires a default user + * friendly blink pattern for the LED. Let's blink the led + * slowly (1Hz). 
+ */ + *delay_on = 500; + *delay_off = 500; + } + + period = (*delay_on) + (*delay_off); + + /* duty_cycle is the percentage of period during which the led is ON */ + duty_cycle = 100 * (*delay_on) / period; + + /* invert duty cycle for inverted leds, this has the same effect of + * swapping delay_on and delay_off + */ + if (led->type == LP3944_LED_TYPE_LED_INVERTED) + duty_cycle = 100 - duty_cycle; + + /* NOTE: using always the first DIM mode, this means that all leds + * will have the same blinking pattern. + * + * We could find a way later to have two leds blinking in hardware + * with different patterns at the same time, falling back to software + * control for the other ones. + */ + err = lp3944_dim_set_period(led->client, LP3944_DIM0, period); + if (err) + return err; + + err = lp3944_dim_set_dutycycle(led->client, LP3944_DIM0, duty_cycle); + if (err) + return err; + + dev_dbg(&led->client->dev, "%s: OK hardware accelerated blink!\n", + __func__); + + led->status = LP3944_LED_STATUS_DIM0; + schedule_work(&led->work); + + return 0; +} + +static void lp3944_led_set_brightness(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + struct lp3944_led_data *led = ldev_to_led(led_cdev); + + dev_dbg(&led->client->dev, "%s: %s, %d\n", + __func__, led_cdev->name, brightness); + + led->status = brightness; + schedule_work(&led->work); +} + +static void lp3944_led_work(struct work_struct *work) +{ + struct lp3944_led_data *led; + + led = container_of(work, struct lp3944_led_data, work); + lp3944_led_set(led, led->status); +} + +static int lp3944_configure(struct i2c_client *client, + struct lp3944_data *data, + struct lp3944_platform_data *pdata) +{ + int i, err = 0; + + for (i = 0; i < pdata->leds_size; i++) { + struct lp3944_led *pled = &pdata->leds[i]; + struct lp3944_led_data *led = &data->leds[i]; + led->client = client; + led->id = i; + + switch (pled->type) { + + case LP3944_LED_TYPE_LED: + case LP3944_LED_TYPE_LED_INVERTED: + led->type = pled->type; + led->status = pled->status; + led->ldev.name = pled->name; + led->ldev.max_brightness = 1; + led->ldev.brightness_set = lp3944_led_set_brightness; + led->ldev.blink_set = lp3944_led_set_blink; + led->ldev.flags = LED_CORE_SUSPENDRESUME; + + INIT_WORK(&led->work, lp3944_led_work); + err = led_classdev_register(&client->dev, &led->ldev); + if (err < 0) { + dev_err(&client->dev, + "couldn't register LED %s\n", + led->ldev.name); + goto exit; + } + + /* to expose the default value to userspace */ + led->ldev.brightness = led->status; + + /* Set the default led status */ + err = lp3944_led_set(led, led->status); + if (err < 0) { + dev_err(&client->dev, + "%s couldn't set STATUS %d\n", + led->ldev.name, led->status); + goto exit; + } + break; + + case LP3944_LED_TYPE_NONE: + default: + break; + + } + } + return 0; + +exit: + if (i > 0) + for (i = i - 1; i >= 0; i--) + switch (pdata->leds[i].type) { + + case LP3944_LED_TYPE_LED: + case LP3944_LED_TYPE_LED_INVERTED: + led_classdev_unregister(&data->leds[i].ldev); + cancel_work_sync(&data->leds[i].work); + break; + + case LP3944_LED_TYPE_NONE: + default: + break; + } + + return err; +} + +static int __devinit lp3944_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct lp3944_platform_data *lp3944_pdata = client->dev.platform_data; + struct lp3944_data *data; + + if (lp3944_pdata == NULL) { + dev_err(&client->dev, "no platform data\n"); + return -EINVAL; + } + + /* Let's see whether this adapter can support what we need. 
*/ + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA)) { + dev_err(&client->dev, "insufficient functionality!\n"); + return -ENODEV; + } + + data = kzalloc(sizeof(struct lp3944_data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + data->client = client; + i2c_set_clientdata(client, data); + + mutex_init(&data->lock); + + dev_info(&client->dev, "lp3944 enabled\n"); + + lp3944_configure(client, data, lp3944_pdata); + return 0; +} + +static int __devexit lp3944_remove(struct i2c_client *client) +{ + struct lp3944_platform_data *pdata = client->dev.platform_data; + struct lp3944_data *data = i2c_get_clientdata(client); + int i; + + for (i = 0; i < pdata->leds_size; i++) + switch (data->leds[i].type) { + case LP3944_LED_TYPE_LED: + case LP3944_LED_TYPE_LED_INVERTED: + led_classdev_unregister(&data->leds[i].ldev); + cancel_work_sync(&data->leds[i].work); + break; + + case LP3944_LED_TYPE_NONE: + default: + break; + } + + kfree(data); + i2c_set_clientdata(client, NULL); + + return 0; +} + +/* lp3944 i2c driver struct */ +static const struct i2c_device_id lp3944_id[] = { + {"lp3944", 0}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, lp3944_id); + +static struct i2c_driver lp3944_driver = { + .driver = { + .name = "lp3944", + }, + .probe = lp3944_probe, + .remove = __devexit_p(lp3944_remove), + .id_table = lp3944_id, +}; + +static int __init lp3944_module_init(void) +{ + return i2c_add_driver(&lp3944_driver); +} + +static void __exit lp3944_module_exit(void) +{ + i2c_del_driver(&lp3944_driver); +} + +module_init(lp3944_module_init); +module_exit(lp3944_module_exit); + +MODULE_AUTHOR("Antonio Ospite "); +MODULE_DESCRIPTION("LP3944 Fun Light Chip"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/leds-lp3944.h b/include/linux/leds-lp3944.h new file mode 100644 index 000000000000..afc9f9fd70f5 --- /dev/null +++ b/include/linux/leds-lp3944.h @@ -0,0 +1,53 @@ +/* + * leds-lp3944.h - platform data structure for lp3944 led controller + * + * Copyright (C) 2009 Antonio Ospite + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#ifndef __LINUX_LEDS_LP3944_H +#define __LINUX_LEDS_LP3944_H + +#include +#include + +#define LP3944_LED0 0 +#define LP3944_LED1 1 +#define LP3944_LED2 2 +#define LP3944_LED3 3 +#define LP3944_LED4 4 +#define LP3944_LED5 5 +#define LP3944_LED6 6 +#define LP3944_LED7 7 +#define LP3944_LEDS_MAX 8 + +#define LP3944_LED_STATUS_MASK 0x03 +enum lp3944_status { + LP3944_LED_STATUS_OFF = 0x0, + LP3944_LED_STATUS_ON = 0x1, + LP3944_LED_STATUS_DIM0 = 0x2, + LP3944_LED_STATUS_DIM1 = 0x3 +}; + +enum lp3944_type { + LP3944_LED_TYPE_NONE, + LP3944_LED_TYPE_LED, + LP3944_LED_TYPE_LED_INVERTED, +}; + +struct lp3944_led { + char *name; + enum lp3944_type type; + enum lp3944_status status; +}; + +struct lp3944_platform_data { + struct lp3944_led leds[LP3944_LEDS_MAX]; + u8 leds_size; +}; + +#endif /* __LINUX_LEDS_LP3944_H */ -- cgit v1.2.3 From ed88bae6918fa990cbfe47316bd0f790121aaf00 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Tue, 12 May 2009 15:33:12 -0700 Subject: leds: Add options to have GPIO LEDs start on or keep their state There already is a "default-on" trigger but there are problems with it. For one, it's a inefficient way to do it and requires led trigger support to be compiled in. But the real reason is that is produces a glitch on the LED. 
The GPIO is allocate with the LED *off*, then *later* when the trigger runs it is turned back on. If the LED was already on via the GPIO's reset default or action of the firmware, this produces a glitch where the LED goes from on to off to on. While normally this is fast enough that it wouldn't be noticeable to a human observer, there are still serious problems. One is that there may be something else on the GPIO line, like a hardware alarm or watchdog, that is fast enough to notice the glitch. Another is that the kernel may panic before the LED is turned back on, thus hanging with the LED in the wrong state. This is not just speculation, but actually happened to me with an embedded system that has an LED which should turn off when the kernel finishes booting, which was left in the incorrect state due to a bug in the OF LED binding code. We also let GPIO LEDs get their initial value from whatever the current state of the GPIO line is. On some systems the LEDs are put into some state by the firmware or hardware before Linux boots, and it is desired to have them keep this state which is otherwise unknown to Linux. This requires that the underlying GPIO driver support reading the value of output GPIOs. Some drivers support this and some do not. The platform device binding gains a field in the platform data "default_state" that controls this. There are three constants defined to select from on, off, or keeping the current state. The OpenFirmware binding uses a property named "default-state" that can be set to "on", "off", or "keep". The default if the property isn't present is off. Signed-off-by: Trent Piepho Acked-by: Grant Likely Acked-by: Wolfram Sang Acked-by: Sean MacLennan Signed-off-by: Richard Purdie --- Documentation/powerpc/dts-bindings/gpio/led.txt | 17 ++++++++++++++++- drivers/leds/leds-gpio.c | 20 +++++++++++++++++--- include/linux/leds.h | 9 +++++++-- 3 files changed, 40 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/powerpc/dts-bindings/gpio/led.txt b/Documentation/powerpc/dts-bindings/gpio/led.txt index 4fe14deedc0a..064db928c3c1 100644 --- a/Documentation/powerpc/dts-bindings/gpio/led.txt +++ b/Documentation/powerpc/dts-bindings/gpio/led.txt @@ -16,10 +16,17 @@ LED sub-node properties: string defining the trigger assigned to the LED. Current triggers are: "backlight" - LED will act as a back-light, controlled by the framebuffer system - "default-on" - LED will turn on + "default-on" - LED will turn on, but see "default-state" below "heartbeat" - LED "double" flashes at a load average based rate "ide-disk" - LED indicates disk activity "timer" - LED flashes at a fixed, configurable rate +- default-state: (optional) The initial state of the LED. Valid + values are "on", "off", and "keep". If the LED is already on or off + and the default-state property is set the to same value, then no + glitch should be produced where the LED momentarily turns off (or + on). The "keep" setting will keep the LED at whatever its current + state is, without producing a glitch. The default is off if this + property is not present. 
Examples: @@ -30,14 +37,22 @@ leds { gpios = <&mcu_pio 0 1>; /* Active low */ linux,default-trigger = "ide-disk"; }; + + fault { + gpios = <&mcu_pio 1 0>; + /* Keep LED on if BIOS detected hardware fault */ + default-state = "keep"; + }; }; run-control { compatible = "gpio-leds"; red { gpios = <&mpc8572 6 0>; + default-state = "off"; }; green { gpios = <&mpc8572 7 0>; + default-state = "on"; }; } diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c index 76895e691042..6b06638eb5b4 100644 --- a/drivers/leds/leds-gpio.c +++ b/drivers/leds/leds-gpio.c @@ -76,7 +76,7 @@ static int __devinit create_gpio_led(const struct gpio_led *template, struct gpio_led_data *led_dat, struct device *parent, int (*blink_set)(unsigned, unsigned long *, unsigned long *)) { - int ret; + int ret, state; /* skip leds that aren't available */ if (!gpio_is_valid(template->gpio)) { @@ -99,11 +99,15 @@ static int __devinit create_gpio_led(const struct gpio_led *template, led_dat->cdev.blink_set = gpio_blink_set; } led_dat->cdev.brightness_set = gpio_led_set; - led_dat->cdev.brightness = LED_OFF; + if (template->default_state == LEDS_GPIO_DEFSTATE_KEEP) + state = !!gpio_get_value(led_dat->gpio) ^ led_dat->active_low; + else + state = (template->default_state == LEDS_GPIO_DEFSTATE_ON); + led_dat->cdev.brightness = state ? LED_FULL : LED_OFF; if (!template->retain_state_suspended) led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME; - ret = gpio_direction_output(led_dat->gpio, led_dat->active_low); + ret = gpio_direction_output(led_dat->gpio, led_dat->active_low ^ state); if (ret < 0) goto err; @@ -223,12 +227,22 @@ static int __devinit of_gpio_leds_probe(struct of_device *ofdev, memset(&led, 0, sizeof(led)); for_each_child_of_node(np, child) { enum of_gpio_flags flags; + const char *state; led.gpio = of_get_gpio_flags(child, 0, &flags); led.active_low = flags & OF_GPIO_ACTIVE_LOW; led.name = of_get_property(child, "label", NULL) ? 
: child->name; led.default_trigger = of_get_property(child, "linux,default-trigger", NULL); + state = of_get_property(child, "default-state", NULL); + if (state) { + if (!strcmp(state, "keep")) + led.default_state = LEDS_GPIO_DEFSTATE_KEEP; + else if(!strcmp(state, "on")) + led.default_state = LEDS_GPIO_DEFSTATE_ON; + else + led.default_state = LEDS_GPIO_DEFSTATE_OFF; + } ret = create_gpio_led(&led, &pdata->led_data[pdata->num_leds++], &ofdev->dev, NULL); diff --git a/include/linux/leds.h b/include/linux/leds.h index c7f0b148df06..62af62915cf7 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -143,9 +143,14 @@ struct gpio_led { const char *name; const char *default_trigger; unsigned gpio; - u8 active_low : 1; - u8 retain_state_suspended : 1; + unsigned active_low : 1; + unsigned retain_state_suspended : 1; + unsigned default_state : 2; + /* default_state should be one of LEDS_GPIO_DEFSTATE_(ON|OFF|KEEP) */ }; +#define LEDS_GPIO_DEFSTATE_OFF 0 +#define LEDS_GPIO_DEFSTATE_ON 1 +#define LEDS_GPIO_DEFSTATE_KEEP 2 struct gpio_led_platform_data { int num_leds; -- cgit v1.2.3 From a1dd8c617217322614f0465ae347895c4b58e1ab Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Mon, 22 Jun 2009 14:54:13 +0100 Subject: leds: Futher document blink_set Futher document blink_set function pointer Signed-off-by: Richard Purdie --- include/linux/leds.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/leds.h b/include/linux/leds.h index 62af62915cf7..d8bf9665e70c 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -47,7 +47,8 @@ struct led_classdev { /* Activate hardware accelerated blink, delays are in * miliseconds and if none is provided then a sensible default - * should be chosen. */ + * should be chosen. The call can adjust the timings if it can't + * match the values specified exactly. */ int (*blink_set)(struct led_classdev *led_cdev, unsigned long *delay_on, unsigned long *delay_off); -- cgit v1.2.3 From e14cbee401cd00779a5267128371506b22c77bc9 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Tue, 23 Jun 2009 12:36:32 +0200 Subject: drm: Fix shifts which were miscalculated when converting from bitfields. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like I managed to mess up most shifts when converting from bitfields. :( The patch below works on my Thinkpad T500 (as well as on my PowerBook, where the previous change worked as well, maybe out of luck...). I'd appreciate more testing and eyes looking over it though. 
Signed-off-by: Michel Dänzer Tested-by: Michael Pyne Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_edid.c | 12 ++++++------ include/drm/drm_edid.h | 38 +++++++++++++++++++------------------- 2 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 7d0835226f6e..80cc6d06d61b 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -294,10 +294,10 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, unsigned vactive = (pt->vactive_vblank_hi & 0xf0) << 4 | pt->vactive_lo; unsigned hblank = (pt->hactive_hblank_hi & 0xf) << 8 | pt->hblank_lo; unsigned vblank = (pt->vactive_vblank_hi & 0xf) << 8 | pt->vblank_lo; - unsigned hsync_offset = (pt->hsync_vsync_offset_pulse_width_hi & 0x3) << 8 | pt->hsync_offset_lo; - unsigned hsync_pulse_width = (pt->hsync_vsync_offset_pulse_width_hi & 0xc) << 6 | pt->hsync_pulse_width_lo; - unsigned vsync_offset = (pt->hsync_vsync_offset_pulse_width_hi & 0x30) | (pt->vsync_offset_pulse_width_lo & 0xf); - unsigned vsync_pulse_width = (pt->hsync_vsync_offset_pulse_width_hi & 0xc0) >> 2 | pt->vsync_offset_pulse_width_lo >> 4; + unsigned hsync_offset = (pt->hsync_vsync_offset_pulse_width_hi & 0xc0) << 2 | pt->hsync_offset_lo; + unsigned hsync_pulse_width = (pt->hsync_vsync_offset_pulse_width_hi & 0x30) << 4 | pt->hsync_pulse_width_lo; + unsigned vsync_offset = (pt->hsync_vsync_offset_pulse_width_hi & 0xc) >> 2 | pt->vsync_offset_pulse_width_lo >> 4; + unsigned vsync_pulse_width = (pt->hsync_vsync_offset_pulse_width_hi & 0x3) << 4 | (pt->vsync_offset_pulse_width_lo & 0xf); /* ignore tiny modes */ if (hactive < 64 || vactive < 64) @@ -347,8 +347,8 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev, mode->flags |= (pt->misc & DRM_EDID_PT_VSYNC_POSITIVE) ? 
DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC; - mode->width_mm = pt->width_mm_lo | (pt->width_height_mm_hi & 0xf) << 8; - mode->height_mm = pt->height_mm_lo | (pt->width_height_mm_hi & 0xf0) << 4; + mode->width_mm = pt->width_mm_lo | (pt->width_height_mm_hi & 0xf0) << 4; + mode->height_mm = pt->height_mm_lo | (pt->width_height_mm_hi & 0xf) << 8; if (quirks & EDID_QUIRK_DETAILED_IN_CM) { mode->width_mm *= 10; diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index c263e4d71754..7d6c9a2dfcbb 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -35,11 +35,11 @@ struct est_timings { } __attribute__((packed)); /* 00=16:10, 01=4:3, 10=5:4, 11=16:9 */ -#define EDID_TIMING_ASPECT_SHIFT 0 +#define EDID_TIMING_ASPECT_SHIFT 6 #define EDID_TIMING_ASPECT_MASK (0x3 << EDID_TIMING_ASPECT_SHIFT) /* need to add 60 */ -#define EDID_TIMING_VFREQ_SHIFT 2 +#define EDID_TIMING_VFREQ_SHIFT 0 #define EDID_TIMING_VFREQ_MASK (0x3f << EDID_TIMING_VFREQ_SHIFT) struct std_timing { @@ -47,11 +47,11 @@ struct std_timing { u8 vfreq_aspect; } __attribute__((packed)); -#define DRM_EDID_PT_HSYNC_POSITIVE (1 << 6) -#define DRM_EDID_PT_VSYNC_POSITIVE (1 << 5) +#define DRM_EDID_PT_HSYNC_POSITIVE (1 << 1) +#define DRM_EDID_PT_VSYNC_POSITIVE (1 << 2) #define DRM_EDID_PT_SEPARATE_SYNC (3 << 3) -#define DRM_EDID_PT_STEREO (1 << 2) -#define DRM_EDID_PT_INTERLACED (1 << 1) +#define DRM_EDID_PT_STEREO (1 << 5) +#define DRM_EDID_PT_INTERLACED (1 << 7) /* If detailed data is pixel timing */ struct detailed_pixel_timing { @@ -93,7 +93,7 @@ struct detailed_data_monitor_range { } __attribute__((packed)); struct detailed_data_wpindex { - u8 white_xy_lo; /* Upper 2 bits each */ + u8 white_yx_lo; /* Lower 2 bits each */ u8 white_x_hi; u8 white_y_hi; u8 gamma; /* need to divide by 100 then add 1 */ @@ -135,21 +135,21 @@ struct detailed_timing { } data; } __attribute__((packed)); -#define DRM_EDID_INPUT_SERRATION_VSYNC (1 << 7) -#define DRM_EDID_INPUT_SYNC_ON_GREEN (1 << 5) -#define DRM_EDID_INPUT_COMPOSITE_SYNC (1 << 4) +#define DRM_EDID_INPUT_SERRATION_VSYNC (1 << 0) +#define DRM_EDID_INPUT_SYNC_ON_GREEN (1 << 1) +#define DRM_EDID_INPUT_COMPOSITE_SYNC (1 << 2) #define DRM_EDID_INPUT_SEPARATE_SYNCS (1 << 3) -#define DRM_EDID_INPUT_BLANK_TO_BLACK (1 << 2) -#define DRM_EDID_INPUT_VIDEO_LEVEL (3 << 1) -#define DRM_EDID_INPUT_DIGITAL (1 << 0) /* bits above must be zero if set */ +#define DRM_EDID_INPUT_BLANK_TO_BLACK (1 << 4) +#define DRM_EDID_INPUT_VIDEO_LEVEL (3 << 5) +#define DRM_EDID_INPUT_DIGITAL (1 << 7) /* bits below must be zero if set */ -#define DRM_EDID_FEATURE_DEFAULT_GTF (1 << 7) -#define DRM_EDID_FEATURE_PREFERRED_TIMING (1 << 6) -#define DRM_EDID_FEATURE_STANDARD_COLOR (1 << 5) +#define DRM_EDID_FEATURE_DEFAULT_GTF (1 << 0) +#define DRM_EDID_FEATURE_PREFERRED_TIMING (1 << 1) +#define DRM_EDID_FEATURE_STANDARD_COLOR (1 << 2) #define DRM_EDID_FEATURE_DISPLAY_TYPE (3 << 3) /* 00=mono, 01=rgb, 10=non-rgb, 11=unknown */ -#define DRM_EDID_FEATURE_PM_ACTIVE_OFF (1 << 2) -#define DRM_EDID_FEATURE_PM_SUSPEND (1 << 1) -#define DRM_EDID_FEATURE_PM_STANDBY (1 << 0) +#define DRM_EDID_FEATURE_PM_ACTIVE_OFF (1 << 5) +#define DRM_EDID_FEATURE_PM_SUSPEND (1 << 6) +#define DRM_EDID_FEATURE_PM_STANDBY (1 << 7) struct edid { u8 header[8]; -- cgit v1.2.3 From e74e396204bfcb67570ba4517b08f5918e69afea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 30 Mar 2009 19:07:44 +0900 Subject: percpu: use dynamic percpu allocator as the default percpu allocator This patch makes most !CONFIG_HAVE_SETUP_PER_CPU_AREA archs use dynamic 
percpu allocator. The first chunk is allocated using embedding helper and 8k is reserved for modules. This ensures that the new allocator behaves almost identically to the original allocator as long as static percpu variables are concerned, so it shouldn't introduce much breakage. s390 and alpha use custom SHIFT_PERCPU_PTR() to work around addressing range limit the addressing model imposes. Unfortunately, this breaks if the address is specified using a variable, so for now, the two archs aren't converted. The following architectures are affected by this change. * sh * arm * cris * mips * sparc(32) * blackfin * avr32 * parisc (broken, under investigation) * m32r * powerpc(32) As this change makes the dynamic allocator the default one, CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is replaced with its invert - CONFIG_HAVE_LEGACY_PER_CPU_AREA, which is added to yet-to-be converted archs. These archs implement their own setup_per_cpu_areas() and the conversion is not trivial. * powerpc(64) * sparc(64) * ia64 * alpha * s390 Boot and batch alloc/free tests on x86_32 with debug code (x86_32 doesn't use default first chunk initialization). Compile tested on sparc(32), powerpc(32), arm and alpha. Kyle McMartin reported that this change breaks parisc. The problem is still under investigation and he is okay with pushing this patch forward and fixing parisc later. [ Impact: use dynamic allocator for most archs w/o custom percpu setup ] Signed-off-by: Tejun Heo Acked-by: Rusty Russell Acked-by: David S. Miller Acked-by: Benjamin Herrenschmidt Acked-by: Martin Schwidefsky Reviewed-by: Christoph Lameter Cc: Paul Mundt Cc: Russell King Cc: Mikael Starvik Cc: Ralf Baechle Cc: Bryan Wu Cc: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Cc: Hirokazu Takata Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Heiko Carstens Cc: Ingo Molnar --- arch/alpha/Kconfig | 3 +++ arch/ia64/Kconfig | 3 +++ arch/powerpc/Kconfig | 3 +++ arch/s390/Kconfig | 3 +++ arch/sparc/Kconfig | 3 +++ arch/x86/Kconfig | 3 --- include/linux/percpu.h | 12 +++++++++--- init/main.c | 24 ------------------------ kernel/module.c | 6 +++--- mm/Makefile | 2 +- mm/allocpercpu.c | 28 ++++++++++++++++++++++++++++ mm/percpu.c | 40 +++++++++++++++++++++++++++++++++++++++- 12 files changed, 95 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 9fb8aae5c391..05d86407188c 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -70,6 +70,9 @@ config AUTO_IRQ_AFFINITY depends on SMP default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 170042b420d4..328d2f8b8c3f 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -89,6 +89,9 @@ config GENERIC_TIME_VSYSCALL bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + config HAVE_SETUP_PER_CPU_AREA def_bool y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bf6cedfa05db..a774c2acbe69 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -46,6 +46,9 @@ config GENERIC_HARDIRQS_NO__DO_IRQ bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool PPC64 + config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a14dba0e4d67..f4a3cc62d28f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -75,6 +75,9 @@ config VIRT_CPU_ACCOUNTING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y + mainmenu "Linux Kernel Configuration" 
config S390 diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 3f8b6a92eabd..7a8698b913fe 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -92,6 +92,9 @@ config AUDIT_ARCH bool default y +config HAVE_LEGACY_PER_CPU_AREA + def_bool y if SPARC64 + config HAVE_SETUP_PER_CPU_AREA def_bool y if SPARC64 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d1430ef6b4f9..a48a90076d83 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -149,9 +149,6 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y -config HAVE_DYNAMIC_PER_CPU_AREA - def_bool y - config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 26fd9d12f050..e5000343dd61 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -34,7 +34,7 @@ #ifdef CONFIG_SMP -#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) @@ -80,7 +80,7 @@ extern ssize_t __init pcpu_embed_first_chunk( extern void *__alloc_reserved_percpu(size_t size, size_t align); -#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#else /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ struct percpu_data { void *ptrs[1]; @@ -99,11 +99,15 @@ struct percpu_data { (__typeof__(ptr))__p->ptrs[(cpu)]; \ }) -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ extern void *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void *__pdata); +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +extern void __init setup_per_cpu_areas(void); +#endif + #else /* CONFIG_SMP */ #define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) @@ -124,6 +128,8 @@ static inline void free_percpu(void *p) kfree(p); } +static inline void __init setup_per_cpu_areas(void) { } + #endif /* CONFIG_SMP */ #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ diff --git a/init/main.c b/init/main.c index 09131ec090c1..602d724afa5c 100644 --- a/init/main.c +++ b/init/main.c @@ -357,7 +357,6 @@ static void __init smp_init(void) #define smp_init() do { } while (0) #endif -static inline void setup_per_cpu_areas(void) { } static inline void setup_nr_cpu_ids(void) { } static inline void smp_prepare_cpus(unsigned int maxcpus) { } @@ -378,29 +377,6 @@ static void __init setup_nr_cpu_ids(void) nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; } -#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; - -EXPORT_SYMBOL(__per_cpu_offset); - -static void __init setup_per_cpu_areas(void) -{ - unsigned long size, i; - char *ptr; - unsigned long nr_possible_cpus = num_possible_cpus(); - - /* Copy section for each CPU (we discard the original) */ - size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); - ptr = alloc_bootmem_pages(size * nr_possible_cpus); - - for_each_possible_cpu(i) { - __per_cpu_offset[i] = ptr - __per_cpu_start; - memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); - ptr += size; - } -} -#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ - /* Called by boot processor to activate the rest. 
*/ static void __init smp_init(void) { diff --git a/kernel/module.c b/kernel/module.c index 38928fcaff2b..f5934954fa99 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -364,7 +364,7 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA static void *percpu_modalloc(unsigned long size, unsigned long align, const char *name) @@ -389,7 +389,7 @@ static void percpu_modfree(void *freeme) free_percpu(freeme); } -#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */ /* Number of blocks used and allocated. */ static unsigned int pcpu_num_used, pcpu_num_allocated; @@ -535,7 +535,7 @@ static int percpu_modinit(void) } __initcall(percpu_modinit); -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ static unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, diff --git a/mm/Makefile b/mm/Makefile index 5e0bd6426693..c77c6487552f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o -ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA obj-$(CONFIG_SMP) += percpu.o else obj-$(CONFIG_SMP) += allocpercpu.o diff --git a/mm/allocpercpu.c b/mm/allocpercpu.c index dfdee6a47359..df34ceae0c67 100644 --- a/mm/allocpercpu.c +++ b/mm/allocpercpu.c @@ -5,6 +5,8 @@ */ #include #include +#include +#include #ifndef cache_line_size #define cache_line_size() L1_CACHE_BYTES @@ -147,3 +149,29 @@ void free_percpu(void *__pdata) kfree(__percpu_disguise(__pdata)); } EXPORT_SYMBOL_GPL(free_percpu); + +/* + * Generic percpu area setup. + */ +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; + +EXPORT_SYMBOL(__per_cpu_offset); + +void __init setup_per_cpu_areas(void) +{ + unsigned long size, i; + char *ptr; + unsigned long nr_possible_cpus = num_possible_cpus(); + + /* Copy section for each CPU (we discard the original) */ + size = ALIGN(PERCPU_ENOUGH_ROOM, PAGE_SIZE); + ptr = alloc_bootmem_pages(size * nr_possible_cpus); + + for_each_possible_cpu(i) { + __per_cpu_offset[i] = ptr - __per_cpu_start; + memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); + ptr += size; + } +} +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ diff --git a/mm/percpu.c b/mm/percpu.c index b70f2acd8853..b14984566f5a 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -43,7 +43,7 @@ * * To use this allocator, arch code should do the followings. * - * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA + * - drop CONFIG_HAVE_LEGACY_PER_CPU_AREA * * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate * regular address to percpu pointer and back if they need to be @@ -1275,3 +1275,41 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, reserved_size, dyn_size, pcpue_unit_size, pcpue_ptr, NULL); } + +/* + * Generic percpu area setup. + * + * The embedding helper is used because its behavior closely resembles + * the original non-dynamic generic percpu area setup. This is + * important because many archs have addressing restrictions and might + * fail if the percpu area is located far away from the previous + * location. 
As an added bonus, in non-NUMA cases, embedding is + * generally a good idea TLB-wise because percpu area can piggy back + * on the physical linear memory mapping which uses large page + * mappings on applicable archs. + */ +#ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +EXPORT_SYMBOL(__per_cpu_offset); + +void __init setup_per_cpu_areas(void) +{ + size_t static_size = __per_cpu_end - __per_cpu_start; + ssize_t unit_size; + unsigned long delta; + unsigned int cpu; + + /* + * Always reserve area for module percpu variables. That's + * what the legacy allocator did. + */ + unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE, -1); + if (unit_size < 0) + panic("Failed to initialized percpu areas."); + + delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; + for_each_possible_cpu(cpu) + __per_cpu_offset[cpu] = delta + cpu * unit_size; +} +#endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ -- cgit v1.2.3 From 405d967dc70002991f8fc35c20e0d3cbc7614f63 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:38 +0900 Subject: linker script: throw away .discard section x86 throws away .discard section but no other archs do. Also, .discard is not thrown away while linking modules. Make every arch and module linking throw it away. This will be used to define dummy variables for percpu declarations and definitions. This patch is based on Ivan Kokshaysky's alpha percpu patch. [ Impact: always throw away everything in .discard ] Signed-off-by: Tejun Heo Cc: Ivan Kokshaysky Cc: Richard Henderson Cc: Russell King Cc: Haavard Skinnemoen Cc: Bryan Wu Cc: Mikael Starvik Cc: Jesper Nilsson Cc: David Howells Cc: Yoshinori Sato Cc: Tony Luck Cc: Hirokazu Takata Cc: Geert Uytterhoeven Cc: Michal Simek Cc: Ralf Baechle Cc: Kyle McMartin Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: David S. 
Miller Cc: Jeff Dike Cc: Chris Zankel Cc: Rusty Russell Cc: Ingo Molnar --- Makefile | 2 +- arch/alpha/kernel/vmlinux.lds.S | 1 + arch/arm/kernel/vmlinux.lds.S | 1 + arch/avr32/kernel/vmlinux.lds.S | 1 + arch/blackfin/kernel/vmlinux.lds.S | 1 + arch/cris/kernel/vmlinux.lds.S | 1 + arch/frv/kernel/vmlinux.lds.S | 2 ++ arch/h8300/kernel/vmlinux.lds.S | 1 + arch/ia64/kernel/vmlinux.lds.S | 1 + arch/m32r/kernel/vmlinux.lds.S | 1 + arch/m68k/kernel/vmlinux-std.lds | 1 + arch/m68k/kernel/vmlinux-sun3.lds | 1 + arch/m68knommu/kernel/vmlinux.lds.S | 1 + arch/microblaze/kernel/vmlinux.lds.S | 2 ++ arch/mips/kernel/vmlinux.lds.S | 1 + arch/mn10300/kernel/vmlinux.lds.S | 1 + arch/parisc/kernel/vmlinux.lds.S | 1 + arch/powerpc/kernel/vmlinux.lds.S | 1 + arch/s390/kernel/vmlinux.lds.S | 1 + arch/sh/kernel/vmlinux.lds.S | 1 + arch/sparc/kernel/vmlinux.lds.S | 1 + arch/um/kernel/dyn.lds.S | 2 ++ arch/um/kernel/uml.lds.S | 2 ++ arch/xtensa/kernel/vmlinux.lds.S | 1 + include/asm-generic/vmlinux.lds.h | 8 ++++++++ scripts/module-common.lds | 8 ++++++++ 26 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 scripts/module-common.lds (limited to 'include') diff --git a/Makefile b/Makefile index 46e1c9d03d51..12245be05122 100644 --- a/Makefile +++ b/Makefile @@ -327,7 +327,7 @@ CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ MODFLAGS = -DMODULE CFLAGS_MODULE = $(MODFLAGS) AFLAGS_MODULE = $(MODFLAGS) -LDFLAGS_MODULE = +LDFLAGS_MODULE = -T $(srctree)/scripts/module-common.lds CFLAGS_KERNEL = AFLAGS_KERNEL = CFLAGS_GCOV = -fprofile-arcs -ftest-coverage diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index b9d6568e5f7f..75fe1d6877e9 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -139,6 +139,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .mdebug 0 : { diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 6c0779792546..e256c57b8981 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -82,6 +82,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) *(.ARM.exidx.exit.text) *(.ARM.extab.exit.text) #ifndef CONFIG_MMU diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index 7910d41eb886..b8324608ec0c 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -131,6 +131,7 @@ SECTIONS /DISCARD/ : { EXIT_DATA *(.exitcall.exit) + *(.discard) } DWARF_DEBUG diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 6ac307ca0d80..6e8eabd8f0a6 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -280,5 +280,6 @@ SECTIONS /DISCARD/ : { *(.exitcall.exit) + *(.discard) } } diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index 0d2adfc794d4..a3175ebb38cc 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -145,6 +145,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } dram_end = dram_start + (CONFIG_ETRAX_DRAM_SIZE - __CONFIG_ETRAX_VMEM_SIZE)*1024*1024; diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 22d9787406ed..64b5a5e4d35e 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -177,6 +177,8 @@ SECTIONS .debug_ranges 0 : { *(.debug_ranges) } .comment 0 : { *(.comment) } + + /DISCARD/ : { *(.discard) } } __kernel_image_size_no_bss = __bss_start - __kernel_image_start; diff --git 
a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 43a87b9085b6..03d6c0df33db 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -154,6 +154,7 @@ SECTIONS } /DISCARD/ : { *(.exitcall.exit) + *(.discard) } .romfs : { diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 4a95e86b9ac2..13d958975874 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -29,6 +29,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) *(.IA_64.unwind.exit.text) *(.IA_64.unwind_info.exit.text) } diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 4179adf6c624..480a49944cfd 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -125,6 +125,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } /* Stabs debugging sections. */ diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 01d212bb05a6..905a797ada93 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -87,6 +87,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } /* Stabs debugging sections. */ diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index c192f773db96..47d04be322aa 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -82,6 +82,7 @@ __init_begin = .; EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .crap : { diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index b7fe505e358d..68111a61a77f 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -188,6 +188,7 @@ SECTIONS { EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .bss : { diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index d34d38dcd12c..a207543c5927 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -162,4 +162,6 @@ SECTIONS { } . = ALIGN(4096); _end = .; + + /DISCARD/ : { *(.discard) } } diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 58738c8d754f..45901609b741 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -179,6 +179,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) /* ABI crap starts here */ *(.MIPS.options) diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index 24de6b90f401..5d9f2f96ad92 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -146,6 +146,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) } STABS_DEBUG diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index fd2cc4fd2b65..ccf58341845a 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -240,6 +240,7 @@ SECTIONS /* Sections to be discarded */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) #ifdef CONFIG_64BIT /* temporary hack until binutils is fixed to not emit these * for static binaries diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 8ef8a14abc95..7fca9355fd3d 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -40,6 +40,7 @@ SECTIONS /* Sections to be discarded. 
*/ /DISCARD/ : { *(.exitcall.exit) + *(.discard) EXIT_DATA } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index a53db23ee092..98867dfea469 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -161,6 +161,7 @@ SECTIONS /DISCARD/ : { EXIT_DATA *(.exitcall.exit) + *(.discard) } /* Debugging sections. */ diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index f53c76acaede..766976d27b21 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -171,6 +171,7 @@ SECTIONS */ /DISCARD/ : { *(.exitcall.exit) + *(.discard) } STABS_DEBUG diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index fcbbd000ec08..d63cf914667d 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -175,6 +175,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } STABS_DEBUG diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 9975e1ab44fb..2916d6eadffd 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -156,4 +156,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + /DISCARD/ : { *(.discard) } } diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 11b835248b86..1f8a622cabe1 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -100,4 +100,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + /DISCARD/ : { *(.discard) } } diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index 41c159cd872f..b1e24638acd7 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -287,6 +287,7 @@ SECTIONS EXIT_TEXT EXIT_DATA *(.exitcall.exit) + *(.discard) } .xt.lit : { *(.xt.lit) } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 55413e568f07..a19120c4e109 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -628,6 +628,14 @@ #define INITRAMFS #endif +#define DISCARDS \ + /DISCARD/ : { \ + EXIT_TEXT \ + EXIT_DATA \ + *(.exitcall.exit) \ + *(.discard) \ + } + /** * PERCPU_VADDR - define output section for percpu area * @vaddr: explicit base address (optional) diff --git a/scripts/module-common.lds b/scripts/module-common.lds new file mode 100644 index 000000000000..47a1f9ae0ede --- /dev/null +++ b/scripts/module-common.lds @@ -0,0 +1,8 @@ +/* + * Common module linker script, always used when linking a module. + * Archs are free to supply their own linker scripts. ld will + * combine them automatically. + */ +SECTIONS { + /DISCARD/ : { *(.discard) } +} -- cgit v1.2.3 From 7c756e6e19e71f0327760d8955f7077118ebb2b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Jun 2009 15:13:50 +0900 Subject: percpu: implement optional weak percpu definitions Some archs (alpha and s390) need to use weak definitions for percpu variables in modules so that the compiler generates external references for them. This patch implements weak percpu definitions which arch can enable by defining ARCH_NEEDS_WEAK_PER_CPU in arch percpu header file. This weak definition adds the following two restrictions on percpu variable definitions. 1. percpu symbols must be unique whether static or not 2. percpu variables can't be defined inside a function To ensure that these restrictions are observed in generic code, config option DEBUG_FORCE_WEAK_PER_CPU enables weak percpu definitions for all cases. This patch is inspired by Ivan Kokshaysky's alpha percpu patch. 
[ Impact: stricter rules for percpu variables, one more debug config option ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Howells Cc: Ivan Kokshaysky --- include/linux/percpu-defs.h | 65 ++++++++++++++++++++++++++++++++++++++------- lib/Kconfig.debug | 15 +++++++++++ 2 files changed, 71 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8f921d74f49f..cf32838ad0fa 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -10,21 +10,68 @@ /* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the - * 'section' argument. This may be used to affect the parameters governing the + * 'sec' argument. This may be used to affect the parameters governing the * variable's storage. * * NOTE! The sections for the DECLARE and for the DEFINE must match, lest * linkage errors occur due the compiler generating the wrong code to access * that section. */ -#define DECLARE_PER_CPU_SECTION(type, name, section) \ - extern \ - __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name - -#define DEFINE_PER_CPU_SECTION(type, name, section) \ - __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name +#define __PCPU_ATTRS(sec) \ + __attribute__((section(PER_CPU_BASE_SECTION sec))) \ + PER_CPU_ATTRIBUTES + +#define __PCPU_DUMMY_ATTRS \ + __attribute__((section(".discard"), unused)) + +/* + * s390 and alpha modules require percpu variables to be defined as + * weak to force the compiler to generate GOT based external + * references for them. This is necessary because percpu sections + * will be located outside of the usually addressable area. + * + * This definition puts the following two extra restrictions when + * defining percpu variables. + * + * 1. The symbol must be globally unique, even the static ones. + * 2. Static percpu variables cannot be defined inside a function. + * + * Archs which need weak percpu definitions should define + * ARCH_NEEDS_WEAK_PER_CPU in asm/percpu.h when necessary. + * + * To ensure that the generic code observes the above two + * restrictions, if CONFIG_DEBUG_FORCE_WEAK_PER_CPU is set weak + * definition is used for all cases. + */ +#if defined(ARCH_NEEDS_WEAK_PER_CPU) || defined(CONFIG_DEBUG_FORCE_WEAK_PER_CPU) +/* + * __pcpu_scope_* dummy variable is used to enforce scope. It + * receives the static modifier when it's used in front of + * DEFINE_PER_CPU() and will trigger build failure if + * DECLARE_PER_CPU() is used for the same variable. + * + * __pcpu_unique_* dummy variable is used to enforce symbol uniqueness + * such that hidden weak symbol collision, which will cause unrelated + * variables to share the same address, can be detected during build. + */ +#define DECLARE_PER_CPU_SECTION(type, name, sec) \ + extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ + extern __PCPU_ATTRS(sec) __weak __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SECTION(type, name, sec) \ + __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ + __PCPU_DUMMY_ATTRS char __pcpu_unique_##name; \ + __PCPU_ATTRS(sec) __weak __typeof__(type) per_cpu__##name +#else +/* + * Normal declaration and definition macros. 
+ */ +#define DECLARE_PER_CPU_SECTION(type, name, sec) \ + extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name + +#define DEFINE_PER_CPU_SECTION(type, name, sec) \ + __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name +#endif /* * Variant on the per-CPU variable declaration/definition theme used for diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 23067ab1a73c..77e0d8b1b7c5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -777,6 +777,21 @@ config DEBUG_BLOCK_EXT_DEVT Say N if you are unsure. +config DEBUG_FORCE_WEAK_PER_CPU + bool "Force weak per-cpu definitions" + depends on DEBUG_KERNEL + help + s390 and alpha require percpu variables in modules to be + defined weak to work around addressing range issue which + puts the following two restrictions on percpu variable + definitions. + + 1. percpu symbols must be unique whether static or not + 2. percpu variables can't be defined inside a function + + To ensure that generic code follows the above rules, this + option forces all percpu variables to be defined as weak. + config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_KERNEL -- cgit v1.2.3 From 346c17a6cf60375323adfaa4b8a9d841049f890e Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 22 Jun 2009 07:38:26 +0000 Subject: ide: relax DMA info validity checking There are some broken devices that report multiple DMA xfer modes enabled at once (ATA spec doesn't allow it) but otherwise work fine with DMA so just delete ide_id_dma_bug(). [ As discovered by detective work by Frans and Bart, due to how handling of the ID block was handled before commit c419993 ("ide-iops: only clear DMA words on setting DMA mode") this check was always seeing zeros in the fields or other similar garbage. Therefore this check wasn't actually checking anything. Now that the tests actually check the real bits, all we see are devices that trigger the check yet work perfectly fine, therefore killing this useless check is the best thing to do. -DaveM ] Reported-by: Frans Pop Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. 
Miller --- drivers/ide/ide-dma.c | 21 --------------------- drivers/ide/ide-iops.c | 3 --- include/linux/ide.h | 2 -- 3 files changed, 26 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index 219e6fb78dc6..ee58c88dee5a 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -361,9 +361,6 @@ static int ide_tune_dma(ide_drive_t *drive) if (__ide_dma_bad_drive(drive)) return 0; - if (ide_id_dma_bug(drive)) - return 0; - if (hwif->host_flags & IDE_HFLAG_TRUST_BIOS_FOR_DMA) return config_drive_for_dma(drive); @@ -394,24 +391,6 @@ static int ide_dma_check(ide_drive_t *drive) return -1; } -int ide_id_dma_bug(ide_drive_t *drive) -{ - u16 *id = drive->id; - - if (id[ATA_ID_FIELD_VALID] & 4) { - if ((id[ATA_ID_UDMA_MODES] >> 8) && - (id[ATA_ID_MWDMA_MODES] >> 8)) - goto err_out; - } else if ((id[ATA_ID_MWDMA_MODES] >> 8) && - (id[ATA_ID_SWDMA_MODES] >> 8)) - goto err_out; - - return 0; -err_out: - printk(KERN_ERR "%s: bad DMA info in identify block\n", drive->name); - return 1; -} - int ide_set_dma(ide_drive_t *drive) { int rc; diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index fa047150a1c6..917186ec4966 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -329,9 +329,6 @@ int ide_driveid_update(ide_drive_t *drive) kfree(id); - if ((drive->dev_flags & IDE_DFLAG_USING_DMA) && ide_id_dma_bug(drive)) - ide_dma_off(drive); - return 1; out_err: if (rc == 2) diff --git a/include/linux/ide.h b/include/linux/ide.h index 95c6e00a72e8..cf1f3888067c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1361,7 +1361,6 @@ int ide_in_drive_list(u16 *, const struct drive_list_entry *); #ifdef CONFIG_BLK_DEV_IDEDMA int ide_dma_good_drive(ide_drive_t *); int __ide_dma_bad_drive(ide_drive_t *); -int ide_id_dma_bug(ide_drive_t *); u8 ide_find_dma_mode(ide_drive_t *, u8); @@ -1402,7 +1401,6 @@ void ide_dma_lost_irq(ide_drive_t *); ide_startstop_t ide_dma_timeout_retry(ide_drive_t *, int); #else -static inline int ide_id_dma_bug(ide_drive_t *drive) { return 0; } static inline u8 ide_find_dma_mode(ide_drive_t *drive, u8 speed) { return 0; } static inline u8 ide_max_dma_mode(ide_drive_t *drive) { return 0; } static inline void ide_dma_off_quietly(ide_drive_t *drive) { ; } -- cgit v1.2.3 From 507e123151149e578c9aae33eb876c49824da5f8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 23 Jun 2009 17:38:15 +0200 Subject: timer stats: Optimize by adding quick check to avoid function calls When the kernel is configured with CONFIG_TIMER_STATS but timer stats are runtime disabled we still get calls to __timer_stats_timer_set_start_info which initializes some fields in the corresponding struct timer_list. So add some quick checks in the the timer stats setup functions to avoid function calls to __timer_stats_timer_set_start_info when timer stats are disabled. In an artificial workload that does nothing but playing ping pong with a single tcp packet via loopback this decreases cpu consumption by 1 - 1.5%. 
This is part of a modified function trace output on SLES11: perl-2497 [00] 28630647177732388 [+ 125]: sk_reset_timer <-tcp_v4_rcv perl-2497 [00] 28630647177732513 [+ 125]: mod_timer <-sk_reset_timer perl-2497 [00] 28630647177732638 [+ 125]: __timer_stats_timer_set_start_info <-mod_timer perl-2497 [00] 28630647177732763 [+ 125]: __mod_timer <-mod_timer perl-2497 [00] 28630647177732888 [+ 125]: __timer_stats_timer_set_start_info <-__mod_timer perl-2497 [00] 28630647177733013 [+ 93]: lock_timer_base <-__mod_timer Signed-off-by: Heiko Carstens Cc: Andrew Morton Cc: Martin Schwidefsky Cc: Mustafa Mesanovic Cc: Arjan van de Ven LKML-Reference: <20090623153811.GA4641@osiris.boeblingen.de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/hrtimer.h | 5 +++++ include/linux/timer.h | 4 ++++ kernel/time/timer_stats.c | 16 ++++++++-------- kernel/timer.c | 2 ++ 4 files changed, 19 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 7400900de94a..54648e625efd 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -21,6 +21,7 @@ #include #include #include +#include struct hrtimer_clock_base; @@ -447,6 +448,8 @@ extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, static inline void timer_stats_account_hrtimer(struct hrtimer *timer) { + if (likely(!timer->start_pid)) + return; timer_stats_update_stats(timer, timer->start_pid, timer->start_site, timer->function, timer->start_comm, 0); } @@ -456,6 +459,8 @@ extern void __timer_stats_hrtimer_set_start_info(struct hrtimer *timer, static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) { + if (likely(!timer_stats_active)) + return; __timer_stats_hrtimer_set_start_info(timer, __builtin_return_address(0)); } diff --git a/include/linux/timer.h b/include/linux/timer.h index ccf882eed8f8..be62ec2ebea5 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -190,6 +190,8 @@ extern unsigned long get_next_timer_interrupt(unsigned long now); */ #ifdef CONFIG_TIMER_STATS +extern int timer_stats_active; + #define TIMER_STATS_FLAG_DEFERRABLE 0x1 extern void init_timer_stats(void); @@ -203,6 +205,8 @@ extern void __timer_stats_timer_set_start_info(struct timer_list *timer, static inline void timer_stats_timer_set_start_info(struct timer_list *timer) { + if (likely(!timer_stats_active)) + return; __timer_stats_timer_set_start_info(timer, __builtin_return_address(0)); } diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c index c994530d166d..4cde8b9c716f 100644 --- a/kernel/time/timer_stats.c +++ b/kernel/time/timer_stats.c @@ -96,7 +96,7 @@ static DEFINE_MUTEX(show_mutex); /* * Collection status, active/inactive: */ -static int __read_mostly active; +int __read_mostly timer_stats_active; /* * Beginning/end timestamps of measurement: @@ -242,7 +242,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, struct entry *entry, input; unsigned long flags; - if (likely(!active)) + if (likely(!timer_stats_active)) return; lock = &per_cpu(lookup_lock, raw_smp_processor_id()); @@ -254,7 +254,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf, input.timer_flag = timer_flag; spin_lock_irqsave(lock, flags); - if (!active) + if (!timer_stats_active) goto out_unlock; entry = tstat_lookup(&input, comm); @@ -290,7 +290,7 @@ static int tstats_show(struct seq_file *m, void *v) /* * If still active then calculate up to now: */ - if (active) + if (timer_stats_active) time_stop = ktime_get(); time = 
ktime_sub(time_stop, time_start); @@ -368,18 +368,18 @@ static ssize_t tstats_write(struct file *file, const char __user *buf, mutex_lock(&show_mutex); switch (ctl[0]) { case '0': - if (active) { - active = 0; + if (timer_stats_active) { + timer_stats_active = 0; time_stop = ktime_get(); sync_access(); } break; case '1': - if (!active) { + if (!timer_stats_active) { reset_entries(); time_start = ktime_get(); smp_mb(); - active = 1; + timer_stats_active = 1; } break; default: diff --git a/kernel/timer.c b/kernel/timer.c index 54d3912f8cad..0b36b9e5cc8b 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -380,6 +380,8 @@ static void timer_stats_account_timer(struct timer_list *timer) { unsigned int flag = 0; + if (likely(!timer->start_site)) + return; if (unlikely(tbase_get_deferrable(timer->base))) flag |= TIMER_STATS_FLAG_DEFERRABLE; -- cgit v1.2.3 From c17ef45342cc033fdf7bdd5b28615e0090f8d2e7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 23 Jun 2009 17:12:47 -0700 Subject: rcu: Remove Classic RCU Remove Classic RCU, given that the combination of Tree RCU and the proposed Bloatwatch RCU do everything that Classic RCU can with fewer bugs. Tree RCU has been default in x86 builds for almost six months, and seems to be quite reliable, so there does not seem to be much justification for keeping the Classic RCU code and config complexity around anymore. Signed-off-by: Paul E. McKenney Cc: akpm@linux-foundation.org Cc: niv@us.ibm.com Cc: dvhltc@us.ibm.com Cc: dipankar@in.ibm.com Cc: dhowells@redhat.com Cc: lethal@linux-sh.org Cc: kernel@wantstofly.org Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 178 ---------- include/linux/rcupdate.h | 4 +- init/Kconfig | 12 +- kernel/Makefile | 1 - kernel/rcuclassic.c | 807 --------------------------------------------- 5 files changed, 3 insertions(+), 999 deletions(-) delete mode 100644 include/linux/rcuclassic.h delete mode 100644 kernel/rcuclassic.c (limited to 'include') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h deleted file mode 100644 index bfd92e1e5d2c..000000000000 --- a/include/linux/rcuclassic.h +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion (classic version) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright IBM Corporation, 2001 - * - * Author: Dipankar Sarma - * - * Based on the original work by Paul McKenney - * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. 
- * Papers: - * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf - * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU - * - */ - -#ifndef __LINUX_RCUCLASSIC_H -#define __LINUX_RCUCLASSIC_H - -#include -#include -#include -#include -#include - -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */ -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - -/* Global control variables for rcupdate callback mechanism. */ -struct rcu_ctrlblk { - long cur; /* Current batch number. */ - long completed; /* Number of the last completed batch */ - long pending; /* Number of the last pending batch */ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - unsigned long gp_start; /* Time at which GP started in jiffies. */ - unsigned long jiffies_stall; - /* Time at which to check for CPU stalls. */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - - int signaled; - - spinlock_t lock ____cacheline_internodealigned_in_smp; - DECLARE_BITMAP(cpumask, NR_CPUS); /* CPUs that need to switch for */ - /* current batch to proceed. */ -} ____cacheline_internodealigned_in_smp; - -/* Is batch a before batch b ? */ -static inline int rcu_batch_before(long a, long b) -{ - return (a - b) < 0; -} - -/* Is batch a after batch b ? */ -static inline int rcu_batch_after(long a, long b) -{ - return (a - b) > 0; -} - -/* Per-CPU data for Read-Copy UPdate. */ -struct rcu_data { - /* 1) quiescent state handling : */ - long quiescbatch; /* Batch # for grace period */ - int passed_quiesc; /* User-mode/idle loop etc. */ - int qs_pending; /* core waits for quiesc state */ - - /* 2) batch handling */ - /* - * if nxtlist is not NULL, then: - * batch: - * The batch # for the last entry of nxtlist - * [*nxttail[1], NULL = *nxttail[2]): - * Entries that batch # <= batch - * [*nxttail[0], *nxttail[1]): - * Entries that batch # <= batch - 1 - * [nxtlist, *nxttail[0]): - * Entries that batch # <= batch - 2 - * The grace period for these entries has completed, and - * the other grace-period-completed entries may be moved - * here temporarily in rcu_process_callbacks(). - */ - long batch; - struct rcu_head *nxtlist; - struct rcu_head **nxttail[3]; - long qlen; /* # of queued callbacks */ - struct rcu_head *donelist; - struct rcu_head **donetail; - long blimit; /* Upper limit on a processed batch */ - int cpu; - struct rcu_head barrier; -}; - -/* - * Increment the quiescent state counter. - * The counter is a bit degenerated: We do not need to know - * how many quiescent states passed, just if there was at least - * one since the start of the grace period. Thus just a flag. 
- */ -extern void rcu_qsctr_inc(int cpu); -extern void rcu_bh_qsctr_inc(int cpu); - -extern int rcu_pending(int cpu); -extern int rcu_needs_cpu(int cpu); - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -extern struct lockdep_map rcu_lock_map; -# define rcu_read_acquire() \ - lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) -# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) -#else -# define rcu_read_acquire() do { } while (0) -# define rcu_read_release() do { } while (0) -#endif - -#define __rcu_read_lock() \ - do { \ - preempt_disable(); \ - __acquire(RCU); \ - rcu_read_acquire(); \ - } while (0) -#define __rcu_read_unlock() \ - do { \ - rcu_read_release(); \ - __release(RCU); \ - preempt_enable(); \ - } while (0) -#define __rcu_read_lock_bh() \ - do { \ - local_bh_disable(); \ - __acquire(RCU_BH); \ - rcu_read_acquire(); \ - } while (0) -#define __rcu_read_unlock_bh() \ - do { \ - rcu_read_release(); \ - __release(RCU_BH); \ - local_bh_enable(); \ - } while (0) - -#define __synchronize_sched() synchronize_rcu() - -#define call_rcu_sched(head, func) call_rcu(head, func) - -extern void __rcu_init(void); -#define rcu_init_sched() do { } while (0) -extern void rcu_check_callbacks(int cpu, int user); -extern void rcu_restart_cpu(int cpu); - -extern long rcu_batches_completed(void); -extern long rcu_batches_completed_bh(void); - -#define rcu_enter_nohz() do { } while (0) -#define rcu_exit_nohz() do { } while (0) - -/* A context switch is a grace period for rcuclassic. */ -static inline int rcu_blocking_is_gp(void) -{ - return num_online_cpus() == 1; -} - -#endif /* __LINUX_RCUCLASSIC_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 15fbb3ca634d..0cdfdb622faa 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -54,9 +54,7 @@ struct rcu_head { /* Internal to kernel, but needed by rcupreempt.h. */ extern int rcu_scheduler_active; -#if defined(CONFIG_CLASSIC_RCU) -#include -#elif defined(CONFIG_TREE_RCU) +#if defined(CONFIG_TREE_RCU) #include #elif defined(CONFIG_PREEMPT_RCU) #include diff --git a/init/Kconfig b/init/Kconfig index 1ce05a4cb5f6..d10f31dfa0b2 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -316,21 +316,13 @@ choice prompt "RCU Implementation" default TREE_RCU -config CLASSIC_RCU - bool "Classic RCU" - help - This option selects the classic RCU implementation that is - designed for best read-side performance on non-realtime - systems. - - Select this option if you are unsure. - config TREE_RCU bool "Tree-based hierarchical RCU" help This option selects the RCU implementation that is designed for very large SMP system with hundreds or - thousands of CPUs. + thousands of CPUs. It also scales down nicely to + smaller systems. 
config PREEMPT_RCU bool "Preemptible RCU" diff --git a/kernel/Makefile b/kernel/Makefile index 0a32cb21ec97..d6042bcc9e99 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -80,7 +80,6 @@ obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o -obj-$(CONFIG_CLASSIC_RCU) += rcuclassic.o obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c deleted file mode 100644 index 0f2b0b311304..000000000000 --- a/kernel/rcuclassic.c +++ /dev/null @@ -1,807 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright IBM Corporation, 2001 - * - * Authors: Dipankar Sarma - * Manfred Spraul - * - * Based on the original work by Paul McKenney - * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. - * Papers: - * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf - * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -static struct lock_class_key rcu_lock_key; -struct lockdep_map rcu_lock_map = - STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); -EXPORT_SYMBOL_GPL(rcu_lock_map); -#endif - - -/* Definition for rcupdate control block. */ -static struct rcu_ctrlblk rcu_ctrlblk = { - .cur = -300, - .completed = -300, - .pending = -300, - .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock), - .cpumask = CPU_BITS_NONE, -}; - -static struct rcu_ctrlblk rcu_bh_ctrlblk = { - .cur = -300, - .completed = -300, - .pending = -300, - .lock = __SPIN_LOCK_UNLOCKED(&rcu_bh_ctrlblk.lock), - .cpumask = CPU_BITS_NONE, -}; - -static DEFINE_PER_CPU(struct rcu_data, rcu_data); -static DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); - -/* - * Increment the quiescent state counter. - * The counter is a bit degenerated: We do not need to know - * how many quiescent states passed, just if there was at least - * one since the start of the grace period. Thus just a flag. 
- */ -void rcu_qsctr_inc(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - rdp->passed_quiesc = 1; -} - -void rcu_bh_qsctr_inc(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - rdp->passed_quiesc = 1; -} - -static int blimit = 10; -static int qhimark = 10000; -static int qlowmark = 100; - -#ifdef CONFIG_SMP -static void force_quiescent_state(struct rcu_data *rdp, - struct rcu_ctrlblk *rcp) -{ - int cpu; - unsigned long flags; - - set_need_resched(); - spin_lock_irqsave(&rcp->lock, flags); - if (unlikely(!rcp->signaled)) { - rcp->signaled = 1; - /* - * Don't send IPI to itself. With irqs disabled, - * rdp->cpu is the current cpu. - * - * cpu_online_mask is updated by the _cpu_down() - * using __stop_machine(). Since we're in irqs disabled - * section, __stop_machine() is not exectuting, hence - * the cpu_online_mask is stable. - * - * However, a cpu might have been offlined _just_ before - * we disabled irqs while entering here. - * And rcu subsystem might not yet have handled the CPU_DEAD - * notification, leading to the offlined cpu's bit - * being set in the rcp->cpumask. - * - * Hence cpumask = (rcp->cpumask & cpu_online_mask) to prevent - * sending smp_reschedule() to an offlined CPU. - */ - for_each_cpu_and(cpu, - to_cpumask(rcp->cpumask), cpu_online_mask) { - if (cpu != rdp->cpu) - smp_send_reschedule(cpu); - } - } - spin_unlock_irqrestore(&rcp->lock, flags); -} -#else -static inline void force_quiescent_state(struct rcu_data *rdp, - struct rcu_ctrlblk *rcp) -{ - set_need_resched(); -} -#endif - -static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - long batch; - - head->next = NULL; - smp_mb(); /* Read of rcu->cur must happen after any change by caller. */ - - /* - * Determine the batch number of this callback. - * - * Using ACCESS_ONCE to avoid the following error when gcc eliminates - * local variable "batch" and emits codes like this: - * 1) rdp->batch = rcp->cur + 1 # gets old value - * ...... - * 2)rcu_batch_after(rcp->cur + 1, rdp->batch) # gets new value - * then [*nxttail[0], *nxttail[1]) may contain callbacks - * that batch# = rdp->batch, see the comment of struct rcu_data. - */ - batch = ACCESS_ONCE(rcp->cur) + 1; - - if (rdp->nxtlist && rcu_batch_after(batch, rdp->batch)) { - /* process callbacks */ - rdp->nxttail[0] = rdp->nxttail[1]; - rdp->nxttail[1] = rdp->nxttail[2]; - if (rcu_batch_after(batch - 1, rdp->batch)) - rdp->nxttail[0] = rdp->nxttail[2]; - } - - rdp->batch = batch; - *rdp->nxttail[2] = head; - rdp->nxttail[2] = &head->next; - - if (unlikely(++rdp->qlen > qhimark)) { - rdp->blimit = INT_MAX; - force_quiescent_state(rdp, &rcu_ctrlblk); - } -} - -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - -static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp) -{ - rcp->gp_start = jiffies; - rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK; -} - -static void print_other_cpu_stall(struct rcu_ctrlblk *rcp) -{ - int cpu; - long delta; - unsigned long flags; - - /* Only let one CPU complain about others per time interval. */ - - spin_lock_irqsave(&rcp->lock, flags); - delta = jiffies - rcp->jiffies_stall; - if (delta < 2 || rcp->cur != rcp->completed) { - spin_unlock_irqrestore(&rcp->lock, flags); - return; - } - rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; - spin_unlock_irqrestore(&rcp->lock, flags); - - /* OK, time to rat on our buddy... 
*/ - - printk(KERN_ERR "INFO: RCU detected CPU stalls:"); - for_each_possible_cpu(cpu) { - if (cpumask_test_cpu(cpu, to_cpumask(rcp->cpumask))) - printk(" %d", cpu); - } - printk(" (detected by %d, t=%ld jiffies)\n", - smp_processor_id(), (long)(jiffies - rcp->gp_start)); -} - -static void print_cpu_stall(struct rcu_ctrlblk *rcp) -{ - unsigned long flags; - - printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", - smp_processor_id(), jiffies, - jiffies - rcp->gp_start); - dump_stack(); - spin_lock_irqsave(&rcp->lock, flags); - if ((long)(jiffies - rcp->jiffies_stall) >= 0) - rcp->jiffies_stall = - jiffies + RCU_SECONDS_TILL_STALL_RECHECK; - spin_unlock_irqrestore(&rcp->lock, flags); - set_need_resched(); /* kick ourselves to get things going. */ -} - -static void check_cpu_stall(struct rcu_ctrlblk *rcp) -{ - long delta; - - delta = jiffies - rcp->jiffies_stall; - if (cpumask_test_cpu(smp_processor_id(), to_cpumask(rcp->cpumask)) && - delta >= 0) { - - /* We haven't checked in, so go dump stack. */ - print_cpu_stall(rcp); - - } else if (rcp->cur != rcp->completed && delta >= 2) { - - /* They had two seconds to dump stack, so complain. */ - print_other_cpu_stall(rcp); - } -} - -#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - -static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp) -{ -} - -static inline void check_cpu_stall(struct rcu_ctrlblk *rcp) -{ -} - -#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - -/** - * call_rcu - Queue an RCU callback for invocation after a grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period - * - * The update function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. - */ -void call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) -{ - unsigned long flags; - - head->func = func; - local_irq_save(flags); - __call_rcu(head, &rcu_ctrlblk, &__get_cpu_var(rcu_data)); - local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(call_rcu); - -/** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period - * - * The update function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. call_rcu_bh() assumes - * that the read-side critical sections end on completion of a softirq - * handler. This means that read-side critical sections in process - * context must not be interrupted by softirqs. This interface is to be - * used when most of the read-side critical sections are in softirq context. - * RCU read-side critical sections are delimited by rcu_read_lock() and - * rcu_read_unlock(), * if in interrupt context or rcu_read_lock_bh() - * and rcu_read_unlock_bh(), if in process context. These may be nested. - */ -void call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) -{ - unsigned long flags; - - head->func = func; - local_irq_save(flags); - __call_rcu(head, &rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); - local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(call_rcu_bh); - -/* - * Return the number of RCU batches processed thus far. 
Useful - * for debug and statistics. - */ -long rcu_batches_completed(void) -{ - return rcu_ctrlblk.completed; -} -EXPORT_SYMBOL_GPL(rcu_batches_completed); - -/* - * Return the number of RCU batches processed thus far. Useful - * for debug and statistics. - */ -long rcu_batches_completed_bh(void) -{ - return rcu_bh_ctrlblk.completed; -} -EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); - -/* Raises the softirq for processing rcu_callbacks. */ -static inline void raise_rcu_softirq(void) -{ - raise_softirq(RCU_SOFTIRQ); -} - -/* - * Invoke the completed RCU callbacks. They are expected to be in - * a per-cpu list. - */ -static void rcu_do_batch(struct rcu_data *rdp) -{ - unsigned long flags; - struct rcu_head *next, *list; - int count = 0; - - list = rdp->donelist; - while (list) { - next = list->next; - prefetch(next); - list->func(list); - list = next; - if (++count >= rdp->blimit) - break; - } - rdp->donelist = list; - - local_irq_save(flags); - rdp->qlen -= count; - local_irq_restore(flags); - if (rdp->blimit == INT_MAX && rdp->qlen <= qlowmark) - rdp->blimit = blimit; - - if (!rdp->donelist) - rdp->donetail = &rdp->donelist; - else - raise_rcu_softirq(); -} - -/* - * Grace period handling: - * The grace period handling consists out of two steps: - * - A new grace period is started. - * This is done by rcu_start_batch. The start is not broadcasted to - * all cpus, they must pick this up by comparing rcp->cur with - * rdp->quiescbatch. All cpus are recorded in the - * rcu_ctrlblk.cpumask bitmap. - * - All cpus must go through a quiescent state. - * Since the start of the grace period is not broadcasted, at least two - * calls to rcu_check_quiescent_state are required: - * The first call just notices that a new grace period is running. The - * following calls check if there was a quiescent state since the beginning - * of the grace period. If so, it updates rcu_ctrlblk.cpumask. If - * the bitmap is empty, then the grace period is completed. - * rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace - * period (if necessary). - */ - -/* - * Register a new batch of callbacks, and start it up if there is currently no - * active batch and the batch to be registered has not already occurred. - * Caller must hold rcu_ctrlblk.lock. - */ -static void rcu_start_batch(struct rcu_ctrlblk *rcp) -{ - if (rcp->cur != rcp->pending && - rcp->completed == rcp->cur) { - rcp->cur++; - record_gp_stall_check_time(rcp); - - /* - * Accessing nohz_cpu_mask before incrementing rcp->cur needs a - * Barrier Otherwise it can cause tickless idle CPUs to be - * included in rcp->cpumask, which will extend graceperiods - * unnecessarily. - */ - smp_mb(); - cpumask_andnot(to_cpumask(rcp->cpumask), - cpu_online_mask, nohz_cpu_mask); - - rcp->signaled = 0; - } -} - -/* - * cpu went through a quiescent state since the beginning of the grace period. - * Clear it from the cpu mask and complete the grace period if it was the last - * cpu. Start another grace period if someone has further entries pending - */ -static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp) -{ - cpumask_clear_cpu(cpu, to_cpumask(rcp->cpumask)); - if (cpumask_empty(to_cpumask(rcp->cpumask))) { - /* batch completed ! */ - rcp->completed = rcp->cur; - rcu_start_batch(rcp); - } -} - -/* - * Check if the cpu has gone through a quiescent state (say context - * switch). If so and if it already hasn't done so in this RCU - * quiescent cycle, then indicate that it has done so. 
- */ -static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - unsigned long flags; - - if (rdp->quiescbatch != rcp->cur) { - /* start new grace period: */ - rdp->qs_pending = 1; - rdp->passed_quiesc = 0; - rdp->quiescbatch = rcp->cur; - return; - } - - /* Grace period already completed for this cpu? - * qs_pending is checked instead of the actual bitmap to avoid - * cacheline trashing. - */ - if (!rdp->qs_pending) - return; - - /* - * Was there a quiescent state since the beginning of the grace - * period? If no, then exit and wait for the next call. - */ - if (!rdp->passed_quiesc) - return; - rdp->qs_pending = 0; - - spin_lock_irqsave(&rcp->lock, flags); - /* - * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync - * during cpu startup. Ignore the quiescent state. - */ - if (likely(rdp->quiescbatch == rcp->cur)) - cpu_quiet(rdp->cpu, rcp); - - spin_unlock_irqrestore(&rcp->lock, flags); -} - - -#ifdef CONFIG_HOTPLUG_CPU - -/* warning! helper for rcu_offline_cpu. do not use elsewhere without reviewing - * locking requirements, the list it's pulling from has to belong to a cpu - * which is dead and hence not processing interrupts. - */ -static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list, - struct rcu_head **tail, long batch) -{ - unsigned long flags; - - if (list) { - local_irq_save(flags); - this_rdp->batch = batch; - *this_rdp->nxttail[2] = list; - this_rdp->nxttail[2] = tail; - local_irq_restore(flags); - } -} - -static void __rcu_offline_cpu(struct rcu_data *this_rdp, - struct rcu_ctrlblk *rcp, struct rcu_data *rdp) -{ - unsigned long flags; - - /* - * if the cpu going offline owns the grace period - * we can block indefinitely waiting for it, so flush - * it here - */ - spin_lock_irqsave(&rcp->lock, flags); - if (rcp->cur != rcp->completed) - cpu_quiet(rdp->cpu, rcp); - rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail, rcp->cur + 1); - rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail[2], rcp->cur + 1); - spin_unlock(&rcp->lock); - - this_rdp->qlen += rdp->qlen; - local_irq_restore(flags); -} - -static void rcu_offline_cpu(int cpu) -{ - struct rcu_data *this_rdp = &get_cpu_var(rcu_data); - struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data); - - __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, - &per_cpu(rcu_data, cpu)); - __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, - &per_cpu(rcu_bh_data, cpu)); - put_cpu_var(rcu_data); - put_cpu_var(rcu_bh_data); -} - -#else - -static void rcu_offline_cpu(int cpu) -{ -} - -#endif - -/* - * This does the RCU processing work from softirq context. 
- */ -static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - unsigned long flags; - long completed_snap; - - if (rdp->nxtlist) { - local_irq_save(flags); - completed_snap = ACCESS_ONCE(rcp->completed); - - /* - * move the other grace-period-completed entries to - * [rdp->nxtlist, *rdp->nxttail[0]) temporarily - */ - if (!rcu_batch_before(completed_snap, rdp->batch)) - rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2]; - else if (!rcu_batch_before(completed_snap, rdp->batch - 1)) - rdp->nxttail[0] = rdp->nxttail[1]; - - /* - * the grace period for entries in - * [rdp->nxtlist, *rdp->nxttail[0]) has completed and - * move these entries to donelist - */ - if (rdp->nxttail[0] != &rdp->nxtlist) { - *rdp->donetail = rdp->nxtlist; - rdp->donetail = rdp->nxttail[0]; - rdp->nxtlist = *rdp->nxttail[0]; - *rdp->donetail = NULL; - - if (rdp->nxttail[1] == rdp->nxttail[0]) - rdp->nxttail[1] = &rdp->nxtlist; - if (rdp->nxttail[2] == rdp->nxttail[0]) - rdp->nxttail[2] = &rdp->nxtlist; - rdp->nxttail[0] = &rdp->nxtlist; - } - - local_irq_restore(flags); - - if (rcu_batch_after(rdp->batch, rcp->pending)) { - unsigned long flags2; - - /* and start it/schedule start if it's a new batch */ - spin_lock_irqsave(&rcp->lock, flags2); - if (rcu_batch_after(rdp->batch, rcp->pending)) { - rcp->pending = rdp->batch; - rcu_start_batch(rcp); - } - spin_unlock_irqrestore(&rcp->lock, flags2); - } - } - - rcu_check_quiescent_state(rcp, rdp); - if (rdp->donelist) - rcu_do_batch(rdp); -} - -static void rcu_process_callbacks(struct softirq_action *unused) -{ - /* - * Memory references from any prior RCU read-side critical sections - * executed by the interrupted code must be see before any RCU - * grace-period manupulations below. - */ - - smp_mb(); /* See above block comment. */ - - __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data)); - __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data)); - - /* - * Memory references from any later RCU read-side critical sections - * executed by the interrupted code must be see after any RCU - * grace-period manupulations above. - */ - - smp_mb(); /* See above block comment. */ -} - -static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp) -{ - /* Check for CPU stalls, if enabled. */ - check_cpu_stall(rcp); - - if (rdp->nxtlist) { - long completed_snap = ACCESS_ONCE(rcp->completed); - - /* - * This cpu has pending rcu entries and the grace period - * for them has completed. - */ - if (!rcu_batch_before(completed_snap, rdp->batch)) - return 1; - if (!rcu_batch_before(completed_snap, rdp->batch - 1) && - rdp->nxttail[0] != rdp->nxttail[1]) - return 1; - if (rdp->nxttail[0] != &rdp->nxtlist) - return 1; - - /* - * This cpu has pending rcu entries and the new batch - * for then hasn't been started nor scheduled start - */ - if (rcu_batch_after(rdp->batch, rcp->pending)) - return 1; - } - - /* This cpu has finished callbacks to invoke */ - if (rdp->donelist) - return 1; - - /* The rcu core waits for a quiescent state from the cpu */ - if (rdp->quiescbatch != rcp->cur || rdp->qs_pending) - return 1; - - /* nothing to do */ - return 0; -} - -/* - * Check to see if there is any immediate RCU-related work to be done - * by the current CPU, returning 1 if so. This function is part of the - * RCU implementation; it is -not- an exported member of the RCU API. 
- */ -int rcu_pending(int cpu) -{ - return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) || - __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu)); -} - -/* - * Check to see if any future RCU-related work will need to be done - * by the current CPU, even if none need be done immediately, returning - * 1 if so. This function is part of the RCU implementation; it is -not- - * an exported member of the RCU API. - */ -int rcu_needs_cpu(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - struct rcu_data *rdp_bh = &per_cpu(rcu_bh_data, cpu); - - return !!rdp->nxtlist || !!rdp_bh->nxtlist || rcu_pending(cpu); -} - -/* - * Top-level function driving RCU grace-period detection, normally - * invoked from the scheduler-clock interrupt. This function simply - * increments counters that are read only from softirq by this same - * CPU, so there are no memory barriers required. - */ -void rcu_check_callbacks(int cpu, int user) -{ - if (user || - (idle_cpu(cpu) && rcu_scheduler_active && - !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { - - /* - * Get here if this CPU took its interrupt from user - * mode or from the idle loop, and if this is not a - * nested interrupt. In this case, the CPU is in - * a quiescent state, so count it. - * - * Also do a memory barrier. This is needed to handle - * the case where writes from a preempt-disable section - * of code get reordered into schedule() by this CPU's - * write buffer. The memory barrier makes sure that - * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are see - * by other CPUs to happen after any such write. - */ - - smp_mb(); /* See above block comment. */ - rcu_qsctr_inc(cpu); - rcu_bh_qsctr_inc(cpu); - - } else if (!in_softirq()) { - - /* - * Get here if this CPU did not take its interrupt from - * softirq, in other words, if it is not interrupting - * a rcu_bh read-side critical section. This is an _bh - * critical section, so count it. The memory barrier - * is needed for the same reason as is the above one. - */ - - smp_mb(); /* See above block comment. */ - rcu_bh_qsctr_inc(cpu); - } - raise_rcu_softirq(); -} - -static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_ctrlblk *rcp, - struct rcu_data *rdp) -{ - unsigned long flags; - - spin_lock_irqsave(&rcp->lock, flags); - memset(rdp, 0, sizeof(*rdp)); - rdp->nxttail[0] = rdp->nxttail[1] = rdp->nxttail[2] = &rdp->nxtlist; - rdp->donetail = &rdp->donelist; - rdp->quiescbatch = rcp->completed; - rdp->qs_pending = 0; - rdp->cpu = cpu; - rdp->blimit = blimit; - spin_unlock_irqrestore(&rcp->lock, flags); -} - -static void __cpuinit rcu_online_cpu(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - struct rcu_data *bh_rdp = &per_cpu(rcu_bh_data, cpu); - - rcu_init_percpu_data(cpu, &rcu_ctrlblk, rdp); - rcu_init_percpu_data(cpu, &rcu_bh_ctrlblk, bh_rdp); - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); -} - -static int __cpuinit rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - long cpu = (long)hcpu; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - rcu_online_cpu(cpu); - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - rcu_offline_cpu(cpu); - break; - default: - break; - } - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata rcu_nb = { - .notifier_call = rcu_cpu_notify, -}; - -/* - * Initializes rcu mechanism. Assumed to be called early. - * That is before local timer(SMP) or jiffie timer (uniproc) is setup. 
- * Note that rcu_qsctr and friends are implicitly - * initialized due to the choice of ``0'' for RCU_CTR_INVALID. - */ -void __init __rcu_init(void) -{ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - /* Register notifier for non-boot CPUs */ - register_cpu_notifier(&rcu_nb); -} - -module_param(blimit, int, 0); -module_param(qhimark, int, 0); -module_param(qlowmark, int, 0); -- cgit v1.2.3 From 9e48858f7d36a6a3849f1d1b40c3bf5624b4ee7c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 7 May 2009 19:26:19 +1000 Subject: security: rename ptrace_may_access => ptrace_access_check The ->ptrace_may_access() methods are named confusingly - the real ptrace_may_access() returns a bool, while these security checks have a retval convention. Rename it to ptrace_access_check, to reduce the confusion factor. [ Impact: cleanup, no code changed ] Signed-off-by: Ingo Molnar Signed-off-by: James Morris --- include/linux/security.h | 14 +++++++------- kernel/ptrace.c | 2 +- security/capability.c | 2 +- security/commoncap.c | 4 ++-- security/security.c | 4 ++-- security/selinux/hooks.c | 6 +++--- security/smack/smack_lsm.c | 8 ++++---- 7 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 5eff459b3833..145909165dbf 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -52,7 +52,7 @@ struct audit_krule; extern int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap, int audit); extern int cap_settime(struct timespec *ts, struct timezone *tz); -extern int cap_ptrace_may_access(struct task_struct *child, unsigned int mode); +extern int cap_ptrace_access_check(struct task_struct *child, unsigned int mode); extern int cap_ptrace_traceme(struct task_struct *parent); extern int cap_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted); extern int cap_capset(struct cred *new, const struct cred *old, @@ -1209,7 +1209,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @alter contains the flag indicating whether changes are to be made. * Return 0 if permission is granted. * - * @ptrace_may_access: + * @ptrace_access_check: * Check permission before allowing the current process to trace the * @child process. * Security modules may also want to perform a process tracing check @@ -1224,7 +1224,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Check that the @parent process has sufficient permission to trace the * current process before allowing the current process to present itself * to the @parent process for tracing. - * The parent process will still have to undergo the ptrace_may_access + * The parent process will still have to undergo the ptrace_access_check * checks before it is allowed to trace this one. * @parent contains the task_struct structure for debugger process. * Return 0 if permission is granted. 
@@ -1336,7 +1336,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) struct security_operations { char name[SECURITY_NAME_MAX + 1]; - int (*ptrace_may_access) (struct task_struct *child, unsigned int mode); + int (*ptrace_access_check) (struct task_struct *child, unsigned int mode); int (*ptrace_traceme) (struct task_struct *parent); int (*capget) (struct task_struct *target, kernel_cap_t *effective, @@ -1617,7 +1617,7 @@ extern int security_module_enable(struct security_operations *ops); extern int register_security(struct security_operations *ops); /* Security operations */ -int security_ptrace_may_access(struct task_struct *child, unsigned int mode); +int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); int security_capget(struct task_struct *target, kernel_cap_t *effective, @@ -1798,10 +1798,10 @@ static inline int security_init(void) return 0; } -static inline int security_ptrace_may_access(struct task_struct *child, +static inline int security_ptrace_access_check(struct task_struct *child, unsigned int mode) { - return cap_ptrace_may_access(child, mode); + return cap_ptrace_access_check(child, mode); } static inline int security_ptrace_traceme(struct task_struct *parent) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 61c78b2c07ba..9a4184e04f29 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -152,7 +152,7 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) if (!dumpable && !capable(CAP_SYS_PTRACE)) return -EPERM; - return security_ptrace_may_access(task, mode); + return security_ptrace_access_check(task, mode); } bool ptrace_may_access(struct task_struct *task, unsigned int mode) diff --git a/security/capability.c b/security/capability.c index 21b6cead6a8e..f218dd361647 100644 --- a/security/capability.c +++ b/security/capability.c @@ -863,7 +863,7 @@ struct security_operations default_security_ops = { void security_fixup_ops(struct security_operations *ops) { - set_to_cap_if_null(ops, ptrace_may_access); + set_to_cap_if_null(ops, ptrace_access_check); set_to_cap_if_null(ops, ptrace_traceme); set_to_cap_if_null(ops, capget); set_to_cap_if_null(ops, capset); diff --git a/security/commoncap.c b/security/commoncap.c index 48b7e0228fa3..aa97704564d4 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -101,7 +101,7 @@ int cap_settime(struct timespec *ts, struct timezone *tz) } /** - * cap_ptrace_may_access - Determine whether the current process may access + * cap_ptrace_access_check - Determine whether the current process may access * another * @child: The process to be accessed * @mode: The mode of attachment. @@ -109,7 +109,7 @@ int cap_settime(struct timespec *ts, struct timezone *tz) * Determine whether a process may access another, returning 0 if permission * granted, -ve if denied. 
*/ -int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) +int cap_ptrace_access_check(struct task_struct *child, unsigned int mode) { int ret = 0; diff --git a/security/security.c b/security/security.c index dc7674fbfc7a..4501c5e1f988 100644 --- a/security/security.c +++ b/security/security.c @@ -124,9 +124,9 @@ int register_security(struct security_operations *ops) /* Security operations */ -int security_ptrace_may_access(struct task_struct *child, unsigned int mode) +int security_ptrace_access_check(struct task_struct *child, unsigned int mode) { - return security_ops->ptrace_may_access(child, mode); + return security_ops->ptrace_access_check(child, mode); } int security_ptrace_traceme(struct task_struct *parent) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d6f64783acd1..e3b4f3083dd7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1854,12 +1854,12 @@ static inline u32 open_file_to_av(struct file *file) /* Hook functions begin here. */ -static int selinux_ptrace_may_access(struct task_struct *child, +static int selinux_ptrace_access_check(struct task_struct *child, unsigned int mode) { int rc; - rc = cap_ptrace_may_access(child, mode); + rc = cap_ptrace_access_check(child, mode); if (rc) return rc; @@ -5315,7 +5315,7 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer) static struct security_operations selinux_ops = { .name = "selinux", - .ptrace_may_access = selinux_ptrace_may_access, + .ptrace_access_check = selinux_ptrace_access_check, .ptrace_traceme = selinux_ptrace_traceme, .capget = selinux_capget, .capset = selinux_capset, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 0023182078c7..1c9bdbcbe3d2 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -91,7 +91,7 @@ struct inode_smack *new_inode_smack(char *smack) */ /** - * smack_ptrace_may_access - Smack approval on PTRACE_ATTACH + * smack_ptrace_access_check - Smack approval on PTRACE_ATTACH * @ctp: child task pointer * @mode: ptrace attachment mode * @@ -99,13 +99,13 @@ struct inode_smack *new_inode_smack(char *smack) * * Do the capability checks, and require read and write. */ -static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) +static int smack_ptrace_access_check(struct task_struct *ctp, unsigned int mode) { int rc; struct smk_audit_info ad; char *sp, *tsp; - rc = cap_ptrace_may_access(ctp, mode); + rc = cap_ptrace_access_check(ctp, mode); if (rc != 0) return rc; @@ -3032,7 +3032,7 @@ static void smack_release_secctx(char *secdata, u32 seclen) struct security_operations smack_ops = { .name = "smack", - .ptrace_may_access = smack_ptrace_may_access, + .ptrace_access_check = smack_ptrace_access_check, .ptrace_traceme = smack_ptrace_traceme, .syslog = smack_syslog, -- cgit v1.2.3 From c7a1a4c80f873d5d6ecd173035bb80eba489f380 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 24 Jun 2009 01:07:44 +0000 Subject: Phonet: publicize the Netlink notification function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- include/net/phonet/pn_dev.h | 1 + net/phonet/pn_netlink.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 5054dc5ea2c2..29d126736611 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -45,6 +45,7 @@ int phonet_address_add(struct net_device *dev, u8 addr); int phonet_address_del(struct net_device *dev, u8 addr); u8 phonet_address_get(struct net_device *dev, u8 addr); int phonet_address_lookup(struct net *net, u8 addr); +void phonet_address_notify(int event, struct net_device *dev, u8 addr); #define PN_NO_ADDR 0xff diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index cec4e5951681..f8b4cee434c2 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -32,7 +32,7 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, u32 pid, u32 seq, int event); -static void rtmsg_notify(int event, struct net_device *dev, u8 addr) +void phonet_address_notify(int event, struct net_device *dev, u8 addr) { struct sk_buff *skb; int err = -ENOBUFS; @@ -94,7 +94,7 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) else err = phonet_address_del(dev, pnaddr); if (!err) - rtmsg_notify(nlh->nlmsg_type, dev, pnaddr); + phonet_address_notify(nlh->nlmsg_type, dev, pnaddr); return err; } -- cgit v1.2.3 From dcf52fb71d988ba945054308f661bddf9b2455fb Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 22 Jun 2009 20:41:45 +0000 Subject: ACPI: remove unused acpi_device_ops .stop method No drivers use the .stop method, so remove it. Signed-off-by: Bjorn Helgaas Reviewed-by: Alex Chiang Signed-off-by: Len Brown --- drivers/acpi/scan.c | 5 ----- include/acpi/acpi_bus.h | 2 -- 2 files changed, 7 deletions(-) (limited to 'include') diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 781435d7e369..4a89f081160f 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -426,9 +426,6 @@ static int acpi_device_probe(struct device * dev) if (acpi_drv->ops.notify) { ret = acpi_device_install_notify_handler(acpi_dev); if (ret) { - if (acpi_drv->ops.stop) - acpi_drv->ops.stop(acpi_dev, - acpi_dev->removal_type); if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev, acpi_dev->removal_type); @@ -452,8 +449,6 @@ static int acpi_device_remove(struct device * dev) if (acpi_drv) { if (acpi_drv->ops.notify) acpi_device_remove_notify_handler(acpi_dev); - if (acpi_drv->ops.stop) - acpi_drv->ops.stop(acpi_dev, acpi_dev->removal_type); if (acpi_drv->ops.remove) acpi_drv->ops.remove(acpi_dev, acpi_dev->removal_type); } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c65e4ce6c3af..79a6c5ebe908 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -89,7 +89,6 @@ struct acpi_device; typedef int (*acpi_op_add) (struct acpi_device * device); typedef int (*acpi_op_remove) (struct acpi_device * device, int type); typedef int (*acpi_op_start) (struct acpi_device * device); -typedef int (*acpi_op_stop) (struct acpi_device * device, int type); typedef int (*acpi_op_suspend) (struct acpi_device * device, pm_message_t state); typedef int (*acpi_op_resume) (struct acpi_device * device); @@ -106,7 +105,6 @@ struct acpi_device_ops { acpi_op_add add; acpi_op_remove remove; acpi_op_start start; - acpi_op_stop stop; acpi_op_suspend suspend; acpi_op_resume resume; acpi_op_bind bind; -- cgit v1.2.3 From 6fdc03709433ccc2005f0f593ae9d9dd04f7b485 Mon Sep 17 00:00:00 2001 From: Stefan Richter 
Date: Sat, 20 Jun 2009 13:23:59 +0200 Subject: firewire: core: do not DMA-map stack addresses The DMA mapping API cannot map on-stack addresses, as explained in Documentation/DMA-mapping.txt. Convert the two cases of on-stack packet payload buffers in firewire-core (payload of lock requests in the bus manager work and in iso resource management) to slab-allocated memory. There are a number on-stack buffers for quadlet write or quadlet read requests in firewire-core and firewire-sbp2. These are harmless; they are copied to/ from card driver internal DMA buffers since quadlet payloads are inlined with packet headers. Signed-off-by: Stefan Richter --- drivers/firewire/core-card.c | 14 +++++++------- drivers/firewire/core-cdev.c | 4 +++- drivers/firewire/core-iso.c | 24 +++++++++++++----------- drivers/firewire/core.h | 3 ++- include/linux/firewire.h | 1 + 5 files changed, 26 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 543fccac81bb..f74edae5cb4c 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -196,8 +196,8 @@ static void allocate_broadcast_channel(struct fw_card *card, int generation) { int channel, bandwidth = 0; - fw_iso_resource_manage(card, generation, 1ULL << 31, - &channel, &bandwidth, true); + fw_iso_resource_manage(card, generation, 1ULL << 31, &channel, + &bandwidth, true, card->bm_transaction_data); if (channel == 31) { card->broadcast_channel_allocated = true; device_for_each_child(card->device, (void *)(long)generation, @@ -230,7 +230,6 @@ static void fw_card_bm_work(struct work_struct *work) bool do_reset = false; bool root_device_is_running; bool root_device_is_cmc; - __be32 lock_data[2]; spin_lock_irqsave(&card->lock, flags); @@ -273,22 +272,23 @@ static void fw_card_bm_work(struct work_struct *work) goto pick_me; } - lock_data[0] = cpu_to_be32(0x3f); - lock_data[1] = cpu_to_be32(local_id); + card->bm_transaction_data[0] = cpu_to_be32(0x3f); + card->bm_transaction_data[1] = cpu_to_be32(local_id); spin_unlock_irqrestore(&card->lock, flags); rcode = fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP, irm_id, generation, SCODE_100, CSR_REGISTER_BASE + CSR_BUS_MANAGER_ID, - lock_data, sizeof(lock_data)); + card->bm_transaction_data, + sizeof(card->bm_transaction_data)); if (rcode == RCODE_GENERATION) /* Another bus reset, BM work has been rescheduled. */ goto out; if (rcode == RCODE_COMPLETE && - lock_data[0] != cpu_to_be32(0x3f)) { + card->bm_transaction_data[0] != cpu_to_be32(0x3f)) { /* Somebody else is BM. Only act as IRM. */ if (local_id == irm_id) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index d1d30c615b0f..ced186d7e9a9 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -125,6 +125,7 @@ struct iso_resource { int generation; u64 channels; s32 bandwidth; + __be32 transaction_data[2]; struct iso_resource_event *e_alloc, *e_dealloc; }; @@ -1049,7 +1050,8 @@ static void iso_resource_work(struct work_struct *work) r->channels, &channel, &bandwidth, todo == ISO_RES_ALLOC || todo == ISO_RES_REALLOC || - todo == ISO_RES_ALLOC_ONCE); + todo == ISO_RES_ALLOC_ONCE, + r->transaction_data); /* * Is this generation outdated already? 
As long as this resource sticks * in the idr, it will be scheduled again for a newer generation or at diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c index 166f19c6d38d..110e731f5574 100644 --- a/drivers/firewire/core-iso.c +++ b/drivers/firewire/core-iso.c @@ -177,9 +177,8 @@ EXPORT_SYMBOL(fw_iso_context_stop); */ static int manage_bandwidth(struct fw_card *card, int irm_id, int generation, - int bandwidth, bool allocate) + int bandwidth, bool allocate, __be32 data[2]) { - __be32 data[2]; int try, new, old = allocate ? BANDWIDTH_AVAILABLE_INITIAL : 0; /* @@ -215,9 +214,9 @@ static int manage_bandwidth(struct fw_card *card, int irm_id, int generation, } static int manage_channel(struct fw_card *card, int irm_id, int generation, - u32 channels_mask, u64 offset, bool allocate) + u32 channels_mask, u64 offset, bool allocate, __be32 data[2]) { - __be32 data[2], c, all, old; + __be32 c, all, old; int i, retry = 5; old = all = allocate ? cpu_to_be32(~0) : 0; @@ -260,7 +259,7 @@ static int manage_channel(struct fw_card *card, int irm_id, int generation, } static void deallocate_channel(struct fw_card *card, int irm_id, - int generation, int channel) + int generation, int channel, __be32 buffer[2]) { u32 mask; u64 offset; @@ -269,7 +268,7 @@ static void deallocate_channel(struct fw_card *card, int irm_id, offset = channel < 32 ? CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI : CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO; - manage_channel(card, irm_id, generation, mask, offset, false); + manage_channel(card, irm_id, generation, mask, offset, false, buffer); } /** @@ -298,7 +297,7 @@ static void deallocate_channel(struct fw_card *card, int irm_id, */ void fw_iso_resource_manage(struct fw_card *card, int generation, u64 channels_mask, int *channel, int *bandwidth, - bool allocate) + bool allocate, __be32 buffer[2]) { u32 channels_hi = channels_mask; /* channels 31...0 */ u32 channels_lo = channels_mask >> 32; /* channels 63...32 */ @@ -310,10 +309,12 @@ void fw_iso_resource_manage(struct fw_card *card, int generation, if (channels_hi) c = manage_channel(card, irm_id, generation, channels_hi, - CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI, allocate); + CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI, + allocate, buffer); if (channels_lo && c < 0) { c = manage_channel(card, irm_id, generation, channels_lo, - CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO, allocate); + CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO, + allocate, buffer); if (c >= 0) c += 32; } @@ -325,12 +326,13 @@ void fw_iso_resource_manage(struct fw_card *card, int generation, if (*bandwidth == 0) return; - ret = manage_bandwidth(card, irm_id, generation, *bandwidth, allocate); + ret = manage_bandwidth(card, irm_id, generation, *bandwidth, + allocate, buffer); if (ret < 0) *bandwidth = 0; if (allocate && ret < 0 && c >= 0) { - deallocate_channel(card, irm_id, generation, c); + deallocate_channel(card, irm_id, generation, c, buffer); *channel = ret; } } diff --git a/drivers/firewire/core.h b/drivers/firewire/core.h index c3cfc647e5e3..6052816be353 100644 --- a/drivers/firewire/core.h +++ b/drivers/firewire/core.h @@ -120,7 +120,8 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event); int fw_iso_buffer_map(struct fw_iso_buffer *buffer, struct vm_area_struct *vma); void fw_iso_resource_manage(struct fw_card *card, int generation, - u64 channels_mask, int *channel, int *bandwidth, bool allocate); + u64 channels_mask, int *channel, int *bandwidth, + bool allocate, __be32 buffer[2]); /* -topology 
*/ diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 9823946adbc5..192d1e43c43c 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -127,6 +127,7 @@ struct fw_card { struct delayed_work work; int bm_retries; int bm_generation; + __be32 bm_transaction_data[2]; bool broadcast_channel_allocated; u32 broadcast_channel; -- cgit v1.2.3 From 9d73777e500929b71dcfed16eec05f6760e345a6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jun 2009 11:58:55 +0200 Subject: clarify get_user_pages() prototype Currently the 4th parameter of get_user_pages() is called len, but its in pages, not bytes. Rename the thing to nr_pages to avoid future confusion. Signed-off-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- mm/memory.c | 26 ++++++++++++-------------- mm/nommu.c | 12 +++++------- 3 files changed, 18 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index d006e93d5c93..ba3a7cb1eaa0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -826,7 +826,7 @@ extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int len, int write, int force, + unsigned long start, int nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); diff --git a/mm/memory.c b/mm/memory.c index f46ac18ba231..65216194eb8d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1207,8 +1207,8 @@ static inline int use_zero_page(struct vm_area_struct *vma) int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int len, int flags, - struct page **pages, struct vm_area_struct **vmas) + unsigned long start, int nr_pages, int flags, + struct page **pages, struct vm_area_struct **vmas) { int i; unsigned int vm_flags = 0; @@ -1217,7 +1217,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int ignore = !!(flags & GUP_FLAGS_IGNORE_VMA_PERMISSIONS); int ignore_sigkill = !!(flags & GUP_FLAGS_IGNORE_SIGKILL); - if (len <= 0) + if (nr_pages <= 0) return 0; /* * Require read or write permissions. @@ -1269,7 +1269,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, vmas[i] = gate_vma; i++; start += PAGE_SIZE; - len--; + nr_pages--; continue; } @@ -1280,7 +1280,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (is_vm_hugetlb_page(vma)) { i = follow_hugetlb_page(mm, vma, pages, vmas, - &start, &len, i, write); + &start, &nr_pages, i, write); continue; } @@ -1357,9 +1357,9 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, vmas[i] = vma; i++; start += PAGE_SIZE; - len--; - } while (len && start < vma->vm_end); - } while (len); + nr_pages--; + } while (nr_pages && start < vma->vm_end); + } while (nr_pages); return i; } @@ -1368,7 +1368,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, * @tsk: task_struct of target task * @mm: mm_struct of target mm * @start: starting user address - * @len: number of pages from start to pin + * @nr_pages: number of pages from start to pin * @write: whether pages will be written to by the caller * @force: whether to force write access even if user mapping is * readonly. 
This will result in the page being COWed even @@ -1380,7 +1380,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, * Or NULL if the caller does not require them. * * Returns number of pages pinned. This may be fewer than the number - * requested. If len is 0 or negative, returns 0. If no pages + * requested. If nr_pages is 0 or negative, returns 0. If no pages * were pinned, returns -errno. Each page returned must be released * with a put_page() call when it is finished with. vmas will only * remain valid while mmap_sem is held. @@ -1414,7 +1414,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, * See also get_user_pages_fast, for performance critical applications. */ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int len, int write, int force, + unsigned long start, int nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { int flags = 0; @@ -1424,9 +1424,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (force) flags |= GUP_FLAGS_FORCE; - return __get_user_pages(tsk, mm, - start, len, flags, - pages, vmas); + return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); } EXPORT_SYMBOL(get_user_pages); diff --git a/mm/nommu.c b/mm/nommu.c index 2fd2ad5da98e..bf0cc762a7d2 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -173,8 +173,8 @@ unsigned int kobjsize(const void *objp) } int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int len, int flags, - struct page **pages, struct vm_area_struct **vmas) + unsigned long start, int nr_pages, int flags, + struct page **pages, struct vm_area_struct **vmas) { struct vm_area_struct *vma; unsigned long vm_flags; @@ -189,7 +189,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD); vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE); - for (i = 0; i < len; i++) { + for (i = 0; i < nr_pages; i++) { vma = find_vma(mm, start); if (!vma) goto finish_or_fault; @@ -224,7 +224,7 @@ finish_or_fault: * - don't permit access to VMAs that don't support it, such as I/O mappings */ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, int len, int write, int force, + unsigned long start, int nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas) { int flags = 0; @@ -234,9 +234,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (force) flags |= GUP_FLAGS_FORCE; - return __get_user_pages(tsk, mm, - start, len, flags, - pages, vmas); + return __get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas); } EXPORT_SYMBOL(get_user_pages); -- cgit v1.2.3 From 41f95331b972a039f519ae0c70f051b7121f7346 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Jun 2009 17:55:18 +0200 Subject: perf_counter: Split the mmap control page in two parts Since there are two distinct sections to the control page, move them apart so that possible extentions don't overlap. 
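The effect of the split is easiest to see with a padded layout: a reserved hole after the first section pins the second section to a fixed offset, so either side can grow into its own space without moving the other. Below is a hypothetical stand-alone sketch, not the kernel's actual structure; the field names are stand-ins, the two leading version words are assumed, and the 1 KiB boundary simply echoes the "align to 1k" comment in the hunk that follows:

/* Hypothetical sketch: a reserved hole keeps the second section at 1 KiB. */
#include <stdint.h>
#include <stddef.h>

struct sample_mmap_page {
        /* section one: self-monitoring fields (24 bytes used here) */
        uint32_t version;
        uint32_t compat_version;
        uint32_t lock;
        uint32_t index;
        int64_t  offset;

        /* hole left for future section-one fields */
        uint64_t reserved[125];

        /* section two: ring-buffer control, at a stable offset */
        uint64_t data_head;
};

/* Fails to compile if the hole no longer pads section two out to 1 KiB. */
_Static_assert(offsetof(struct sample_mmap_page, data_head) == 1024,
               "section two must start at the 1 KiB boundary");

Compiling this translation unit is the whole test: shrink or grow the hole and the assertion trips.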
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e7213e46cf9c..489d5cbfbcca 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -233,6 +233,12 @@ struct perf_counter_mmap_page { __u32 index; /* hardware counter identifier */ __s64 offset; /* add to hardware counter value */ + /* + * Hole for extension of the self monitor capabilities + */ + + __u64 __reserved[125]; /* align to 1k */ + /* * Control data for the mmap() data buffer. * -- cgit v1.2.3 From 7f8b4e4e0988dadfd22330fd147ad2453e19f510 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jun 2009 14:34:35 +0200 Subject: perf_counter: Add scale information to the mmap control page Add the needed time scale to the self-profile mmap information. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 +++- kernel/perf_counter.c | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 489d5cbfbcca..bcbf1c43ed42 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -232,12 +232,14 @@ struct perf_counter_mmap_page { __u32 lock; /* seqlock for synchronization */ __u32 index; /* hardware counter identifier */ __s64 offset; /* add to hardware counter value */ + __u64 time_enabled; /* time counter active */ + __u64 time_running; /* time counter on cpu */ /* * Hole for extension of the self monitor capabilities */ - __u64 __reserved[125]; /* align to 1k */ + __u64 __reserved[123]; /* align to 1k */ /* * Control data for the mmap() data buffer. diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index c2b19c111718..23614adab475 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1782,6 +1782,12 @@ void perf_counter_update_userpage(struct perf_counter *counter) if (counter->state == PERF_COUNTER_STATE_ACTIVE) userpg->offset -= atomic64_read(&counter->hw.prev_count); + userpg->time_enabled = counter->total_time_enabled + + atomic64_read(&counter->child_total_time_enabled); + + userpg->time_running = counter->total_time_running + + atomic64_read(&counter->child_total_time_running); + barrier(); ++userpg->lock; preempt_enable(); -- cgit v1.2.3 From 38b200d67636a30cb8dc1508137908e7a649b5c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 23 Jun 2009 20:13:11 +0200 Subject: perf_counter: Add PERF_EVENT_READ Provide a read() like event which can be used to log the counter value at specific sites such as child->parent folding on exit. In order to be useful, we log the counter parent ID, not the actual counter ID, since userspace can only relate parent IDs to perf_counter_attr constructs. 
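For illustration (not part of this patch), a profiler could decode the variable-size record along these lines. The fixed fields and the read_format tests mirror the perf_read_event layout introduced below; the struct and helper names are made up, and the PERF_FORMAT_* constants are assumed to be available to userspace from include/linux/perf_counter.h:

#include <stdint.h>
#include <stdio.h>
#include <linux/perf_counter.h>		/* PERF_FORMAT_* read_format bits (assumed) */

struct read_event_body {		/* hypothetical: payload after the event header */
	uint32_t pid, tid;
	uint64_t value;
	uint64_t optional[3];		/* time_enabled, time_running, parent id */
};

static void decode_read_event(const struct read_event_body *ev, uint64_t read_format)
{
	int i = 0;

	printf("pid %u tid %u value %llu\n", ev->pid, ev->tid,
	       (unsigned long long)ev->value);

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		printf(" time_enabled %llu\n", (unsigned long long)ev->optional[i++]);
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		printf(" time_running %llu\n", (unsigned long long)ev->optional[i++]);
	if (read_format & PERF_FORMAT_ID)
		/* parent counter id, so userspace can map it back to an attr */
		printf(" id %llu\n", (unsigned long long)ev->optional[i++]);
}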
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 12 ++++++++ kernel/perf_counter.c | 72 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 80 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index bcbf1c43ed42..6a384f04755a 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -334,6 +334,18 @@ enum perf_event_type { */ PERF_EVENT_FORK = 7, + /* + * struct { + * struct perf_event_header header; + * u32 pid, tid; + * u64 value; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 parent_id; } && PERF_FORMAT_ID + * }; + */ + PERF_EVENT_READ = 8, + /* * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field * will be PERF_SAMPLE_* diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 02994a719e27..a72c20e91953 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2623,6 +2623,66 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, perf_output_end(&handle); } +/* + * read event + */ + +struct perf_read_event { + struct perf_event_header header; + + u32 pid; + u32 tid; + u64 value; + u64 format[3]; +}; + +static void +perf_counter_read_event(struct perf_counter *counter, + struct task_struct *task) +{ + struct perf_output_handle handle; + struct perf_read_event event = { + .header = { + .type = PERF_EVENT_READ, + .misc = 0, + .size = sizeof(event) - sizeof(event.format), + }, + .pid = perf_counter_pid(counter, task), + .tid = perf_counter_tid(counter, task), + .value = atomic64_read(&counter->count), + }; + int ret, i = 0; + + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + event.header.size += sizeof(u64); + event.format[i++] = counter->total_time_enabled; + } + + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + event.header.size += sizeof(u64); + event.format[i++] = counter->total_time_running; + } + + if (counter->attr.read_format & PERF_FORMAT_ID) { + u64 id; + + event.header.size += sizeof(u64); + if (counter->parent) + id = counter->parent->id; + else + id = counter->id; + + event.format[i++] = id; + } + + ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); + if (ret) + return; + + perf_output_copy(&handle, &event, event.header.size); + perf_output_end(&handle); +} + /* * fork tracking */ @@ -3985,10 +4045,13 @@ static int inherit_group(struct perf_counter *parent_counter, } static void sync_child_counter(struct perf_counter *child_counter, - struct perf_counter *parent_counter) + struct task_struct *child) { + struct perf_counter *parent_counter = child_counter->parent; u64 child_val; + perf_counter_read_event(child_counter, child); + child_val = atomic64_read(&child_counter->count); /* @@ -4017,7 +4080,8 @@ static void sync_child_counter(struct perf_counter *child_counter, static void __perf_counter_exit_task(struct perf_counter *child_counter, - struct perf_counter_context *child_ctx) + struct perf_counter_context *child_ctx, + struct task_struct *child) { struct perf_counter *parent_counter; @@ -4031,7 +4095,7 @@ __perf_counter_exit_task(struct perf_counter *child_counter, * counters need to be zapped - but otherwise linger. 
*/ if (parent_counter) { - sync_child_counter(child_counter, parent_counter); + sync_child_counter(child_counter, child); free_counter(child_counter); } } @@ -4093,7 +4157,7 @@ void perf_counter_exit_task(struct task_struct *child) again: list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, list_entry) - __perf_counter_exit_task(child_counter, child_ctx); + __perf_counter_exit_task(child_counter, child_ctx, child); /* * If the last counter was a group counter, it will have appended all -- cgit v1.2.3 From bfbd3381e63aa2a14c6706afb50ce4630aa0d9a2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 24 Jun 2009 21:11:59 +0200 Subject: perf_counter: Implement more accurate per task statistics With the introduction of PERF_EVENT_READ we have the possibility to provide accurate counter values for individual tasks in a task hierarchy. However, due to the lazy context switching used for similar counter contexts our current per task counts are way off. In order to maintain some of the lazy switch benefits we don't disable it out-right, but simply iterate the active counters and flip the values between the contexts. This only reads the counters but does not need to reprogram the full PMU. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 4 ++- kernel/perf_counter.c | 83 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 6a384f04755a..de70a10b5ec8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -178,8 +178,9 @@ struct perf_counter_attr { mmap : 1, /* include mmap data */ comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ + inherit_stat : 1, /* per task counts */ - __reserved_1 : 53; + __reserved_1 : 52; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; @@ -602,6 +603,7 @@ struct perf_counter_context { int nr_counters; int nr_active; int is_active; + int nr_stat; atomic_t refcount; struct task_struct *task; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index a72c20e91953..385ca51c6e60 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -236,6 +236,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) list_add_rcu(&counter->event_entry, &ctx->event_list); ctx->nr_counters++; + if (counter->attr.inherit_stat) + ctx->nr_stat++; } /* @@ -250,6 +252,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) if (list_empty(&counter->list_entry)) return; ctx->nr_counters--; + if (counter->attr.inherit_stat) + ctx->nr_stat--; list_del_init(&counter->list_entry); list_del_rcu(&counter->event_entry); @@ -1006,6 +1010,76 @@ static int context_equiv(struct perf_counter_context *ctx1, && !ctx1->pin_count && !ctx2->pin_count; } +static void __perf_counter_read(void *counter); + +static void __perf_counter_sync_stat(struct perf_counter *counter, + struct perf_counter *next_counter) +{ + u64 value; + + if (!counter->attr.inherit_stat) + return; + + /* + * Update the counter value, we cannot use perf_counter_read() + * because we're in the middle of a context switch and have IRQs + * disabled, which upsets smp_call_function_single(), however + * we know the counter must be on the current CPU, therefore we + * don't need to use it. 
+ */ + switch (counter->state) { + case PERF_COUNTER_STATE_ACTIVE: + __perf_counter_read(counter); + break; + + case PERF_COUNTER_STATE_INACTIVE: + update_counter_times(counter); + break; + + default: + break; + } + + /* + * In order to keep per-task stats reliable we need to flip the counter + * values when we flip the contexts. + */ + value = atomic64_read(&next_counter->count); + value = atomic64_xchg(&counter->count, value); + atomic64_set(&next_counter->count, value); + + /* + * XXX also sync time_enabled and time_running ? + */ +} + +#define list_next_entry(pos, member) \ + list_entry(pos->member.next, typeof(*pos), member) + +static void perf_counter_sync_stat(struct perf_counter_context *ctx, + struct perf_counter_context *next_ctx) +{ + struct perf_counter *counter, *next_counter; + + if (!ctx->nr_stat) + return; + + counter = list_first_entry(&ctx->event_list, + struct perf_counter, event_entry); + + next_counter = list_first_entry(&next_ctx->event_list, + struct perf_counter, event_entry); + + while (&counter->event_entry != &ctx->event_list && + &next_counter->event_entry != &next_ctx->event_list) { + + __perf_counter_sync_stat(counter, next_counter); + + counter = list_next_entry(counter, event_entry); + next_counter = list_next_entry(counter, event_entry); + } +} + /* * Called from scheduler to remove the counters of the current task, * with interrupts disabled. @@ -1061,6 +1135,8 @@ void perf_counter_task_sched_out(struct task_struct *task, ctx->task = next; next_ctx->task = task; do_switch = 0; + + perf_counter_sync_stat(ctx, next_ctx); } spin_unlock(&next_ctx->lock); spin_unlock(&ctx->lock); @@ -1350,7 +1426,7 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) /* * Cross CPU call to read the hardware counter */ -static void __read(void *info) +static void __perf_counter_read(void *info) { struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; @@ -1372,7 +1448,7 @@ static u64 perf_counter_read(struct perf_counter *counter) */ if (counter->state == PERF_COUNTER_STATE_ACTIVE) { smp_call_function_single(counter->oncpu, - __read, counter, 1); + __perf_counter_read, counter, 1); } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { update_counter_times(counter); } @@ -4050,7 +4126,8 @@ static void sync_child_counter(struct perf_counter *child_counter, struct perf_counter *parent_counter = child_counter->parent; u64 child_val; - perf_counter_read_event(child_counter, child); + if (child_counter->attr.inherit_stat) + perf_counter_read_event(child_counter, child); child_val = atomic64_read(&child_counter->count); -- cgit v1.2.3 From e6e18ec79b023d5fe84226cef533cf0e3770ce93 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Jun 2009 11:27:12 +0200 Subject: perf_counter: Rework the sample ABI The PERF_EVENT_READ implementation made me realize we don't actually need the sample_type int the output sample, since we already have that in the perf_counter_attr information. Therefore, remove the PERF_EVENT_MISC_OVERFLOW bit and the event->type overloading, and imply put counter overflow samples in a PERF_EVENT_SAMPLE type. This also fixes the issue that event->type was only 32-bit and sample_type had 64 usable bits. 
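As a hedged sketch of the resulting consumer-side layout (names are made up, constants assumed to come from include/linux/perf_counter.h), a sample record can now be walked field by field according to the comment block below, driven by the counter's attr.sample_type rather than by header.type:

#include <stdint.h>
#include <linux/perf_counter.h>		/* PERF_SAMPLE_* bits (assumed available) */

struct sample {
	uint64_t ip, time, addr, id, period;
	uint32_t pid, tid, cpu;
};

static const uint64_t *parse_sample(const uint64_t *data, uint64_t sample_type,
				    struct sample *s)
{
	if (sample_type & PERF_SAMPLE_IP)
		s->ip = *data++;
	if (sample_type & PERF_SAMPLE_TID) {
		s->pid = ((const uint32_t *)data)[0];
		s->tid = ((const uint32_t *)data)[1];
		data++;
	}
	if (sample_type & PERF_SAMPLE_TIME)
		s->time = *data++;
	if (sample_type & PERF_SAMPLE_ADDR)
		s->addr = *data++;
	if (sample_type & PERF_SAMPLE_ID)
		s->id = *data++;
	if (sample_type & PERF_SAMPLE_CPU) {
		s->cpu = ((const uint32_t *)data)[0];	/* u32 cpu, u32 res */
		data++;
	}
	if (sample_type & PERF_SAMPLE_PERIOD)
		s->period = *data++;
	/* PERF_SAMPLE_GROUP and PERF_SAMPLE_CALLCHAIN follow, both
	 * prefixed with a u64 element count */
	return data;
}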
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 +++++----- kernel/perf_counter.c | 36 +++++++++++++++--------------------- tools/perf/builtin-annotate.c | 8 ++++---- tools/perf/builtin-report.c | 32 +++++++++++++++++++------------- tools/perf/builtin-top.c | 11 ++++++----- 5 files changed, 49 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index de70a10b5ec8..3078e23c91eb 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -262,7 +262,6 @@ struct perf_counter_mmap_page { #define PERF_EVENT_MISC_KERNEL (1 << 0) #define PERF_EVENT_MISC_USER (2 << 0) #define PERF_EVENT_MISC_HYPERVISOR (3 << 0) -#define PERF_EVENT_MISC_OVERFLOW (1 << 2) struct perf_event_header { __u32 type; @@ -348,9 +347,6 @@ enum perf_event_type { PERF_EVENT_READ = 8, /* - * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field - * will be PERF_SAMPLE_* - * * struct { * struct perf_event_header header; * @@ -358,8 +354,9 @@ enum perf_event_type { * { u32 pid, tid; } && PERF_SAMPLE_TID * { u64 time; } && PERF_SAMPLE_TIME * { u64 addr; } && PERF_SAMPLE_ADDR - * { u64 config; } && PERF_SAMPLE_CONFIG + * { u64 id; } && PERF_SAMPLE_ID * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 period; } && PERF_SAMPLE_PERIOD * * { u64 nr; * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP @@ -368,6 +365,9 @@ enum perf_event_type { * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN * }; */ + PERF_EVENT_SAMPLE = 9, + + PERF_EVENT_MAX, /* non-ABI */ }; enum perf_callchain_context { diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 385ca51c6e60..f2f232696587 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2575,15 +2575,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, u32 cpu, reserved; } cpu_entry; - header.type = 0; + header.type = PERF_EVENT_SAMPLE; header.size = sizeof(header); - header.misc = PERF_EVENT_MISC_OVERFLOW; + header.misc = 0; header.misc |= perf_misc_flags(data->regs); if (sample_type & PERF_SAMPLE_IP) { ip = perf_instruction_pointer(data->regs); - header.type |= PERF_SAMPLE_IP; header.size += sizeof(ip); } @@ -2592,7 +2591,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, tid_entry.pid = perf_counter_pid(counter, current); tid_entry.tid = perf_counter_tid(counter, current); - header.type |= PERF_SAMPLE_TID; header.size += sizeof(tid_entry); } @@ -2602,34 +2600,25 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, */ time = sched_clock(); - header.type |= PERF_SAMPLE_TIME; header.size += sizeof(u64); } - if (sample_type & PERF_SAMPLE_ADDR) { - header.type |= PERF_SAMPLE_ADDR; + if (sample_type & PERF_SAMPLE_ADDR) header.size += sizeof(u64); - } - if (sample_type & PERF_SAMPLE_ID) { - header.type |= PERF_SAMPLE_ID; + if (sample_type & PERF_SAMPLE_ID) header.size += sizeof(u64); - } if (sample_type & PERF_SAMPLE_CPU) { - header.type |= PERF_SAMPLE_CPU; header.size += sizeof(cpu_entry); cpu_entry.cpu = raw_smp_processor_id(); } - if (sample_type & PERF_SAMPLE_PERIOD) { - header.type |= PERF_SAMPLE_PERIOD; + if (sample_type & PERF_SAMPLE_PERIOD) header.size += sizeof(u64); - } if (sample_type & PERF_SAMPLE_GROUP) { - header.type |= PERF_SAMPLE_GROUP; header.size += sizeof(u64) + counter->nr_siblings * sizeof(group_entry); } @@ -2639,10 +2628,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, if (callchain) { callchain_size = (1 + 
callchain->nr) * sizeof(u64); - - header.type |= PERF_SAMPLE_CALLCHAIN; header.size += callchain_size; - } + } else + header.size += sizeof(u64); } ret = perf_output_begin(&handle, counter, header.size, nmi, 1); @@ -2693,8 +2681,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } } - if (callchain) - perf_output_copy(&handle, callchain, callchain_size); + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (callchain) + perf_output_copy(&handle, callchain, callchain_size); + else { + u64 nr = 0; + perf_output_put(&handle, nr); + } + } perf_output_end(&handle); } diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 7e58e3ad1508..722c0f54e549 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -855,7 +855,7 @@ static unsigned long total = 0, total_unknown = 0; static int -process_overflow_event(event_t *event, unsigned long offset, unsigned long head) +process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; int show = 0; @@ -1013,10 +1013,10 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) static int process_event(event_t *event, unsigned long offset, unsigned long head) { - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) - return process_overflow_event(event, offset, head); - switch (event->header.type) { + case PERF_EVENT_SAMPLE: + return process_sample_event(event, offset, head); + case PERF_EVENT_MMAP: return process_mmap_event(event, offset, head); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e575f3039766..ec5361c67bf5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -53,6 +53,8 @@ static regex_t parent_regex; static int exclude_other = 1; +static u64 sample_type; + struct ip_event { struct perf_event_header header; u64 ip; @@ -1135,7 +1137,7 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) } static int -process_overflow_event(event_t *event, unsigned long offset, unsigned long head) +process_sample_event(event_t *event, unsigned long offset, unsigned long head) { char level; int show = 0; @@ -1147,12 +1149,12 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) void *more_data = event->ip.__more_data; struct ip_callchain *chain = NULL; - if (event->header.type & PERF_SAMPLE_PERIOD) { + if (sample_type & PERF_SAMPLE_PERIOD) { period = *(u64 *)more_data; more_data += sizeof(u64); } - dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", + dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, @@ -1160,7 +1162,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) (void *)(long)ip, (long long)period); - if (event->header.type & PERF_SAMPLE_CALLCHAIN) { + if (sample_type & PERF_SAMPLE_CALLCHAIN) { int i; chain = (void *)more_data; @@ -1352,10 +1354,10 @@ process_event(event_t *event, unsigned long offset, unsigned long head) { trace_event(event); - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) - return process_overflow_event(event, offset, head); - switch (event->header.type) { + case PERF_EVENT_SAMPLE: + return process_sample_event(event, offset, head); + case PERF_EVENT_MMAP: return process_mmap_event(event, offset, head); @@ -1388,18 +1390,21 @@ process_event(event_t *event, unsigned long offset, unsigned long head) static struct perf_header *header; -static int 
perf_header__has_sample(u64 sample_mask) +static u64 perf_header__sample_type(void) { + u64 sample_type = 0; int i; for (i = 0; i < header->attrs; i++) { struct perf_header_attr *attr = header->attr[i]; - if (!(attr->attr.sample_type & sample_mask)) - return 0; + if (!sample_type) + sample_type = attr->attr.sample_type; + else if (sample_type != attr->attr.sample_type) + die("non matching sample_type"); } - return 1; + return sample_type; } static int __cmd_report(void) @@ -1437,8 +1442,9 @@ static int __cmd_report(void) header = perf_header__read(input); head = header->data_offset; - if (sort__has_parent && - !perf_header__has_sample(PERF_SAMPLE_CALLCHAIN)) { + sample_type = perf_header__sample_type(); + + if (sort__has_parent && !(sample_type & PERF_SAMPLE_CALLCHAIN)) { fprintf(stderr, "selected --sort parent, but no callchain data\n"); exit(-1); } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 5352b5e352ed..cf0d21f1ae10 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -392,11 +392,11 @@ static void record_ip(u64 ip, int counter) samples--; } -static void process_event(u64 ip, int counter) +static void process_event(u64 ip, int counter, int user) { samples++; - if (ip < min_ip || ip > max_ip) { + if (user) { userspace_samples++; return; } @@ -509,9 +509,10 @@ static void mmap_read_counter(struct mmap_data *md) old += size; - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { - if (event->header.type & PERF_SAMPLE_IP) - process_event(event->ip.ip, md->counter); + if (event->header.type == PERF_EVENT_SAMPLE) { + int user = + (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER; + process_event(event->ip.ip, md->counter, user); } } -- cgit v1.2.3 From 5211a242d0cbdded372aee59da18f80552b0a80a Mon Sep 17 00:00:00 2001 From: Kurt Garloff Date: Wed, 24 Jun 2009 14:32:11 -0700 Subject: x86: Add sysctl to allow panic on IOCK NMI error This patch introduces a new sysctl: /proc/sys/kernel/panic_on_io_nmi which defaults to 0 (off). When enabled, the kernel panics when the kernel receives an NMI caused by an IO error. The IO error triggered NMI indicates a serious system condition, which could result in IO data corruption. Rather than contiuing, panicing and dumping might be a better choice, so one can figure out what's causing the IO error. This could be especially important to companies running IO intensive applications where corruption must be avoided, e.g. a bank's databases. [ SuSE has been shipping it for a while, it was done at the request of a large database vendor, for their users. ] Signed-off-by: Kurt Garloff Signed-off-by: Roberto Angelino Signed-off-by: Greg Kroah-Hartman Cc: "Eric W. 
Biederman" LKML-Reference: <20090624213211.GA11291@kroah.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/dumpstack.c | 1 + arch/x86/kernel/traps.c | 3 +++ include/linux/kernel.h | 1 + kernel/sysctl.c | 8 ++++++++ 4 files changed, 13 insertions(+) (limited to 'include') diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 95ea5fa7d444..c8405718a4c3 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -22,6 +22,7 @@ #include "dumpstack.h" int panic_on_unrecovered_nmi; +int panic_on_io_nmi; unsigned int code_bytes = 64; int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; static int die_counter; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a0f48f5671c0..5204332f475d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -346,6 +346,9 @@ io_check_error(unsigned char reason, struct pt_regs *regs) printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); + if (panic_on_io_nmi) + panic("NMI IOCK error: Not continuing"); + /* Re-enable the IOCK line, wait for a few seconds */ reason = (reason & 0xf) | 8; outb(reason, 0x61); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fac104e7186a..d6320a3e8def 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -303,6 +303,7 @@ extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in extern int panic_timeout; extern int panic_on_oops; extern int panic_on_unrecovered_nmi; +extern int panic_on_io_nmi; extern const char *print_tainted(void); extern void add_taint(unsigned flag); extern int test_taint(unsigned flag); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 62e4ff9968b5..fba42eda8de2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -743,6 +743,14 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "panic_on_io_nmi", + .data = &panic_on_io_nmi, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = KERN_BOOTLOADER_TYPE, .procname = "bootloader_type", -- cgit v1.2.3 From 5e955245d6cf49c5ed26c7add7392ff5a6762bf4 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 23 Jun 2009 11:27:27 +0000 Subject: ide: always kill the whole request on error * Use blk_rq_bytes() instead of obsolete ide_rq_bytes() in ide_kill_rq() and ide_floppy_do_request() for failed requests. [ bugfix part ] * Use blk_rq_bytes() instead of obsolete ide_rq_bytes() in ide_do_devset() and ide_complete_drive_reset(). Then remove ide_rq_bytes(). [ cleanup part ] Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. 
Miller --- drivers/ide/ide-devsets.c | 2 +- drivers/ide/ide-eh.c | 2 +- drivers/ide/ide-floppy.c | 2 +- drivers/ide/ide-io.c | 14 ++------------ include/linux/ide.h | 1 - 5 files changed, 5 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-devsets.c b/drivers/ide/ide-devsets.c index 5bf958e5b1d5..1099bf7cf968 100644 --- a/drivers/ide/ide-devsets.c +++ b/drivers/ide/ide-devsets.c @@ -183,6 +183,6 @@ ide_startstop_t ide_do_devset(ide_drive_t *drive, struct request *rq) err = setfunc(drive, *(int *)&rq->cmd[1]); if (err) rq->errors = err; - ide_complete_rq(drive, err, ide_rq_bytes(rq)); + ide_complete_rq(drive, err, blk_rq_bytes(rq)); return ide_stopped; } diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c index 2b9141979613..e9abf2c3c335 100644 --- a/drivers/ide/ide-eh.c +++ b/drivers/ide/ide-eh.c @@ -149,7 +149,7 @@ static inline void ide_complete_drive_reset(ide_drive_t *drive, int err) if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET) { if (err <= 0 && rq->errors == 0) rq->errors = -EIO; - ide_complete_rq(drive, err ? err : 0, ide_rq_bytes(rq)); + ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq)); } } diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index 8b3f204f7d73..fefbdfc8db06 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -293,7 +293,7 @@ out_end: drive->failed_pc = NULL; if (blk_fs_request(rq) == 0 && rq->errors == 0) rq->errors = -EIO; - ide_complete_rq(drive, -EIO, ide_rq_bytes(rq)); + ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); return ide_stopped; } diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 95db5f03f6a2..d5f3c77beadd 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -112,16 +112,6 @@ void ide_complete_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat, u8 err) } } -/* obsolete, blk_rq_bytes() should be used instead */ -unsigned int ide_rq_bytes(struct request *rq) -{ - if (blk_pc_request(rq)) - return blk_rq_bytes(rq); - else - return blk_rq_cur_sectors(rq) << 9; -} -EXPORT_SYMBOL_GPL(ide_rq_bytes); - int ide_complete_rq(ide_drive_t *drive, int error, unsigned int nr_bytes) { ide_hwif_t *hwif = drive->hwif; @@ -152,14 +142,14 @@ void ide_kill_rq(ide_drive_t *drive, struct request *rq) if ((media == ide_floppy || media == ide_tape) && drv_req) { rq->errors = 0; - ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); } else { if (media == ide_tape) rq->errors = IDE_DRV_ERROR_GENERAL; else if (blk_fs_request(rq) == 0 && rq->errors == 0) rq->errors = -EIO; - ide_complete_rq(drive, -EIO, ide_rq_bytes(rq)); } + + ide_complete_rq(drive, -EIO, blk_rq_bytes(rq)); } static void ide_tf_set_specify_cmd(ide_drive_t *drive, struct ide_taskfile *tf) diff --git a/include/linux/ide.h b/include/linux/ide.h index cf1f3888067c..c6af7c44d46c 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1062,7 +1062,6 @@ int generic_ide_ioctl(ide_drive_t *, struct block_device *, unsigned, unsigned l extern int ide_vlb_clk; extern int ide_pci_clk; -unsigned int ide_rq_bytes(struct request *); int ide_end_rq(ide_drive_t *, struct request *, int, unsigned int); void ide_kill_rq(ide_drive_t *, struct request *); -- cgit v1.2.3 From 38b7f49a0654cb52cac61c6455807248eee3059d Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 26 Jun 2009 10:48:34 +0200 Subject: HID: fix debugfs build with !CONFIG_DEBUG_FS Fix the debug function prototypes to be correct even in the !CONFIG_DEBUG_FS case. 
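A minimal standalone toy (not kernel code) of why the stub arity matters; the actual fix below simply makes the do { } while (0) macros accept the same argument counts as their CONFIG_DEBUG_FS counterparts:

#include <stdio.h>

#ifdef HAVE_DEBUG
static void dump_input(int dev, int usage, int value)
{
	printf("dev=%d usage=%d value=%d\n", dev, usage, value);
}
#else
/* the no-op stub must take the same three arguments as the real function */
#define dump_input(a, b, c)	do { } while (0)
#endif

int main(void)
{
	dump_input(1, 2, 3);	/* compiles whether or not HAVE_DEBUG is defined */
	return 0;
}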
Reported-by: Stephen Rothwell Signed-off-by: Jiri Kosina --- include/linux/hid-debug.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/hid-debug.h b/include/linux/hid-debug.h index ec08ac1ad687..53744fa1c8b7 100644 --- a/include/linux/hid-debug.h +++ b/include/linux/hid-debug.h @@ -22,6 +22,8 @@ * */ +#define HID_DEBUG_BUFSIZE 512 + #ifdef CONFIG_DEBUG_FS void hid_dump_input(struct hid_device *, struct hid_usage *, __s32); @@ -34,7 +36,6 @@ void hid_debug_init(void); void hid_debug_exit(void); void hid_debug_event(struct hid_device *, char *); -#define HID_DEBUG_BUFSIZE 512 struct hid_debug_list { char *hid_debug_buf; @@ -48,11 +49,10 @@ struct hid_debug_list { #else -#define hid_dump_input(a,b) do { } while (0) -#define hid_dump_device(c) do { } while (0) -#define hid_dump_field(a,b) do { } while (0) -#define hid_resolv_usage(a) do { } while (0) -#define hid_resolv_event(a,b) do { } while (0) +#define hid_dump_input(a,b,c) do { } while (0) +#define hid_dump_device(a,b) do { } while (0) +#define hid_dump_field(a,b,c) do { } while (0) +#define hid_resolv_usage(a,b) do { } while (0) #define hid_debug_register(a, b) do { } while (0) #define hid_debug_unregister(a) do { } while (0) #define hid_debug_init() do { } while (0) -- cgit v1.2.3 From 73f1d9391a6aa72efdcea2f302ee7bfcd313c631 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 24 Jun 2009 01:04:36 +0900 Subject: asm-generic/vmlinux.lds.h: Fix up RW_DATA_SECTION definition. RW_DATA_SECTION is defined to take 4 different alignment parameters, while NOSAVE_DATA currently uses a fixed PAGE_SIZE alignment as noted in the comments. There are presently no in-tree users of this at present, and I just stumbled across this while implementing the simplified script on a new architecture port, which subsequently resulted in a syntax error. Signed-off-by: Paul Mundt Signed-off-by: Sam Ravnborg --- include/asm-generic/vmlinux.lds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 92b73b6140ff..f92e730695c8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -704,7 +704,7 @@ * matches the requirment of PAGE_ALIGNED_DATA. * * use 0 as page_align if page_aligned data is not used */ -#define RW_DATA_SECTION(cacheline, nosave, pagealigned, inittask) \ +#define RW_DATA_SECTION(cacheline, pagealigned, inittask) \ . = ALIGN(PAGE_SIZE); \ .data : AT(ADDR(.data) - LOAD_OFFSET) { \ INIT_TASK(inittask) \ @@ -712,7 +712,7 @@ READ_MOSTLY_DATA(cacheline) \ DATA_DATA \ CONSTRUCTORS \ - NOSAVE_DATA(nosave) \ + NOSAVE_DATA \ PAGE_ALIGNED_DATA(pagealigned) \ } -- cgit v1.2.3 From d2af12aeadaedf657c9fb9c3df984d2c5ab25f4c Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Tue, 23 Jun 2009 19:59:35 -0400 Subject: Add new macros for page-aligned data and bss sections. This patch is preparation for replacing most uses of ".bss.page_aligned" and ".data.page_aligned" in the kernel with macros, so that the section name can later be changed without having to touch a lot of the kernel. The long-term goal here is to be able to change the kernel's magic section names to those that are compatible with -ffunction-sections -fdata-sections. This requires renaming all magic sections with names of the form ".data.foo". 
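As a usage sketch (kernel context, made-up variable names), the existing C helpers in linkage.h place objects into the page-aligned sections, while the new __PAGE_ALIGNED_DATA/__PAGE_ALIGNED_BSS macros added below are the .S-file counterparts, with alignment directives still supplied by the assembly code itself:

#include <linux/linkage.h>
#include <asm/page.h>

static char scratch_page[PAGE_SIZE] __page_aligned_bss;		/* zeroed, page aligned */
static unsigned long boot_flags[PAGE_SIZE / sizeof(long)] __page_aligned_data = {
	[0] = 1,						/* initialized, page aligned */
};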
Signed-off-by: Tim Abbott Acked-by: David Howells Signed-off-by: Sam Ravnborg --- include/linux/linkage.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/linkage.h b/include/linux/linkage.h index fee9e59649c1..691f59171c6c 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -21,6 +21,15 @@ #define __page_aligned_data __section(.data.page_aligned) __aligned(PAGE_SIZE) #define __page_aligned_bss __section(.bss.page_aligned) __aligned(PAGE_SIZE) +/* + * For assembly routines. + * + * Note when using these that you must specify the appropriate + * alignment directives yourself + */ +#define __PAGE_ALIGNED_DATA .section ".data.page_aligned", "aw" +#define __PAGE_ALIGNED_BSS .section ".bss.page_aligned", "aw" + /* * This is used by architectures to keep arguments on the stack * untouched by the compiler by keeping them live until the end. -- cgit v1.2.3 From 39a449d96ac3db9b6d498b6ffbf4c763746d5e8b Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Tue, 23 Jun 2009 18:53:15 -0400 Subject: asm-generic/vmlinux.lds.h: shuffle INIT_TASK* macro names in vmlinux.lds.h We recently added a INIT_TASK(align) in include/asm-generic/vmlinux.lds.h, but there is already a macro INIT_TASK in include/linux/init_task.h, which is quite confusing. We should switch the macro in the linker script to INIT_TASK_DATA. (Sorry that I missed this in reviewing the patch). Since the macros are new, there is only one user of the INIT_TASK in vmlinux.lds.h, arch/mn10300/kernel/vmlinux.lds.S. However, we are currently using INIT_TASK_DATA for laying down an entire .data.init_task section. So rename that to INIT_TASK_DATA_SECTION. I would be worried about changing the meaning of INIT_TASK_DATA, but the old INIT_TASK_DATA implementation had no users, and in fact if anyone had tried to use it, it would have failed to compile because it didn't pass the alignment to the old INIT_TASK. Signed-off-by: Tim Abbott Cc: David Howells Cc: Jesper Nilsson --- arch/mn10300/kernel/vmlinux.lds.S | 2 +- include/asm-generic/vmlinux.lds.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index bcebcefb4ad7..c96ba3da95ac 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -61,7 +61,7 @@ SECTIONS _edata = .; /* End of data section */ } - .data.init_task : { INIT_TASK(THREAD_SIZE); } + .data.init_task : { INIT_TASK_DATA(THREAD_SIZE); } /* might get freed after init */ . = ALIGN(PAGE_SIZE); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index f92e730695c8..720af4c72206 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -191,7 +191,7 @@ . = ALIGN(align); \ *(.data.cacheline_aligned) -#define INIT_TASK(align) \ +#define INIT_TASK_DATA(align) \ . = ALIGN(align); \ *(.data.init_task) @@ -434,10 +434,10 @@ /* * Init task */ -#define INIT_TASK_DATA(align) \ +#define INIT_TASK_DATA_SECTION(align) \ . = ALIGN(align); \ .data.init_task : { \ - INIT_TASK \ + INIT_TASK_DATA(align) \ } #ifdef CONFIG_CONSTRUCTORS @@ -707,7 +707,7 @@ #define RW_DATA_SECTION(cacheline, pagealigned, inittask) \ . 
= ALIGN(PAGE_SIZE); \ .data : AT(ADDR(.data) - LOAD_OFFSET) { \ - INIT_TASK(inittask) \ + INIT_TASK_DATA(inittask) \ CACHELINE_ALIGNED_DATA(cacheline) \ READ_MOSTLY_DATA(cacheline) \ DATA_DATA \ -- cgit v1.2.3 From 857eceebd2803c9a3459f784acf45e5266921e4d Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Tue, 23 Jun 2009 19:59:36 -0400 Subject: Add new __init_task_data macro to be used in arch init_task.c files. This patch is preparation for replacing most ".data.init_task" in the kernel with macros, so that the section name can later be changed without having to touch a lot of the kernel. The long-term goal here is to be able to change the kernel's magic section names to those that are compatible with -ffunction-sections -fdata-sections. This requires renaming all magic sections with names of the form ".data.foo". Signed-off-by: Tim Abbott Signed-off-by: Sam Ravnborg --- include/linux/init_task.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 5368fbdc7801..7fc01b13be43 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -183,5 +183,8 @@ extern struct cred init_cred; LIST_HEAD_INIT(cpu_timers[2]), \ } +/* Attach to the init_task data structure for proper alignment */ +#define __init_task_data __attribute__((__section__(".data.init_task"))) + #endif -- cgit v1.2.3 From 84261923d3dddb766736023bead6fa07b7e218d5 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 17 Jun 2009 10:53:47 -0300 Subject: KVM: protect concurrent make_all_cpus_request make_all_cpus_request contains a race condition which can trigger false request completed status, as follows: CPU0 CPU1 if (test_and_set_bit(req,&vcpu->requests)) .... if (test_and_set_bit(req,&vcpu->requests)) .. return proceed to smp_call_function_many(wait=1) Use a spinlock to serialize concurrent CPUs. 
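A rough userspace analogue of the fixed pattern (pthreads, hypothetical names, not the kernel code): because the notification is issued before the lock is dropped, a second caller that finds the request bit already set cannot return before the notification for that bit has actually gone out:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static pthread_mutex_t requests_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_uint requests;			/* one bit per pending request */

/* Returns true if this caller delivered the notification itself. */
static bool make_request(unsigned int req, void (*notify)(void))
{
	bool deliver;

	pthread_mutex_lock(&requests_lock);	/* analogue of kvm->requests_lock */
	deliver = !(atomic_fetch_or(&requests, 1u << req) & (1u << req));
	if (deliver)
		notify();	/* stands in for smp_call_function_many(..., wait=1) */
	pthread_mutex_unlock(&requests_lock);

	return deliver;
}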
Cc: stable@kernel.org Signed-off-by: Andrea Arcangeli Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index aacc5449f586..16713dc672e4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -125,6 +125,7 @@ struct kvm_kernel_irq_routing_entry { struct kvm { struct mutex lock; /* protects the vcpus array and APIC accesses */ spinlock_t mmu_lock; + spinlock_t requests_lock; struct rw_semaphore slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ int nmemslots; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 013a5b3e9f75..2884baf1d5f9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -746,6 +746,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) cpumask_clear(cpus); me = get_cpu(); + spin_lock(&kvm->requests_lock); for (i = 0; i < KVM_MAX_VCPUS; ++i) { vcpu = kvm->vcpus[i]; if (!vcpu) @@ -762,6 +763,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) smp_call_function_many(cpus, ack_flush, NULL, 1); else called = false; + spin_unlock(&kvm->requests_lock); put_cpu(); free_cpumask_var(cpus); return called; @@ -982,6 +984,7 @@ static struct kvm *kvm_create_vm(void) kvm->mm = current->mm; atomic_inc(&kvm->mm->mm_count); spin_lock_init(&kvm->mmu_lock); + spin_lock_init(&kvm->requests_lock); kvm_io_bus_init(&kvm->pio_bus); mutex_init(&kvm->lock); kvm_io_bus_init(&kvm->mmio_bus); -- cgit v1.2.3 From 94e5d714f604d4cb4cb13163f01ede278e69258b Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Fri, 26 Jun 2009 14:05:27 -0400 Subject: integrity: add ima_counts_put (updated) This patch fixes an imbalance message as reported by J.R. Okajima. The IMA file counters are incremented in ima_path_check. If the actual open fails, such as ETXTBSY, decrement the counters to prevent unnecessary imbalance messages. Reported-by: J.R. 
Okajima Signed-off-by: Mimi Zohar Signed-off-by: James Morris --- fs/namei.c | 7 +++++++ include/linux/ima.h | 6 ++++++ security/integrity/ima/ima_main.c | 29 ++++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index 5b961eb71cbf..f3c5b278895a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1761,6 +1761,10 @@ do_last: goto exit; } filp = nameidata_to_filp(&nd, open_flag); + if (IS_ERR(filp)) + ima_counts_put(&nd.path, + acc_mode & (MAY_READ | MAY_WRITE | + MAY_EXEC)); mnt_drop_write(nd.path.mnt); if (nd.root.mnt) path_put(&nd.root); @@ -1817,6 +1821,9 @@ ok: goto exit; } filp = nameidata_to_filp(&nd, open_flag); + if (IS_ERR(filp)) + ima_counts_put(&nd.path, + acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC)); /* * It is now safe to drop the mnt write * because the filp has had a write taken diff --git a/include/linux/ima.h b/include/linux/ima.h index b1b827d091a9..0e3f2a4c25f6 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -24,6 +24,7 @@ extern int ima_path_check(struct path *path, int mask, int update_counts); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern void ima_counts_get(struct file *file); +extern void ima_counts_put(struct path *path, int mask); #else static inline int ima_bprm_check(struct linux_binprm *bprm) @@ -60,5 +61,10 @@ static inline void ima_counts_get(struct file *file) { return; } + +static inline void ima_counts_put(struct path *path, int mask) +{ + return; +} #endif /* CONFIG_IMA_H */ #endif /* _LINUX_IMA_H */ diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 6f611874d10e..101c512564ec 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -238,7 +238,34 @@ out: } /* - * ima_opens_get - increment file counts + * ima_counts_put - decrement file counts + * + * File counts are incremented in ima_path_check. On file open + * error, such as ETXTBSY, decrement the counts to prevent + * unnecessary imbalance messages. + */ +void ima_counts_put(struct path *path, int mask) +{ + struct inode *inode = path->dentry->d_inode; + struct ima_iint_cache *iint; + + if (!ima_initialized || !S_ISREG(inode->i_mode)) + return; + iint = ima_iint_find_insert_get(inode); + if (!iint) + return; + + mutex_lock(&iint->mutex); + iint->opencount--; + if ((mask & MAY_WRITE) || (mask == 0)) + iint->writecount--; + else if (mask & (MAY_READ | MAY_EXEC)) + iint->readcount--; + mutex_unlock(&iint->mutex); +} + +/* + * ima_counts_get - increment file counts * * - for IPC shm and shmat file. * - for nfsd exported files. -- cgit v1.2.3 From bab7614d6d1b1fc96ec6c5a7ca34c8641060e659 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Mon, 29 Jun 2009 00:20:52 -0700 Subject: Input: add support for generic GPIO-based matrix keypad Original patch by Marek Vasut, modified by Eric in: 1. use delayed work to simplify the debouncing 2. combine col_polarity/row_polarity into a single active_low field 3. use a generic bit array based XOR algorithm to detect key press/release, which should make the column assertion time shorter and code a bit cleaner 4. remove the ALT_FN handling, which is no way generic, the ALT_FN key should be treated as no different from other keys, and translation will be done by user space by commands like 'loadkeys'. 5. explicitly disable row IRQs and flush potential pending work, and schedule an immediate scan after resuming as suggested by Uli Luckas 6. 
incorporate review comments from many others Patch tested on Littleton/PXA310 (though PXA310 has a dedicate keypad controller, I have to configure those pins as generic GPIO to use this driver, works quite well, though), and Sharp Zaurus model SL-C7x0 and SL-C1000. [dtor@mail.ru: fix error unwinding path, support changing keymap from userspace] Signed-off-by: Marek Vasut Reviewed-by: Trilok Soni Reviewed-by: Uli Luckas Reviewed-by: Russell King Reviewed-by: Robert Jarzmik Signed-off-by: Eric Miao Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 13 +- drivers/input/keyboard/Makefile | 1 + drivers/input/keyboard/matrix_keypad.c | 453 +++++++++++++++++++++++++++++++++ include/linux/input/matrix_keypad.h | 65 +++++ 4 files changed, 530 insertions(+), 2 deletions(-) create mode 100644 drivers/input/keyboard/matrix_keypad.c create mode 100644 include/linux/input/matrix_keypad.h (limited to 'include') diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index d2df1030675a..a6b989a9dc07 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -158,7 +158,16 @@ config KEYBOARD_GPIO with configuration data saying which GPIOs are used. To compile this driver as a module, choose M here: the - module will be called gpio-keys. + module will be called gpio_keys. + +config KEYBOARD_MATRIX + tristate "GPIO driven matrix keypad support" + depends on GENERIC_GPIO + help + Enable support for GPIO driven matrix keypad. + + To compile this driver as a module, choose M here: the + module will be called matrix_keypad. config KEYBOARD_HIL_OLD tristate "HP HIL keyboard support (simple driver)" @@ -254,7 +263,7 @@ config KEYBOARD_PXA27x tristate "PXA27x/PXA3xx keypad support" depends on PXA27x || PXA3xx help - Enable support for PXA27x/PXA3xx keypad controller + Enable support for PXA27x/PXA3xx keypad controller. To compile this driver as a module, choose M here: the module will be called pxa27x_keypad. diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 632efbc18c44..b5b5eae9724f 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_KEYBOARD_LKKBD) += lkkbd.o obj-$(CONFIG_KEYBOARD_LM8323) += lm8323.o obj-$(CONFIG_KEYBOARD_LOCOMO) += locomokbd.o obj-$(CONFIG_KEYBOARD_MAPLE) += maple_keyb.o +obj-$(CONFIG_KEYBOARD_MATRIX) += matrix_keypad.o obj-$(CONFIG_KEYBOARD_NEWTON) += newtonkbd.o obj-$(CONFIG_KEYBOARD_OMAP) += omap-keypad.o obj-$(CONFIG_KEYBOARD_PXA27x) += pxa27x_keypad.o diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c new file mode 100644 index 000000000000..e9b2e7cb05be --- /dev/null +++ b/drivers/input/keyboard/matrix_keypad.c @@ -0,0 +1,453 @@ +/* + * GPIO driven matrix keyboard driver + * + * Copyright (c) 2008 Marek Vasut + * + * Based on corgikbd.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct matrix_keypad { + const struct matrix_keypad_platform_data *pdata; + struct input_dev *input_dev; + unsigned short *keycodes; + + uint32_t last_key_state[MATRIX_MAX_COLS]; + struct delayed_work work; + bool scan_pending; + bool stopped; + spinlock_t lock; +}; + +/* + * NOTE: normally the GPIO has to be put into HiZ when de-activated to cause + * minmal side effect when scanning other columns, here it is configured to + * be input, and it should work on most platforms. + */ +static void __activate_col(const struct matrix_keypad_platform_data *pdata, + int col, bool on) +{ + bool level_on = !pdata->active_low; + + if (on) { + gpio_direction_output(pdata->col_gpios[col], level_on); + } else { + gpio_set_value_cansleep(pdata->col_gpios[col], !level_on); + gpio_direction_input(pdata->col_gpios[col]); + } +} + +static void activate_col(const struct matrix_keypad_platform_data *pdata, + int col, bool on) +{ + __activate_col(pdata, col, on); + + if (on && pdata->col_scan_delay_us) + udelay(pdata->col_scan_delay_us); +} + +static void activate_all_cols(const struct matrix_keypad_platform_data *pdata, + bool on) +{ + int col; + + for (col = 0; col < pdata->num_col_gpios; col++) + __activate_col(pdata, col, on); +} + +static bool row_asserted(const struct matrix_keypad_platform_data *pdata, + int row) +{ + return gpio_get_value_cansleep(pdata->row_gpios[row]) ? + !pdata->active_low : pdata->active_low; +} + +static void enable_row_irqs(struct matrix_keypad *keypad) +{ + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + int i; + + for (i = 0; i < pdata->num_row_gpios; i++) + enable_irq(gpio_to_irq(pdata->row_gpios[i])); +} + +static void disable_row_irqs(struct matrix_keypad *keypad) +{ + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + int i; + + for (i = 0; i < pdata->num_row_gpios; i++) + disable_irq_nosync(gpio_to_irq(pdata->row_gpios[i])); +} + +/* + * This gets the keys from keyboard and reports it to input subsystem + */ +static void matrix_keypad_scan(struct work_struct *work) +{ + struct matrix_keypad *keypad = + container_of(work, struct matrix_keypad, work.work); + struct input_dev *input_dev = keypad->input_dev; + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + uint32_t new_state[MATRIX_MAX_COLS]; + int row, col, code; + + /* de-activate all columns for scanning */ + activate_all_cols(pdata, false); + + memset(new_state, 0, sizeof(new_state)); + + /* assert each column and read the row status out */ + for (col = 0; col < pdata->num_col_gpios; col++) { + + activate_col(pdata, col, true); + + for (row = 0; row < pdata->num_row_gpios; row++) + new_state[col] |= + row_asserted(pdata, row) ? 
(1 << row) : 0; + + activate_col(pdata, col, false); + } + + for (col = 0; col < pdata->num_col_gpios; col++) { + uint32_t bits_changed; + + bits_changed = keypad->last_key_state[col] ^ new_state[col]; + if (bits_changed == 0) + continue; + + for (row = 0; row < pdata->num_row_gpios; row++) { + if ((bits_changed & (1 << row)) == 0) + continue; + + code = (row << 4) + col; + input_event(input_dev, EV_MSC, MSC_SCAN, code); + input_report_key(input_dev, + keypad->keycodes[code], + new_state[col] & (1 << row)); + } + } + input_sync(input_dev); + + memcpy(keypad->last_key_state, new_state, sizeof(new_state)); + + activate_all_cols(pdata, true); + + /* Enable IRQs again */ + spin_lock_irq(&keypad->lock); + keypad->scan_pending = false; + enable_row_irqs(keypad); + spin_unlock_irq(&keypad->lock); +} + +static irqreturn_t matrix_keypad_interrupt(int irq, void *id) +{ + struct matrix_keypad *keypad = id; + unsigned long flags; + + spin_lock_irqsave(&keypad->lock, flags); + + /* + * See if another IRQ beaten us to it and scheduled the + * scan already. In that case we should not try to + * disable IRQs again. + */ + if (unlikely(keypad->scan_pending || keypad->stopped)) + goto out; + + disable_row_irqs(keypad); + keypad->scan_pending = true; + schedule_delayed_work(&keypad->work, + msecs_to_jiffies(keypad->pdata->debounce_ms)); + +out: + spin_unlock_irqrestore(&keypad->lock, flags); + return IRQ_HANDLED; +} + +static int matrix_keypad_start(struct input_dev *dev) +{ + struct matrix_keypad *keypad = input_get_drvdata(dev); + + keypad->stopped = false; + mb(); + + /* + * Schedule an immediate key scan to capture current key state; + * columns will be activated and IRQs be enabled after the scan. + */ + schedule_delayed_work(&keypad->work, 0); + + return 0; +} + +static void matrix_keypad_stop(struct input_dev *dev) +{ + struct matrix_keypad *keypad = input_get_drvdata(dev); + + keypad->stopped = true; + mb(); + flush_work(&keypad->work.work); + /* + * matrix_keypad_scan() will leave IRQs enabled; + * we should disable them now. 
+ */ + disable_row_irqs(keypad); +} + +#ifdef CONFIG_PM +static int matrix_keypad_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct matrix_keypad *keypad = platform_get_drvdata(pdev); + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + int i; + + matrix_keypad_stop(keypad->input_dev); + + if (device_may_wakeup(&pdev->dev)) + for (i = 0; i < pdata->num_row_gpios; i++) + enable_irq_wake(gpio_to_irq(pdata->row_gpios[i])); + + return 0; +} + +static int matrix_keypad_resume(struct platform_device *pdev) +{ + struct matrix_keypad *keypad = platform_get_drvdata(pdev); + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + int i; + + if (device_may_wakeup(&pdev->dev)) + for (i = 0; i < pdata->num_row_gpios; i++) + disable_irq_wake(gpio_to_irq(pdata->row_gpios[i])); + + matrix_keypad_start(keypad->input_dev); + + return 0; +} +#else +#define matrix_keypad_suspend NULL +#define matrix_keypad_resume NULL +#endif + +static int __devinit init_matrix_gpio(struct platform_device *pdev, + struct matrix_keypad *keypad) +{ + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + int i, err = -EINVAL; + + /* initialized strobe lines as outputs, activated */ + for (i = 0; i < pdata->num_col_gpios; i++) { + err = gpio_request(pdata->col_gpios[i], "matrix_kbd_col"); + if (err) { + dev_err(&pdev->dev, + "failed to request GPIO%d for COL%d\n", + pdata->col_gpios[i], i); + goto err_free_cols; + } + + gpio_direction_output(pdata->col_gpios[i], !pdata->active_low); + } + + for (i = 0; i < pdata->num_row_gpios; i++) { + err = gpio_request(pdata->row_gpios[i], "matrix_kbd_row"); + if (err) { + dev_err(&pdev->dev, + "failed to request GPIO%d for ROW%d\n", + pdata->row_gpios[i], i); + goto err_free_rows; + } + + gpio_direction_input(pdata->row_gpios[i]); + } + + for (i = 0; i < pdata->num_row_gpios; i++) { + err = request_irq(gpio_to_irq(pdata->row_gpios[i]), + matrix_keypad_interrupt, + IRQF_DISABLED | + IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING, + "matrix-keypad", keypad); + if (err) { + dev_err(&pdev->dev, + "Unable to acquire interrupt for GPIO line %i\n", + pdata->row_gpios[i]); + goto err_free_irqs; + } + } + + /* initialized as disabled - enabled by input->open */ + disable_row_irqs(keypad); + return 0; + +err_free_irqs: + while (--i >= 0) + free_irq(gpio_to_irq(pdata->row_gpios[i]), keypad); + i = pdata->num_row_gpios; +err_free_rows: + while (--i >= 0) + gpio_free(pdata->row_gpios[i]); + i = pdata->num_col_gpios; +err_free_cols: + while (--i >= 0) + gpio_free(pdata->col_gpios[i]); + + return err; +} + +static int __devinit matrix_keypad_probe(struct platform_device *pdev) +{ + const struct matrix_keypad_platform_data *pdata; + const struct matrix_keymap_data *keymap_data; + struct matrix_keypad *keypad; + struct input_dev *input_dev; + unsigned short *keycodes; + int i; + int err; + + pdata = pdev->dev.platform_data; + if (!pdata) { + dev_err(&pdev->dev, "no platform data defined\n"); + return -EINVAL; + } + + keymap_data = pdata->keymap_data; + if (!keymap_data) { + dev_err(&pdev->dev, "no keymap data defined\n"); + return -EINVAL; + } + + if (!keymap_data->max_keymap_size) { + dev_err(&pdev->dev, "invalid keymap data supplied\n"); + return -EINVAL; + } + + keypad = kzalloc(sizeof(struct matrix_keypad), GFP_KERNEL); + keycodes = kzalloc(keymap_data->max_keymap_size * + sizeof(keypad->keycodes), + GFP_KERNEL); + input_dev = input_allocate_device(); + if (!keypad || !keycodes || !input_dev) { + err = -ENOMEM; + goto err_free_mem; + } + + 
keypad->input_dev = input_dev; + keypad->pdata = pdata; + keypad->keycodes = keycodes; + keypad->stopped = true; + INIT_DELAYED_WORK(&keypad->work, matrix_keypad_scan); + spin_lock_init(&keypad->lock); + + input_dev->name = pdev->name; + input_dev->id.bustype = BUS_HOST; + input_dev->dev.parent = &pdev->dev; + input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP); + input_dev->open = matrix_keypad_start; + input_dev->close = matrix_keypad_stop; + + input_dev->keycode = keycodes; + input_dev->keycodesize = sizeof(*keycodes); + input_dev->keycodemax = keymap_data->max_keymap_size; + + for (i = 0; i < keymap_data->keymap_size; i++) { + unsigned int key = keymap_data->keymap[i]; + unsigned int row = KEY_ROW(key); + unsigned int col = KEY_COL(key); + unsigned short code = KEY_VAL(key); + + keycodes[(row << 4) + col] = code; + __set_bit(code, input_dev->keybit); + } + __clear_bit(KEY_RESERVED, input_dev->keybit); + + input_set_capability(input_dev, EV_MSC, MSC_SCAN); + input_set_drvdata(input_dev, keypad); + + err = init_matrix_gpio(pdev, keypad); + if (err) + goto err_free_mem; + + err = input_register_device(keypad->input_dev); + if (err) + goto err_free_mem; + + device_init_wakeup(&pdev->dev, pdata->wakeup); + platform_set_drvdata(pdev, keypad); + + return 0; + +err_free_mem: + input_free_device(input_dev); + kfree(keycodes); + kfree(keypad); + return err; +} + +static int __devexit matrix_keypad_remove(struct platform_device *pdev) +{ + struct matrix_keypad *keypad = platform_get_drvdata(pdev); + const struct matrix_keypad_platform_data *pdata = keypad->pdata; + int i; + + device_init_wakeup(&pdev->dev, 0); + + for (i = 0; i < pdata->num_row_gpios; i++) { + free_irq(gpio_to_irq(pdata->row_gpios[i]), keypad); + gpio_free(pdata->row_gpios[i]); + } + + for (i = 0; i < pdata->num_col_gpios; i++) + gpio_free(pdata->col_gpios[i]); + + input_unregister_device(keypad->input_dev); + platform_set_drvdata(pdev, NULL); + kfree(keypad->keycodes); + kfree(keypad); + + return 0; +} + +static struct platform_driver matrix_keypad_driver = { + .probe = matrix_keypad_probe, + .remove = __devexit_p(matrix_keypad_remove), + .suspend = matrix_keypad_suspend, + .resume = matrix_keypad_resume, + .driver = { + .name = "matrix-keypad", + .owner = THIS_MODULE, + }, +}; + +static int __init matrix_keypad_init(void) +{ + return platform_driver_register(&matrix_keypad_driver); +} + +static void __exit matrix_keypad_exit(void) +{ + platform_driver_unregister(&matrix_keypad_driver); +} + +module_init(matrix_keypad_init); +module_exit(matrix_keypad_exit); + +MODULE_AUTHOR("Marek Vasut "); +MODULE_DESCRIPTION("GPIO Driven Matrix Keypad Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:matrix-keypad"); diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h new file mode 100644 index 000000000000..7964516c6954 --- /dev/null +++ b/include/linux/input/matrix_keypad.h @@ -0,0 +1,65 @@ +#ifndef _MATRIX_KEYPAD_H +#define _MATRIX_KEYPAD_H + +#include +#include + +#define MATRIX_MAX_ROWS 16 +#define MATRIX_MAX_COLS 16 + +#define KEY(row, col, val) ((((row) & (MATRIX_MAX_ROWS - 1)) << 24) |\ + (((col) & (MATRIX_MAX_COLS - 1)) << 16) |\ + (val & 0xffff)) + +#define KEY_ROW(k) (((k) >> 24) & 0xff) +#define KEY_COL(k) (((k) >> 16) & 0xff) +#define KEY_VAL(k) ((k) & 0xffff) + +/** + * struct matrix_keymap_data - keymap for matrix keyboards + * @keymap: pointer to array of uint32 values encoded with KEY() macro + * representing keymap + * @keymap_size: number of entries (initialized) in this 
keymap + * @max_keymap_size: maximum size of keymap supported by the device + * + * This structure is supposed to be used by platform code to supply + * keymaps to drivers that implement matrix-like keypads/keyboards. + */ +struct matrix_keymap_data { + const uint32_t *keymap; + unsigned int keymap_size; + unsigned int max_keymap_size; +}; + +/** + * struct matrix_keypad_platform_data - platform-dependent keypad data + * @keymap_data: pointer to &matrix_keymap_data + * @row_gpios: array of gpio numbers reporesenting rows + * @col_gpios: array of gpio numbers reporesenting colums + * @num_row_gpios: actual number of row gpios used by device + * @num_col_gpios: actual number of col gpios used by device + * @col_scan_delay_us: delay, measured in microseconds, that is + * needed before we can keypad after activating column gpio + * @debounce_ms: debounce interval in milliseconds + * + * This structure represents platform-specific data that use used by + * matrix_keypad driver to perform proper initialization. + */ +struct matrix_keypad_platform_data { + const struct matrix_keymap_data *keymap_data; + + unsigned int row_gpios[MATRIX_MAX_ROWS]; + unsigned int col_gpios[MATRIX_MAX_COLS]; + unsigned int num_row_gpios; + unsigned int num_col_gpios; + + unsigned int col_scan_delay_us; + + /* key debounce interval in milli-second */ + unsigned int debounce_ms; + + bool active_low; + bool wakeup; +}; + +#endif /* _MATRIX_KEYPAD_H */ -- cgit v1.2.3 From 1a8dd307cc0a2119be4e578c517795464e6dabba Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 29 Jun 2009 17:45:39 +0900 Subject: percpu: use __weak only in the definition of weak percpu variables __weak is necessary only for definition and might even not work in declaration. Drop it from declaration. This change was suggested by Ivan Kokshaysky. Signed-off-by: Tejun Heo Acked-by: Ivan Kokshaysky --- include/linux/percpu-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index cf32838ad0fa..9b7a53cc16eb 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -56,7 +56,7 @@ */ #define DECLARE_PER_CPU_SECTION(type, name, sec) \ extern __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ - extern __PCPU_ATTRS(sec) __weak __typeof__(type) per_cpu__##name + extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name #define DEFINE_PER_CPU_SECTION(type, name, sec) \ __PCPU_DUMMY_ATTRS char __pcpu_scope_##name; \ -- cgit v1.2.3 From a3a9f79e361e864f0e9d75ebe2a0cb43d17c4272 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 Jun 2009 14:07:56 +0200 Subject: netfilter: tcp conntrack: fix unacknowledged data detection with NAT When NAT helpers change the TCP packet size, the highest seen sequence number needs to be corrected. This is currently only done upwards, when the packet size is reduced the sequence number is unchanged. This causes TCP conntrack to falsely detect unacknowledged data and decrease the timeout. Fix by updating the highest seen sequence number in both directions after packet mangling. 
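In short: the NAT helper now reports the byte delta it introduced to the packet, and window tracking only advances the highest seen sequence number when the new segment end equals the old end shifted by exactly that delta. A reduced sketch of that check (placeholder struct and function names; the real code is nf_conntrack_tcp_update() in the diff below):

	#include <linux/types.h>

	/* Reduced sketch only - see nf_conntrack_tcp_update() below. */
	struct tcp_seen_sketch {
		u32 td_end;		/* highest seen sequence number */
	};

	static void seen_update_end(struct tcp_seen_sketch *s, u32 end, s16 offset)
	{
		/* Accept the new end only when it is the old end moved by
		 * exactly the bytes the NAT helper added or removed. */
		if (s->td_end + offset == end)
			s->td_end = end;
	}
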
Tested-by: Krzysztof Piotr Oledzki Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 4 ++-- net/ipv4/netfilter/nf_nat_helper.c | 17 +++++++++++------ net/netfilter/nf_conntrack_proto_tcp.c | 6 +++--- 3 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a632689b61b4..cbdd6284996d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -258,8 +258,8 @@ static inline bool nf_ct_kill(struct nf_conn *ct) /* Update TCP window tracking data when NAT mangles the packet */ extern void nf_conntrack_tcp_update(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conn *ct, - int dir); + struct nf_conn *ct, int dir, + s16 offset); /* Fake conntrack entry for untracked connections */ extern struct nf_conn nf_conntrack_untracked; diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 155c008626c8..09172a65d9b6 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -191,7 +191,8 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, ct, ctinfo); /* Tell TCP window tracking about seq change */ nf_conntrack_tcp_update(skb, ip_hdrlen(skb), - ct, CTINFO2DIR(ctinfo)); + ct, CTINFO2DIR(ctinfo), + (int)rep_len - (int)match_len); nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } @@ -377,6 +378,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, struct tcphdr *tcph; int dir; __be32 newseq, newack; + s16 seqoff, ackoff; struct nf_conn_nat *nat = nfct_nat(ct); struct nf_nat_seq *this_way, *other_way; @@ -390,15 +392,18 @@ nf_nat_seq_adjust(struct sk_buff *skb, tcph = (void *)skb->data + ip_hdrlen(skb); if (after(ntohl(tcph->seq), this_way->correction_pos)) - newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); + seqoff = this_way->offset_after; else - newseq = htonl(ntohl(tcph->seq) + this_way->offset_before); + seqoff = this_way->offset_before; if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) - newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after); + ackoff = other_way->offset_after; else - newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); + ackoff = other_way->offset_before; + + newseq = htonl(ntohl(tcph->seq) + seqoff); + newack = htonl(ntohl(tcph->ack_seq) - ackoff); inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); @@ -413,7 +418,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) return 0; - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir); + nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir, seqoff); return 1; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 33fc0a443f3d..97a82ba75376 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -720,8 +720,8 @@ static bool tcp_in_window(const struct nf_conn *ct, /* Caller must linearize skb at tcp header. */ void nf_conntrack_tcp_update(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conn *ct, - int dir) + struct nf_conn *ct, int dir, + s16 offset) { const struct tcphdr *tcph = (const void *)skb->data + dataoff; const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; @@ -734,7 +734,7 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb, /* * We have to worry for the ack in the reply packet only... 
*/ - if (after(end, ct->proto.tcp.seen[dir].td_end)) + if (ct->proto.tcp.seen[dir].td_end + offset == end) ct->proto.tcp.seen[dir].td_end = end; ct->proto.tcp.last_end = end; spin_unlock_bh(&ct->lock); -- cgit v1.2.3 From 8a3af79361e85db6fec4173ef1916322471c19e3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Jun 2009 14:28:27 +0200 Subject: netfilter: headers_check fix: linux/netfilter/xt_osf.h fix the following 'make headers_check' warnings: usr/include/linux/netfilter/xt_osf.h:40: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_osf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/xt_osf.h b/include/linux/netfilter/xt_osf.h index fd2272e0959a..18afa495f973 100644 --- a/include/linux/netfilter/xt_osf.h +++ b/include/linux/netfilter/xt_osf.h @@ -20,6 +20,8 @@ #ifndef _XT_OSF_H #define _XT_OSF_H +#include + #define MAXGENRELEN 32 #define XT_OSF_GENRE (1<<0) -- cgit v1.2.3 From d6d3f08b0fd998b647a05540cedd11a067b72867 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 29 Jun 2009 14:31:46 +0200 Subject: netfilter: xtables: conntrack match revision 2 As reported by Philip, the UNTRACKED state bit does not fit within the 8-bit state_mask member. Enlarge state_mask and give status_mask a few more bits too. Reported-by: Philip Craig References: http://markmail.org/thread/b7eg6aovfh4agyz7 Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_conntrack.h | 13 +++++++ net/netfilter/xt_conntrack.c | 66 ++++++++++++++++++++++++++++++---- 2 files changed, 73 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h index 3430c7751948..7ae05338e94c 100644 --- a/include/linux/netfilter/xt_conntrack.h +++ b/include/linux/netfilter/xt_conntrack.h @@ -81,4 +81,17 @@ struct xt_conntrack_mtinfo1 { __u8 state_mask, status_mask; }; +struct xt_conntrack_mtinfo2 { + union nf_inet_addr origsrc_addr, origsrc_mask; + union nf_inet_addr origdst_addr, origdst_mask; + union nf_inet_addr replsrc_addr, replsrc_mask; + union nf_inet_addr repldst_addr, repldst_mask; + __u32 expires_min, expires_max; + __u16 l4proto; + __be16 origsrc_port, origdst_port; + __be16 replsrc_port, repldst_port; + __u16 match_flags, invert_flags; + __u16 state_mask, status_mask; +}; + #endif /*_XT_CONNTRACK_H*/ diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 0b7139f3dd78..fc581800698e 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -129,7 +129,7 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr, static inline bool conntrack_mt_origsrc(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, @@ -138,7 +138,7 @@ conntrack_mt_origsrc(const struct nf_conn *ct, static inline bool conntrack_mt_origdst(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3, @@ -147,7 +147,7 @@ conntrack_mt_origdst(const struct nf_conn *ct, static inline bool conntrack_mt_replsrc(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return 
conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3, @@ -156,7 +156,7 @@ conntrack_mt_replsrc(const struct nf_conn *ct, static inline bool conntrack_mt_repldst(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3, @@ -164,7 +164,7 @@ conntrack_mt_repldst(const struct nf_conn *ct, } static inline bool -ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, +ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, const struct nf_conn *ct) { const struct nf_conntrack_tuple *tuple; @@ -204,7 +204,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, static bool conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_conntrack_mtinfo1 *info = par->matchinfo; + const struct xt_conntrack_mtinfo2 *info = par->matchinfo; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int statebit; @@ -278,6 +278,16 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } +static bool +conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct xt_conntrack_mtinfo2 *const *info = par->matchinfo; + struct xt_match_param newpar = *par; + + newpar.matchinfo = *info; + return conntrack_mt(skb, &newpar); +} + static bool conntrack_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { @@ -288,11 +298,45 @@ static bool conntrack_mt_check(const struct xt_mtchk_param *par) return true; } +static bool conntrack_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_conntrack_mtinfo1 *info = par->matchinfo; + struct xt_conntrack_mtinfo2 *up; + int ret = conntrack_mt_check(par); + + if (ret < 0) + return ret; + + up = kmalloc(sizeof(*up), GFP_KERNEL); + if (up == NULL) { + nf_ct_l3proto_module_put(par->family); + return -ENOMEM; + } + + /* + * The strategy here is to minimize the overhead of v1 matching, + * by prebuilding a v2 struct and putting the pointer into the + * v1 dataspace. 
+ */ + memcpy(up, info, offsetof(typeof(*info), state_mask)); + up->state_mask = info->state_mask; + up->status_mask = info->status_mask; + *(void **)info = up; + return true; +} + static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_l3proto_module_put(par->family); } +static void conntrack_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + struct xt_conntrack_mtinfo2 **info = par->matchinfo; + kfree(*info); + conntrack_mt_destroy(par); +} + #ifdef CONFIG_COMPAT struct compat_xt_conntrack_info { @@ -363,6 +407,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { .revision = 1, .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo1), + .match = conntrack_mt_v1, + .checkentry = conntrack_mt_check_v1, + .destroy = conntrack_mt_destroy_v1, + .me = THIS_MODULE, + }, + { + .name = "conntrack", + .revision = 2, + .family = NFPROTO_UNSPEC, + .matchsize = sizeof(struct xt_conntrack_mtinfo2), .match = conntrack_mt, .checkentry = conntrack_mt_check, .destroy = conntrack_mt_destroy, -- cgit v1.2.3 From e6ce3066010a21bde961d8f8cefe0b69cae78a0f Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Mon, 29 Jun 2009 14:31:58 +0800 Subject: fs: allow d_instantiate to be called with negative parent dentry The new fsnotify infrastructure (starting at 90586523) causes an oops in spufs, where we populate a directory with files before instantiating the directory itself. The new changes seem to have introduced an assumption that a dentry's parent will be positive when instantiating. This change makes it once again possible to d_instantiate a dentry with a negative parent, and brings __fsnotify_d_instantiate() into line with inotify_d_instantiate(), which already has this NULL check. Signed-off-by: Jeremy Kerr Signed-off-by: Eric Paris --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 44848aa830dc..6c3de999fb34 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -280,7 +280,7 @@ static inline void __fsnotify_update_dcache_flags(struct dentry *dentry) assert_spin_locked(&dentry->d_lock); parent = dentry->d_parent; - if (fsnotify_inode_watches_children(parent->d_inode)) + if (parent->d_inode && fsnotify_inode_watches_children(parent->d_inode)) dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; else dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; -- cgit v1.2.3 From 2fc90f6133a87da8177636866557d4cc5f56e661 Mon Sep 17 00:00:00 2001 From: Alexey Zaytsev Date: Wed, 24 Jun 2009 16:22:30 +0400 Subject: PCI: make pci_name() take const argument Since this function should never modify it (saves warnings when called with const args too). Signed-off-by: Alexey Zaytsev Signed-off-by: Jesse Barnes --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index d304ddf412d0..115fb7ba5089 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1145,7 +1145,7 @@ static inline void pci_set_drvdata(struct pci_dev *pdev, void *data) /* If you want to know what to call your pci_dev, ask this function. * Again, it's a wrapper around the generic device. 
*/ -static inline const char *pci_name(struct pci_dev *pdev) +static inline const char *pci_name(const struct pci_dev *pdev) { return dev_name(&pdev->dev); } -- cgit v1.2.3 From 2bf427b25b79eb7cea27963a66c3d4684cae0e0c Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Mon, 29 Jun 2009 19:20:42 -0700 Subject: ide: fix resume for CONFIG_BLK_DEV_IDEACPI=y commit 2f0d0fd2a605666d38e290c5c0d2907484352dc4 ("ide-acpi: cleanup do_drive_get_GTF()") didn't account for the lack of hwif->acpidata check in generic_ide_suspend() [ indirect user of do_drive_get_GTF() through ide_acpi_exec_tfs() ] resulting in broken resume when ACPI support is enabled but ACPI data is unavailable. Fix it by adding ide_port_acpi() helper for checking if port needs ACPI handling and cleaning generic_ide_{suspend,resume}() to use it instead of hiding hwif->acpidata and ide_noacpi checks in IDE ACPI helpers (this should help in preventing similar bugs in the future). While at it: - kill superfluous debugging printks in ide_acpi_{get,push}_timing() Reported-and-tested-by: Etienne Basset Also-reported-and-tested-by: Jeff Chua Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/ide-acpi.c | 37 +++++++------------------------------ drivers/ide/ide-pm.c | 30 ++++++++++++++++++------------ include/linux/ide.h | 2 ++ 3 files changed, 27 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c index 77f79d26b264..c509c9916464 100644 --- a/drivers/ide/ide-acpi.c +++ b/drivers/ide/ide-acpi.c @@ -92,6 +92,11 @@ int ide_acpi_init(void) return 0; } +bool ide_port_acpi(ide_hwif_t *hwif) +{ + return ide_noacpi == 0 && hwif->acpidata; +} + /** * ide_get_dev_handle - finds acpi_handle and PCI device.function * @dev: device to locate @@ -352,9 +357,6 @@ int ide_acpi_exec_tfs(ide_drive_t *drive) unsigned long gtf_address; unsigned long obj_loc; - if (ide_noacpi) - return 0; - DEBPRINT("call get_GTF, drive=%s port=%d\n", drive->name, drive->dn); ret = do_drive_get_GTF(drive, >f_length, >f_address, &obj_loc); @@ -389,16 +391,6 @@ void ide_acpi_get_timing(ide_hwif_t *hwif) struct acpi_buffer output; union acpi_object *out_obj; - if (ide_noacpi) - return; - - DEBPRINT("ENTER:\n"); - - if (!hwif->acpidata) { - DEBPRINT("no ACPI data for %s\n", hwif->name); - return; - } - /* Setting up output buffer for _GTM */ output.length = ACPI_ALLOCATE_BUFFER; output.pointer = NULL; /* ACPI-CA sets this; save/free it later */ @@ -479,16 +471,6 @@ void ide_acpi_push_timing(ide_hwif_t *hwif) struct ide_acpi_drive_link *master = &hwif->acpidata->master; struct ide_acpi_drive_link *slave = &hwif->acpidata->slave; - if (ide_noacpi) - return; - - DEBPRINT("ENTER:\n"); - - if (!hwif->acpidata) { - DEBPRINT("no ACPI data for %s\n", hwif->name); - return; - } - /* Give the GTM buffer + drive Identify data to the channel via the * _STM method: */ /* setup input parameters buffer for _STM */ @@ -527,16 +509,11 @@ void ide_acpi_set_state(ide_hwif_t *hwif, int on) ide_drive_t *drive; int i; - if (ide_noacpi || ide_noacpi_psx) + if (ide_noacpi_psx) return; DEBPRINT("ENTER:\n"); - if (!hwif->acpidata) { - DEBPRINT("no ACPI data for %s\n", hwif->name); - return; - } - /* channel first and then drives for power on and verse versa for power off */ if (on) acpi_bus_set_power(hwif->acpidata->obj_handle, ACPI_STATE_D0); @@ -616,7 +593,7 @@ void ide_acpi_port_init_devices(ide_hwif_t *hwif) drive->name, err); } - if (!ide_acpionboot) { + if (ide_noacpi || ide_acpionboot == 0) { 
DEBPRINT("ACPI methods disabled on boot\n"); return; } diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index c14ca144cffe..ad7be2669dcb 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -10,9 +10,11 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) struct request_pm_state rqpm; int ret; - /* call ACPI _GTM only once */ - if ((drive->dn & 1) == 0 || pair == NULL) - ide_acpi_get_timing(hwif); + if (ide_port_acpi(hwif)) { + /* call ACPI _GTM only once */ + if ((drive->dn & 1) == 0 || pair == NULL) + ide_acpi_get_timing(hwif); + } memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); @@ -26,9 +28,11 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg) ret = blk_execute_rq(drive->queue, NULL, rq, 0); blk_put_request(rq); - /* call ACPI _PS3 only after both devices are suspended */ - if (ret == 0 && ((drive->dn & 1) || pair == NULL)) - ide_acpi_set_state(hwif, 0); + if (ret == 0 && ide_port_acpi(hwif)) { + /* call ACPI _PS3 only after both devices are suspended */ + if ((drive->dn & 1) || pair == NULL) + ide_acpi_set_state(hwif, 0); + } return ret; } @@ -42,13 +46,15 @@ int generic_ide_resume(struct device *dev) struct request_pm_state rqpm; int err; - /* call ACPI _PS0 / _STM only once */ - if ((drive->dn & 1) == 0 || pair == NULL) { - ide_acpi_set_state(hwif, 1); - ide_acpi_push_timing(hwif); - } + if (ide_port_acpi(hwif)) { + /* call ACPI _PS0 / _STM only once */ + if ((drive->dn & 1) == 0 || pair == NULL) { + ide_acpi_set_state(hwif, 1); + ide_acpi_push_timing(hwif); + } - ide_acpi_exec_tfs(drive); + ide_acpi_exec_tfs(drive); + } memset(&rqpm, 0, sizeof(rqpm)); rq = blk_get_request(drive->queue, READ, __GFP_WAIT); diff --git a/include/linux/ide.h b/include/linux/ide.h index c6af7c44d46c..edc93a6d931d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1419,6 +1419,7 @@ static inline void ide_dma_unmap_sg(ide_drive_t *drive, #ifdef CONFIG_BLK_DEV_IDEACPI int ide_acpi_init(void); +bool ide_port_acpi(ide_hwif_t *hwif); extern int ide_acpi_exec_tfs(ide_drive_t *drive); extern void ide_acpi_get_timing(ide_hwif_t *hwif); extern void ide_acpi_push_timing(ide_hwif_t *hwif); @@ -1427,6 +1428,7 @@ void ide_acpi_port_init_devices(ide_hwif_t *); extern void ide_acpi_set_state(ide_hwif_t *hwif, int on); #else static inline int ide_acpi_init(void) { return 0; } +static inline bool ide_port_acpi(ide_hwif_t *hwif) { return 0; } static inline int ide_acpi_exec_tfs(ide_drive_t *drive) { return 0; } static inline void ide_acpi_get_timing(ide_hwif_t *hwif) { ; } static inline void ide_acpi_push_timing(ide_hwif_t *hwif) { ; } -- cgit v1.2.3 From 57e7986ed142417498155ebcd5eaf617ac37136d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 30 Jun 2009 16:07:19 +1000 Subject: perf_counter: Provide a way to enable counters on exec This provides a way to mark a counter to be enabled on the next exec. This is useful for measuring the total activity of a program without including overhead from the process that launches it. This also changes the perf stat command to use this new facility. 
Signed-off-by: Paul Mackerras Cc: Peter Zijlstra LKML-Reference: <19017.43927.838745.689203@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 3 ++- kernel/perf_counter.c | 50 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/builtin-stat.c | 6 +++--- 3 files changed, 55 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 3078e23c91eb..5e970c7d3fd5 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -179,8 +179,9 @@ struct perf_counter_attr { comm : 1, /* include comm data */ freq : 1, /* use freq, not period */ inherit_stat : 1, /* per task counts */ + enable_on_exec : 1, /* next exec enables */ - __reserved_1 : 52; + __reserved_1 : 51; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 66ab1e9d1294..d55a50da2347 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1428,6 +1428,53 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu) perf_counter_task_sched_in(curr, cpu); } +/* + * Enable all of a task's counters that have been marked enable-on-exec. + * This expects task == current. + */ +static void perf_counter_enable_on_exec(struct task_struct *task) +{ + struct perf_counter_context *ctx; + struct perf_counter *counter; + unsigned long flags; + int enabled = 0; + + local_irq_save(flags); + ctx = task->perf_counter_ctxp; + if (!ctx || !ctx->nr_counters) + goto out; + + __perf_counter_task_sched_out(ctx); + + spin_lock(&ctx->lock); + + list_for_each_entry(counter, &ctx->counter_list, list_entry) { + if (!counter->attr.enable_on_exec) + continue; + counter->attr.enable_on_exec = 0; + if (counter->state >= PERF_COUNTER_STATE_INACTIVE) + continue; + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = + ctx->time - counter->total_time_enabled; + enabled = 1; + } + + /* + * Unclone this context if we enabled any counter. 
+ */ + if (enabled && ctx->parent_ctx) { + put_ctx(ctx->parent_ctx); + ctx->parent_ctx = NULL; + } + + spin_unlock(&ctx->lock); + + perf_counter_task_sched_in(task, smp_processor_id()); + out: + local_irq_restore(flags); +} + /* * Cross CPU call to read the hardware counter */ @@ -2949,6 +2996,9 @@ void perf_counter_comm(struct task_struct *task) { struct perf_comm_event comm_event; + if (task->perf_counter_ctxp) + perf_counter_enable_on_exec(task); + if (!atomic_read(&nr_comm_counters)) return; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 201ef2367dcb..2e03524a1de0 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -116,8 +116,9 @@ static void create_perf_stat_counter(int counter, int pid) fd[cpu][counter], strerror(errno)); } } else { - attr->inherit = inherit; - attr->disabled = 1; + attr->inherit = inherit; + attr->disabled = 1; + attr->enable_on_exec = 1; fd[0][counter] = sys_perf_counter_open(attr, pid, -1, -1, 0); if (fd[0][counter] < 0 && verbose) @@ -262,7 +263,6 @@ static int run_perf_stat(int argc, const char **argv) * Enable counters and exec the command: */ t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); close(go_pipe[1]); wait(&status); -- cgit v1.2.3 From e0a43ddcc08c34dbd666d93600fd23914505f4aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 30 Jun 2009 20:12:23 +0200 Subject: fuse: allow umask processing in userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch lets filesystems handle masking the file mode on creation. This is needed if filesystem is using ACLs. - The CREATE, MKDIR and MKNOD requests are extended with a "umask" parameter. - A new FUSE_DONT_MASK flag is added to the INIT request/reply. With this the filesystem may request that the create mode is not masked. CC: Jean-Pierre André Signed-off-by: Miklos Szeredi --- fs/fuse/dir.c | 20 +++++++++++++++++--- fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 9 ++++++++- include/linux/fuse.h | 20 ++++++++++++++++++-- 4 files changed, 46 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b3089a083d30..6b700734e519 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -375,7 +375,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, struct fuse_conn *fc = get_fuse_conn(dir); struct fuse_req *req; struct fuse_req *forget_req; - struct fuse_open_in inarg; + struct fuse_create_in inarg; struct fuse_open_out outopen; struct fuse_entry_out outentry; struct fuse_file *ff; @@ -399,15 +399,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode, if (!ff) goto out_put_request; + if (!fc->dont_mask) + mode &= ~current_umask(); + flags &= ~O_NOCTTY; memset(&inarg, 0, sizeof(inarg)); memset(&outentry, 0, sizeof(outentry)); inarg.flags = flags; inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_CREATE; req->in.h.nodeid = get_node_id(dir); req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? 
sizeof(struct fuse_open_in) : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; @@ -546,12 +551,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode, if (IS_ERR(req)) return PTR_ERR(req); + if (!fc->dont_mask) + mode &= ~current_umask(); + memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; inarg.rdev = new_encode_dev(rdev); + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKNOD; req->in.numargs = 2; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE : + sizeof(inarg); req->in.args[0].value = &inarg; req->in.args[1].size = entry->d_name.len + 1; req->in.args[1].value = entry->d_name.name; @@ -578,8 +588,12 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, int mode) if (IS_ERR(req)) return PTR_ERR(req); + if (!fc->dont_mask) + mode &= ~current_umask(); + memset(&inarg, 0, sizeof(inarg)); inarg.mode = mode; + inarg.umask = current_umask(); req->in.h.opcode = FUSE_MKDIR; req->in.numargs = 2; req->in.args[0].size = sizeof(inarg); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index aaf2f9ff970e..ede4f77b2d6c 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -446,6 +446,9 @@ struct fuse_conn { /** Do multi-page cached writes */ unsigned big_writes:1; + /** Don't apply umask to creation modes */ + unsigned dont_mask:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d8673ccf90b7..6cc501bd0187 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -725,6 +725,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) } if (arg->flags & FUSE_BIG_WRITES) fc->big_writes = 1; + if (arg->flags & FUSE_DONT_MASK) + fc->dont_mask = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -748,7 +750,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | - FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES; + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); @@ -864,6 +866,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (err) goto err_put_conn; + /* Handle umasking inside the fuse code */ + if (sb->s_flags & MS_POSIXACL) + fc->dont_mask = 1; + sb->s_flags |= MS_POSIXACL; + fc->release = fuse_free_conn; fc->flags = d.flags; fc->user_id = d.user_id; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index d41ed593f79f..e2b816a62488 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -25,6 +25,9 @@ * - add IOCTL message * - add unsolicited notification support * - add POLL message and NOTIFY_POLL notification + * + * 7.12 + * - add umask flag to input argument of open, mknod and mkdir */ #ifndef _LINUX_FUSE_H @@ -36,7 +39,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 11 +#define FUSE_KERNEL_MINOR_VERSION 12 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -112,6 +115,7 @@ struct fuse_file_lock { * INIT request/reply flags * * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." 
+ * FUSE_DONT_MASK: don't apply umask to file mode on create operations */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -119,6 +123,7 @@ struct fuse_file_lock { #define FUSE_ATOMIC_O_TRUNC (1 << 3) #define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) +#define FUSE_DONT_MASK (1 << 6) /** * CUSE INIT request/reply flags @@ -262,14 +267,18 @@ struct fuse_attr_out { struct fuse_attr attr; }; +#define FUSE_COMPAT_MKNOD_IN_SIZE 8 + struct fuse_mknod_in { __u32 mode; __u32 rdev; + __u32 umask; + __u32 padding; }; struct fuse_mkdir_in { __u32 mode; - __u32 padding; + __u32 umask; }; struct fuse_rename_in { @@ -300,8 +309,15 @@ struct fuse_setattr_in { }; struct fuse_open_in { + __u32 flags; + __u32 unused; +}; + +struct fuse_create_in { __u32 flags; __u32 mode; + __u32 umask; + __u32 padding; }; struct fuse_open_out { -- cgit v1.2.3 From 3b463ae0c6264f70e5d4c0a9c46af20fed43c96e Mon Sep 17 00:00:00 2001 From: John Muir Date: Sun, 31 May 2009 11:13:57 -0400 Subject: fuse: invalidation reverse calls Add notification messages that allow the filesystem to invalidate VFS caches. Two notifications are added: 1) inode invalidation - invalidate cached attributes - invalidate a range of pages in the page cache (this is optional) 2) dentry invalidation - try to invalidate a subtree in the dentry cache Care must be taken while accessing the 'struct super_block' for the mount, as it can go away while an invalidation is in progress. To prevent this, introduce a rw-semaphore, that is taken for read during the invalidation and taken for write in the ->kill_sb callback. Cc: Csaba Henk Cc: Anand Avati Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/fuse/dir.c | 37 ++++++++++++++++++++++++ fs/fuse/fuse_i.h | 24 ++++++++++++++++ fs/fuse/inode.c | 59 ++++++++++++++++++++++++++++++++++++-- include/linux/fuse.h | 16 +++++++++++ 5 files changed, 214 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8a11a8c67c42..f58ecbc416c8 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -849,6 +849,81 @@ err: return err; } +static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_inode_out outarg; + int err = -EINVAL; + + if (size != sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + fuse_copy_finish(cs); + + down_read(&fc->killsb); + err = -ENOENT; + if (!fc->sb) + goto err_unlock; + + err = fuse_reverse_inval_inode(fc->sb, outarg.ino, + outarg.off, outarg.len); + +err_unlock: + up_read(&fc->killsb); + return err; + +err: + fuse_copy_finish(cs); + return err; +} + +static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_entry_out outarg; + int err = -EINVAL; + char buf[FUSE_NAME_MAX+1]; + struct qstr name; + + if (size < sizeof(outarg)) + goto err; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + goto err; + + err = -ENAMETOOLONG; + if (outarg.namelen > FUSE_NAME_MAX) + goto err; + + name.name = buf; + name.len = outarg.namelen; + err = fuse_copy_one(cs, buf, outarg.namelen + 1); + if (err) + goto err; + fuse_copy_finish(cs); + buf[outarg.namelen] = 0; + name.hash = full_name_hash(name.name, name.len); + + down_read(&fc->killsb); + err = -ENOENT; + if (!fc->sb) + goto err_unlock; + + err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name); + 
+err_unlock: + up_read(&fc->killsb); + return err; + +err: + fuse_copy_finish(cs); + return err; +} + static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { @@ -856,6 +931,12 @@ static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, case FUSE_NOTIFY_POLL: return fuse_notify_poll(fc, size, cs); + case FUSE_NOTIFY_INVAL_INODE: + return fuse_notify_inval_inode(fc, size, cs); + + case FUSE_NOTIFY_INVAL_ENTRY: + return fuse_notify_inval_entry(fc, size, cs); + default: fuse_copy_finish(cs); return -EINVAL; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 6b700734e519..e703654e7f40 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -859,6 +859,43 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, return err; } +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + struct qstr *name) +{ + int err = -ENOTDIR; + struct inode *parent; + struct dentry *dir; + struct dentry *entry; + + parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid); + if (!parent) + return -ENOENT; + + mutex_lock(&parent->i_mutex); + if (!S_ISDIR(parent->i_mode)) + goto unlock; + + err = -ENOENT; + dir = d_find_alias(parent); + if (!dir) + goto unlock; + + entry = d_lookup(dir, name); + dput(dir); + if (!entry) + goto unlock; + + fuse_invalidate_attr(parent); + fuse_invalidate_entry(entry); + dput(entry); + err = 0; + + unlock: + mutex_unlock(&parent->i_mutex); + iput(parent); + return err; +} + /* * Calling into a user-controlled filesystem gives the filesystem * daemon ptrace-like capabilities over the requester process. This diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index ede4f77b2d6c..52b641fc0faf 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -484,6 +484,12 @@ struct fuse_conn { /** Called on final put */ void (*release)(struct fuse_conn *); + + /** Super block for this connection. */ + struct super_block *sb; + + /** Read/write semaphore to hold when accessing sb. */ + struct rw_semaphore killsb; }; static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb) @@ -511,6 +517,11 @@ extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; +/** + * Inode to nodeid comparison. + */ +int fuse_inode_eq(struct inode *inode, void *_nodeidp); + /** * Get a filled in inode */ @@ -711,6 +722,19 @@ void fuse_release_nowrite(struct inode *inode); u64 fuse_get_attr_version(struct fuse_conn *fc); +/** + * File-system tells the kernel to invalidate cache for the given node id. + */ +int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len); + +/** + * File-system tells the kernel to invalidate parent attributes and + * the dentry matching parent/name. 
+ */ +int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, + struct qstr *name); + int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, bool isdir); ssize_t fuse_direct_io(struct file *file, const char __user *buf, diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 6cc501bd0187..f91ccc4a189d 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -206,7 +206,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) BUG(); } -static int fuse_inode_eq(struct inode *inode, void *_nodeidp) +int fuse_inode_eq(struct inode *inode, void *_nodeidp) { u64 nodeid = *(u64 *) _nodeidp; if (get_node_id(inode) == nodeid) @@ -257,6 +257,31 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, return inode; } +int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid, + loff_t offset, loff_t len) +{ + struct inode *inode; + pgoff_t pg_start; + pgoff_t pg_end; + + inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid); + if (!inode) + return -ENOENT; + + fuse_invalidate_attr(inode); + if (offset >= 0) { + pg_start = offset >> PAGE_CACHE_SHIFT; + if (len <= 0) + pg_end = -1; + else + pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT; + invalidate_inode_pages2_range(inode->i_mapping, + pg_start, pg_end); + } + iput(inode); + return 0; +} + static void fuse_umount_begin(struct super_block *sb) { fuse_abort_conn(get_fuse_conn_super(sb)); @@ -480,6 +505,7 @@ void fuse_conn_init(struct fuse_conn *fc) memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); mutex_init(&fc->inst_mutex); + init_rwsem(&fc->killsb); atomic_set(&fc->count, 1); init_waitqueue_head(&fc->waitq); init_waitqueue_head(&fc->blocked_waitq); @@ -862,6 +888,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) fuse_conn_init(fc); fc->dev = sb->s_dev; + fc->sb = sb; err = fuse_bdi_init(fc, sb); if (err) goto err_put_conn; @@ -948,12 +975,25 @@ static int fuse_get_sb(struct file_system_type *fs_type, return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); } +static void fuse_kill_sb_anon(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_anon_super(sb); +} + static struct file_system_type fuse_fs_type = { .owner = THIS_MODULE, .name = "fuse", .fs_flags = FS_HAS_SUBTYPE, .get_sb = fuse_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = fuse_kill_sb_anon, }; #ifdef CONFIG_BLOCK @@ -965,11 +1005,24 @@ static int fuse_get_sb_blk(struct file_system_type *fs_type, mnt); } +static void fuse_kill_sb_blk(struct super_block *sb) +{ + struct fuse_conn *fc = get_fuse_conn_super(sb); + + if (fc) { + down_write(&fc->killsb); + fc->sb = NULL; + up_write(&fc->killsb); + } + + kill_block_super(sb); +} + static struct file_system_type fuseblk_fs_type = { .owner = THIS_MODULE, .name = "fuseblk", .get_sb = fuse_get_sb_blk, - .kill_sb = kill_block_super, + .kill_sb = fuse_kill_sb_blk, .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, }; diff --git a/include/linux/fuse.h b/include/linux/fuse.h index e2b816a62488..cf593bf9fd32 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -28,6 +28,8 @@ * * 7.12 * - add umask flag to input argument of open, mknod and mkdir + * - add notification messages for invalidation of inodes and + * directory entries */ #ifndef _LINUX_FUSE_H @@ -229,6 +231,8 @@ enum fuse_opcode { enum fuse_notify_code { FUSE_NOTIFY_POLL = 1, + FUSE_NOTIFY_INVAL_INODE = 2, + FUSE_NOTIFY_INVAL_ENTRY = 3, FUSE_NOTIFY_CODE_MAX, }; 
@@ -524,4 +528,16 @@ struct fuse_dirent { #define FUSE_DIRENT_SIZE(d) \ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) +struct fuse_notify_inval_inode_out { + __u64 ino; + __s64 off; + __s64 len; +}; + +struct fuse_notify_inval_entry_out { + __u64 parent; + __u32 namelen; + __u32 padding; +}; + #endif /* _LINUX_FUSE_H */ -- cgit v1.2.3 From b294a290d24d1196d68399cc3a9b8c50bfb55abd Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Tue, 30 Jun 2009 02:13:01 -0400 Subject: Revert "power: remove POWER_SUPPLY_PROP_CAPACITY_LEVEL" This reverts commit 8efe444038a205e79b38b7ad03878824901849a8 and 4cbc76eadf56399cd11fb736b33c53aec9caab8c. Richard@laptop.org was apparently using CAPACITY_LEVEL for debugging battery/EC problems, and was upset that it was removed. This readds it. Conflicts: Documentation/power_supply_class.txt Signed-off-by: Andres Salomon Signed-off-by: Anton Vorontsov --- Documentation/power/power_supply_class.txt | 2 ++ drivers/power/olpc_battery.c | 9 +++++++++ drivers/power/power_supply_sysfs.c | 6 ++++++ include/linux/power_supply.h | 10 ++++++++++ 4 files changed, 27 insertions(+) (limited to 'include') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index c6cd4956047c..709d95571d7b 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -108,6 +108,8 @@ relative, time-based measurements. ENERGY_FULL, ENERGY_EMPTY - same as above but for energy. CAPACITY - capacity in percents. +CAPACITY_LEVEL - capacity level. This corresponds to +POWER_SUPPLY_CAPACITY_LEVEL_*. TEMP - temperature of the power supply. TEMP_AMBIENT - ambient temperature. diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 58e419299cd6..3a589df09376 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -276,6 +276,14 @@ static int olpc_bat_get_property(struct power_supply *psy, return ret; val->intval = ec_byte; break; + case POWER_SUPPLY_PROP_CAPACITY_LEVEL: + if (ec_byte & BAT_STAT_FULL) + val->intval = POWER_SUPPLY_CAPACITY_LEVEL_FULL; + else if (ec_byte & BAT_STAT_LOW) + val->intval = POWER_SUPPLY_CAPACITY_LEVEL_LOW; + else + val->intval = POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; + break; case POWER_SUPPLY_PROP_TEMP: ret = olpc_ec_cmd(EC_BAT_TEMP, NULL, 0, (void *)&ec_word, 2); if (ret) @@ -321,6 +329,7 @@ static enum power_supply_property olpc_bat_props[] = { POWER_SUPPLY_PROP_VOLTAGE_AVG, POWER_SUPPLY_PROP_CURRENT_AVG, POWER_SUPPLY_PROP_CAPACITY, + POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_TEMP, POWER_SUPPLY_PROP_TEMP_AMBIENT, POWER_SUPPLY_PROP_MANUFACTURER, diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index da73591017f9..9deabbde6fd6 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -51,6 +51,9 @@ static ssize_t power_supply_show_property(struct device *dev, "Unknown", "NiMH", "Li-ion", "Li-poly", "LiFe", "NiCd", "LiMn" }; + static char *capacity_level_text[] = { + "Unknown", "Critical", "Low", "Normal", "High", "Full" + }; ssize_t ret; struct power_supply *psy = dev_get_drvdata(dev); const ptrdiff_t off = attr - power_supply_attrs; @@ -71,6 +74,8 @@ static ssize_t power_supply_show_property(struct device *dev, return sprintf(buf, "%s\n", health_text[value.intval]); else if (off == POWER_SUPPLY_PROP_TECHNOLOGY) return sprintf(buf, "%s\n", technology_text[value.intval]); + else if (off == POWER_SUPPLY_PROP_CAPACITY_LEVEL) + return sprintf(buf, "%s\n", 
capacity_level_text[value.intval]); else if (off >= POWER_SUPPLY_PROP_MODEL_NAME) return sprintf(buf, "%s\n", value.strval); @@ -109,6 +114,7 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(energy_now), POWER_SUPPLY_ATTR(energy_avg), POWER_SUPPLY_ATTR(capacity), + POWER_SUPPLY_ATTR(capacity_level), POWER_SUPPLY_ATTR(temp), POWER_SUPPLY_ATTR(temp_ambient), POWER_SUPPLY_ATTR(time_to_empty_now), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 594c494ac3f0..0ab6aa171241 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -58,6 +58,15 @@ enum { POWER_SUPPLY_TECHNOLOGY_LiMn, }; +enum { + POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN = 0, + POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL, + POWER_SUPPLY_CAPACITY_LEVEL_LOW, + POWER_SUPPLY_CAPACITY_LEVEL_NORMAL, + POWER_SUPPLY_CAPACITY_LEVEL_HIGH, + POWER_SUPPLY_CAPACITY_LEVEL_FULL, +}; + enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, @@ -89,6 +98,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_ENERGY_NOW, POWER_SUPPLY_PROP_ENERGY_AVG, POWER_SUPPLY_PROP_CAPACITY, /* in percents! */ + POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_TEMP, POWER_SUPPLY_PROP_TEMP_AMBIENT, POWER_SUPPLY_PROP_TIME_TO_EMPTY_NOW, -- cgit v1.2.3 From 133890103b9de08904f909995973e4b5c08a780e Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Tue, 30 Jun 2009 11:41:11 -0700 Subject: eventfd: revised interface and cleanups Change the eventfd interface to de-couple the eventfd memory context, from the file pointer instance. Without such change, there is no clean way to racely free handle the POLLHUP event sent when the last instance of the file* goes away. Also, now the internal eventfd APIs are using the eventfd context instead of the file*. This patch is required by KVM's IRQfd code, which is still under development. Signed-off-by: Davide Libenzi Cc: Gregory Haskins Cc: Rusty Russell Cc: Benjamin LaHaise Cc: Avi Kivity Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/lguest/lg.h | 2 +- drivers/lguest/lguest_user.c | 4 +- fs/aio.c | 24 +++------ fs/eventfd.c | 122 ++++++++++++++++++++++++++++++++++++++----- include/linux/aio.h | 4 +- include/linux/eventfd.h | 35 ++++++++++--- 6 files changed, 149 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index d4e8979735cb..9c3138265f8e 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -82,7 +82,7 @@ struct lg_cpu { struct lg_eventfd { unsigned long addr; - struct file *event; + struct eventfd_ctx *event; }; struct lg_eventfd_map { diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 32e297121058..9f9a2953b383 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -50,7 +50,7 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) /* Now append new entry. */ new->map[new->num].addr = addr; - new->map[new->num].event = eventfd_fget(fd); + new->map[new->num].event = eventfd_ctx_fdget(fd); if (IS_ERR(new->map[new->num].event)) { kfree(new); return PTR_ERR(new->map[new->num].event); @@ -357,7 +357,7 @@ static int close(struct inode *inode, struct file *file) /* Release any eventfds they registered. 
*/ for (i = 0; i < lg->eventfds->num; i++) - fput(lg->eventfds->map[i].event); + eventfd_ctx_put(lg->eventfds->map[i].event); kfree(lg->eventfds); /* If lg->dead doesn't contain an error code it will be NULL or a diff --git a/fs/aio.c b/fs/aio.c index 76da12537956..d065b2c3273e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -485,6 +485,8 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) { assert_spin_locked(&ctx->ctx_lock); + if (req->ki_eventfd != NULL) + eventfd_ctx_put(req->ki_eventfd); if (req->ki_dtor) req->ki_dtor(req); if (req->ki_iovec != &req->ki_inline_vec) @@ -509,8 +511,6 @@ static void aio_fput_routine(struct work_struct *data) /* Complete the fput(s) */ if (req->ki_filp != NULL) __fput(req->ki_filp); - if (req->ki_eventfd != NULL) - __fput(req->ki_eventfd); /* Link the iocb into the context's free list */ spin_lock_irq(&ctx->ctx_lock); @@ -528,8 +528,6 @@ static void aio_fput_routine(struct work_struct *data) */ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) { - int schedule_putreq = 0; - dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", req, atomic_long_read(&req->ki_filp->f_count)); @@ -549,24 +547,16 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) * we would not be holding the last reference to the file*, so * this function will be executed w/out any aio kthread wakeup. */ - if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) - schedule_putreq++; - else - req->ki_filp = NULL; - if (req->ki_eventfd != NULL) { - if (unlikely(atomic_long_dec_and_test(&req->ki_eventfd->f_count))) - schedule_putreq++; - else - req->ki_eventfd = NULL; - } - if (unlikely(schedule_putreq)) { + if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); spin_unlock(&fput_lock); queue_work(aio_wq, &fput_work); - } else + } else { + req->ki_filp = NULL; really_put_req(ctx, req); + } return 1; } @@ -1622,7 +1612,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, * an eventfd() fd, and will be signaled for each completed * event using the eventfd_signal() function. */ - req->ki_eventfd = eventfd_fget((int) iocb->aio_resfd); + req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd); if (IS_ERR(req->ki_eventfd)) { ret = PTR_ERR(req->ki_eventfd); req->ki_eventfd = NULL; diff --git a/fs/eventfd.c b/fs/eventfd.c index 3f0e1974abdc..31d12de83a2a 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -14,35 +14,44 @@ #include #include #include -#include #include #include +#include +#include struct eventfd_ctx { + struct kref kref; wait_queue_head_t wqh; /* * Every time that a write(2) is performed on an eventfd, the * value of the __u64 being written is added to "count" and a * wakeup is performed on "wqh". A read(2) will return the "count" * value to userspace, and will reset "count" to zero. The kernel - * size eventfd_signal() also, adds to the "count" counter and + * side eventfd_signal() also, adds to the "count" counter and * issue a wakeup. */ __u64 count; unsigned int flags; }; -/* - * Adds "n" to the eventfd counter "count". Returns "n" in case of - * success, or a value lower then "n" in case of coutner overflow. - * This function is supposed to be called by the kernel in paths - * that do not allow sleeping. In this function we allow the counter - * to reach the ULLONG_MAX value, and we signal this as overflow - * condition by returining a POLLERR to poll(2). +/** + * eventfd_signal - Adds @n to the eventfd counter. 
+ * @ctx: [in] Pointer to the eventfd context. + * @n: [in] Value of the counter to be added to the eventfd internal counter. + * The value cannot be negative. + * + * This function is supposed to be called by the kernel in paths that do not + * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX + * value, and we signal this as overflow condition by returining a POLLERR + * to poll(2). + * + * Returns @n in case of success, a non-negative number lower than @n in case + * of overflow, or the following error codes: + * + * -EINVAL : The value of @n is negative. */ -int eventfd_signal(struct file *file, int n) +int eventfd_signal(struct eventfd_ctx *ctx, int n) { - struct eventfd_ctx *ctx = file->private_data; unsigned long flags; if (n < 0) @@ -59,9 +68,45 @@ int eventfd_signal(struct file *file, int n) } EXPORT_SYMBOL_GPL(eventfd_signal); +static void eventfd_free(struct kref *kref) +{ + struct eventfd_ctx *ctx = container_of(kref, struct eventfd_ctx, kref); + + kfree(ctx); +} + +/** + * eventfd_ctx_get - Acquires a reference to the internal eventfd context. + * @ctx: [in] Pointer to the eventfd context. + * + * Returns: In case of success, returns a pointer to the eventfd context. + */ +struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx) +{ + kref_get(&ctx->kref); + return ctx; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_get); + +/** + * eventfd_ctx_put - Releases a reference to the internal eventfd context. + * @ctx: [in] Pointer to eventfd context. + * + * The eventfd context reference must have been previously acquired either + * with eventfd_ctx_get() or eventfd_ctx_fdget()). + */ +void eventfd_ctx_put(struct eventfd_ctx *ctx) +{ + kref_put(&ctx->kref, eventfd_free); +} +EXPORT_SYMBOL_GPL(eventfd_ctx_put); + static int eventfd_release(struct inode *inode, struct file *file) { - kfree(file->private_data); + struct eventfd_ctx *ctx = file->private_data; + + wake_up_poll(&ctx->wqh, POLLHUP); + eventfd_ctx_put(ctx); return 0; } @@ -185,6 +230,16 @@ static const struct file_operations eventfd_fops = { .write = eventfd_write, }; +/** + * eventfd_fget - Acquire a reference of an eventfd file descriptor. + * @fd: [in] Eventfd file descriptor. + * + * Returns a pointer to the eventfd file structure in case of success, or the + * following error pointer: + * + * -EBADF : Invalid @fd file descriptor. + * -EINVAL : The @fd file descriptor is not an eventfd file. + */ struct file *eventfd_fget(int fd) { struct file *file; @@ -201,6 +256,48 @@ struct file *eventfd_fget(int fd) } EXPORT_SYMBOL_GPL(eventfd_fget); +/** + * eventfd_ctx_fdget - Acquires a reference to the internal eventfd context. + * @fd: [in] Eventfd file descriptor. + * + * Returns a pointer to the internal eventfd context, otherwise the error + * pointers returned by the following functions: + * + * eventfd_fget + */ +struct eventfd_ctx *eventfd_ctx_fdget(int fd) +{ + struct file *file; + struct eventfd_ctx *ctx; + + file = eventfd_fget(fd); + if (IS_ERR(file)) + return (struct eventfd_ctx *) file; + ctx = eventfd_ctx_get(file->private_data); + fput(file); + + return ctx; +} +EXPORT_SYMBOL_GPL(eventfd_ctx_fdget); + +/** + * eventfd_ctx_fileget - Acquires a reference to the internal eventfd context. + * @file: [in] Eventfd file pointer. + * + * Returns a pointer to the internal eventfd context, otherwise the error + * pointer: + * + * -EINVAL : The @fd file descriptor is not an eventfd file. 
+ */ +struct eventfd_ctx *eventfd_ctx_fileget(struct file *file) +{ + if (file->f_op != &eventfd_fops) + return ERR_PTR(-EINVAL); + + return eventfd_ctx_get(file->private_data); +} +EXPORT_SYMBOL_GPL(eventfd_ctx_fileget); + SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) { int fd; @@ -217,6 +314,7 @@ SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) if (!ctx) return -ENOMEM; + kref_init(&ctx->kref); init_waitqueue_head(&ctx->wqh); ctx->count = count; ctx->flags = flags; diff --git a/include/linux/aio.h b/include/linux/aio.h index b16a957030f8..47f7d932a01d 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -121,9 +121,9 @@ struct kiocb { /* * If the aio_resfd field of the userspace iocb is not zero, - * this is the underlying file* to deliver event to. + * this is the underlying eventfd context to deliver events to. */ - struct file *ki_eventfd; + struct eventfd_ctx *ki_eventfd; }; #define is_sync_kiocb(iocb) ((iocb)->ki_key == KIOCB_SYNC_KEY) diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index f45a8ae5f828..3b85ba6479f4 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -8,10 +8,8 @@ #ifndef _LINUX_EVENTFD_H #define _LINUX_EVENTFD_H -#ifdef CONFIG_EVENTFD - -/* For O_CLOEXEC and O_NONBLOCK */ #include +#include /* * CAREFUL: Check include/asm-generic/fcntl.h when defining @@ -27,16 +25,37 @@ #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE) +#ifdef CONFIG_EVENTFD + +struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx); +void eventfd_ctx_put(struct eventfd_ctx *ctx); struct file *eventfd_fget(int fd); -int eventfd_signal(struct file *file, int n); +struct eventfd_ctx *eventfd_ctx_fdget(int fd); +struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); +int eventfd_signal(struct eventfd_ctx *ctx, int n); #else /* CONFIG_EVENTFD */ -#define eventfd_fget(fd) ERR_PTR(-ENOSYS) -static inline int eventfd_signal(struct file *file, int n) -{ return 0; } +/* + * Ugly ugly ugly error layer to support modules that uses eventfd but + * pretend to work in !CONFIG_EVENTFD configurations. Namely, AIO. + */ +static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd) +{ + return ERR_PTR(-ENOSYS); +} + +static inline int eventfd_signal(struct eventfd_ctx *ctx, int n) +{ + return -ENOSYS; +} + +static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) +{ + +} -#endif /* CONFIG_EVENTFD */ +#endif #endif /* _LINUX_EVENTFD_H */ -- cgit v1.2.3 From 2a2325e6e8a3782795fb520220c36fd805775972 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 30 Jun 2009 11:41:13 -0700 Subject: gcov: fix __ctors_start alignment The ctors section for each object file is eight byte aligned (on 64 bit). However the __ctors_start symbol starts at an arbitrary address dependent on the size of the previous sections. Therefore the linker may add some zeroes after __ctors_start to make sure the ctors contents are properly aligned. However the extra zeroes at the beginning aren't expected by the code. When walking the functions pointers contained in there and extra zeroes are added this may result in random jumps. So make sure that the __ctors_start symbol is always aligned as well. 
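For context, the constructor table is consumed as a bare array of function pointers between __ctors_start and __ctors_end, so linker padding inserted after __ctors_start gets called as if it were an entry. A simplified sketch of that walk (placeholder helper, not the exact init code; the actual fix is the ALIGN(8) in vmlinux.lds.h below):

	typedef void (*ctor_fn_t)(void);
	extern char __ctors_start[], __ctors_end[];

	/* Sketch only - zero padding at the start of the table would be
	 * treated as a function pointer and jumped through. */
	static void call_ctors(void)
	{
		ctor_fn_t *fn;

		for (fn = (ctor_fn_t *)__ctors_start;
		     fn < (ctor_fn_t *)__ctors_end; fn++)
			(*fn)();
	}
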
Fixes this crash on an allyesconfig on s390: [ 0.582482] Kernel BUG at 0000000000000012 [verbose debug info unavailable] [ 0.582489] illegal operation: 0001 [#1] SMP DEBUG_PAGEALLOC [ 0.582496] Modules linked in: [ 0.582501] CPU: 0 Tainted: G W 2.6.31-rc1-dirty #273 [ 0.582506] Process swapper (pid: 1, task: 000000003f218000, ksp: 000000003f2238e8) [ 0.582510] Krnl PSW : 0704200180000000 0000000000000012 (0x12) [ 0.582518] R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:0 CC:2 PM:0 EA:3 [ 0.582524] Krnl GPRS: 0000000000036727 0000000000000010 0000000000000001 0000000000000001 [ 0.582529] 00000000001dfefa 0000000000000000 0000000000000000 0000000000000040 [ 0.582534] 0000000001fff0f0 0000000001790628 0000000002296048 0000000002296048 [ 0.582540] 00000000020c438e 0000000001786000 0000000002014a66 000000003f223e60 [ 0.582553] Krnl Code:>0000000000000012: 0000 unknown [ 0.582559] 0000000000000014: 0000 unknown [ 0.582564] 0000000000000016: 0000 unknown [ 0.582570] 0000000000000018: 0000 unknown [ 0.582575] 000000000000001a: 0000 unknown [ 0.582580] 000000000000001c: 0000 unknown [ 0.582585] 000000000000001e: 0000 unknown [ 0.582591] 0000000000000020: 0000 unknown [ 0.582596] Call Trace: [ 0.582599] ([<0000000002014a46>] kernel_init+0x622/0x7a0) [ 0.582607] [<0000000000113e22>] kernel_thread_starter+0x6/0xc [ 0.582615] [<0000000000113e1c>] kernel_thread_starter+0x0/0xc [ 0.582621] INFO: lockdep is turned off. [ 0.582624] Last Breaking-Event-Address: [ 0.582627] [<0000000002014a64>] kernel_init+0x640/0x7a0 Cc: Peter Oberparleiter Cc: Ingo Molnar Cc: Martin Schwidefsky Signed-off-by: Heiko Carstens Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/vmlinux.lds.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 92b73b6140ff..dccdbed05848 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -441,7 +441,8 @@ } #ifdef CONFIG_CONSTRUCTORS -#define KERNEL_CTORS() VMLINUX_SYMBOL(__ctors_start) = .; \ +#define KERNEL_CTORS() . = ALIGN(8); \ + VMLINUX_SYMBOL(__ctors_start) = .; \ *(.ctors) \ VMLINUX_SYMBOL(__ctors_end) = .; #else -- cgit v1.2.3 From b01e8dc34379f4ba2f454390e340a025edbaaa7e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 30 Jun 2009 11:41:18 -0700 Subject: alpha: fix percpu build breakage alpha percpu access requires custom SHIFT_PERCPU_PTR() definition for modules to work around addressing range limitation. This is done via generating inline assembly using C preprocessing which forces the assembler to generate external reference. This happens behind the compiler's back and makes the compiler think that static percpu variables in modules are unused. This used to be worked around by using __unused attribute for percpu variables which prevent the compiler from omitting the variable; however, recent declare/definition attribute unification change broke this as __used can't be used for declaration. Also, in the process, PER_CPU_ATTRIBUTES definition in alpha percpu.h got broken. This patch adds PER_CPU_DEF_ATTRIBUTES which is only used for definitions and make alpha use it to add __used for percpu variables in modules. This also fixes the PER_CPU_ATTRIBUTES double definition bug. 
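[ Stand-alone illustration of the underlying compiler behaviour; this is not
  kernel code, the symbol name is invented, and x86-64 inline asm stands in
  for the alpha sequence.  A static variable referenced only by name from an
  asm template looks unused to gcc, so its definition needs
  __attribute__((__used__)) to be emitted at all:

	static int only_asm_sees_me __attribute__((__used__));

	static int *grab(void)
	{
		int *p;

		/* The reference is generated behind the compiler's back, much
		 * as alpha's SHIFT_PERCPU_PTR() does for module percpu
		 * variables.  Without __used__ above, gcc may drop the
		 * definition and the link fails on an undefined symbol. */
		asm ("leaq only_asm_sees_me(%%rip), %0" : "=r" (p));
		return p;
	}

	int main(void)
	{
		*grab() = 1;
		return *grab() - 1;
	}
]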
Signed-off-by: Tejun Heo Tested-by: maximilian attems Acked-by: Ivan Kokshaysky Cc: Richard Henderson Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/percpu.h | 6 +++--- include/asm-generic/percpu.h | 4 ++++ include/linux/percpu-defs.h | 3 ++- 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/alpha/include/asm/percpu.h b/arch/alpha/include/asm/percpu.h index 06c5c7a4afd3..b663f1f10b6a 100644 --- a/arch/alpha/include/asm/percpu.h +++ b/arch/alpha/include/asm/percpu.h @@ -30,7 +30,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #ifndef MODULE #define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset)) -#define PER_CPU_ATTRIBUTES +#define PER_CPU_DEF_ATTRIBUTES #else /* * To calculate addresses of locally defined variables, GCC uses 32-bit @@ -49,7 +49,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; : "=&r"(__ptr), "=&r"(tmp_gp)); \ (typeof(&per_cpu_var(var)))(__ptr + (offset)); }) -#define PER_CPU_ATTRIBUTES __used +#define PER_CPU_DEF_ATTRIBUTES __used #endif /* MODULE */ @@ -71,7 +71,7 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define __get_cpu_var(var) per_cpu_var(var) #define __raw_get_cpu_var(var) per_cpu_var(var) -#define PER_CPU_ATTRIBUTES +#define PER_CPU_DEF_ATTRIBUTES #endif /* SMP */ diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index d7d50d7ee51e..aa00800adacc 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -97,4 +97,8 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_ATTRIBUTES #endif +#ifndef PER_CPU_DEF_ATTRIBUTES +#define PER_CPU_DEF_ATTRIBUTES +#endif + #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8f921d74f49f..68438e18fff4 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -24,7 +24,8 @@ #define DEFINE_PER_CPU_SECTION(type, name, section) \ __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ - PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + PER_CPU_ATTRIBUTES PER_CPU_DEF_ATTRIBUTES \ + __typeof__(type) per_cpu__##name /* * Variant on the per-CPU variable declaration/definition theme used for -- cgit v1.2.3 From c4285b47b0514e2103584ee829246f813e7ae323 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Tue, 30 Jun 2009 11:41:21 -0700 Subject: parport/serial: add support for NetMos 9901 Multi-IO card Add support for the PCI-Express NetMos 9901 Multi-IO card. 
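[ For reference, not part of the patch: the "0x00PS" subsystem ID convention
  mentioned in the 8250_pci comment below decodes as, roughly (num_parallel
  here is illustrative; only the num_serial line appears in the driver):

	unsigned int num_parallel = (dev->subsystem_device & 0x00f0) >> 4;
	unsigned int num_serial   =  dev->subsystem_device & 0x000f;

  The 9901 instead reports per-function subsystem IDs (a000:1000 for the
  serial functions, a000:2000 for the parallel one, see the lspci output
  below), which is presumably why the patch skips that decoding for this
  device and adds explicit table entries. ]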
0001:06:00.0 Serial controller [0700]: NetMos Technology Device [9710:9901] (prog-if 02 [16550]) Subsystem: Device [a000:1000] Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- Kernel driver in use: serial Kernel modules: 8250_pci 0001:06:00.1 Serial controller [0700]: NetMos Technology Device [9710:9901] (prog-if 02 [16550]) Subsystem: Device [a000:1000] Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- Kernel driver in use: serial Kernel modules: 8250_pci 0001:06:00.2 Parallel controller [0701]: NetMos Technology Device [9710:9901] (prog-if 03 [IEEE1284]) Subsystem: Device [a000:2000] Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- Region 2: Memory at 80101000 (32-bit, non-prefetchable) [size=4K] Region 4: Memory at 80100000 (32-bit, non-prefetchable) [size=4K] Capabilities: Kernel driver in use: parport_pc Kernel modules: parport_pc [ 16.760181] PCI parallel port detected: 416c:0100, I/O at 0x812010(0x0), IRQ 65 [ 16.760225] parport0: PC-style at 0x812010, irq 65 [PCSPP,TRISTATE,EPP] [ 16.851842] serial 0001:06:00.0: enabling device (0004 -> 0007) [ 16.883776] 0001:06:00.0: ttyS0 at I/O 0x812030 (irq = 65) is a ST16650V2 [ 16.893832] serial 0001:06:00.1: enabling device (0004 -> 0007) [ 16.926537] 0001:06:00.1: ttyS1 at I/O 0x812020 (irq = 65) is a ST16650V2 Signed-off-by: Michael Buesch Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/parport/parport_pc.c | 5 ++++- drivers/serial/8250_pci.c | 6 ++++++ include/linux/pci_ids.h | 1 + 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 1032d5fdbd42..2597145a066e 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -2907,6 +2907,7 @@ enum parport_pc_pci_cards { netmos_9755, netmos_9805, netmos_9815, + netmos_9901, quatech_sppxp100, }; @@ -2987,7 +2988,7 @@ static struct parport_pc_pci { /* netmos_9755 */ { 2, { { 0, 1 }, { 2, 3 },} }, /* netmos_9805 */ { 1, { { 0, -1 }, } }, /* netmos_9815 */ { 2, { { 0, -1 }, { 2, -1 }, } }, - + /* netmos_9901 */ { 1, { { 0, -1 }, } }, /* quatech_sppxp100 */ { 1, { { 0, 1 }, } }, }; @@ -3089,6 +3090,8 @@ static const struct pci_device_id parport_pc_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9805 }, { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9815, PCI_ANY_ID, PCI_ANY_ID, 0, 0, netmos_9815 }, + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9901, + 0xA000, 0x2000, 0, 0, netmos_9901 }, /* Quatech SPPXP-100 Parallel port PCI ExpressCard */ { PCI_VENDOR_ID_QUATECH, PCI_DEVICE_ID_QUATECH_SPPXP_100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 }, diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index a07015d646dd..6160e03f410c 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -759,6 +759,8 @@ static int pci_netmos_init(struct pci_dev *dev) /* subdevice 0x00PS means
parallel, serial */ unsigned int num_serial = dev->subsystem_device & 0xf; + if (dev->device == PCI_DEVICE_ID_NETMOS_9901) + return 0; if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM && dev->subsystem_device == 0x0299) return 0; @@ -3557,6 +3559,10 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_VENDOR_ID_IBM, 0x0299, 0, 0, pbn_b0_bt_2_115200 }, + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9901, + 0xA000, 0x1000, + 0, 0, pbn_b0_1_115200 }, + /* * These entries match devices with class COMMUNICATION_SERIAL, * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a3b000365795..73b46b6b904f 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2645,6 +2645,7 @@ #define PCI_DEVICE_ID_NETMOS_9835 0x9835 #define PCI_DEVICE_ID_NETMOS_9845 0x9845 #define PCI_DEVICE_ID_NETMOS_9855 0x9855 +#define PCI_DEVICE_ID_NETMOS_9901 0x9901 #define PCI_VENDOR_ID_3COM_2 0xa727 -- cgit v1.2.3 From 341c87bf346f57748230628c5ad6ee69219250e8 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Tue, 30 Jun 2009 11:41:23 -0700 Subject: elf: limit max map count to safe value With ELF, at generating coredump, some more headers other than used vmas are added. When max_map_count == 65536, a core generated by following kinds of code can be unreadable because the number of ELF's program header is written in 16bit in Ehdr (please see elf.h) and the number overflows. == ... = mmap(); (munmap, mprotect, etc...) if (failed) abort(); == This can happen in mmap/munmap/mprotect/etc...which calls split_vma(). I think 65536 is not safe as _default_ and reduce it to 65530 is good for avoiding unexpected corrupted core. Anyway, max_map_count can be enlarged by sysctl if a user is brave.. Signed-off-by: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Jakub Jelinek Acked-by: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/binfmt_elf.c | 5 ++++- include/linux/sched.h | 16 ++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 9fa212b014a5..f1867900e459 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1929,7 +1929,10 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un elf = kmalloc(sizeof(*elf), GFP_KERNEL); if (!elf) goto out; - + /* + * The number of segs are recored into ELF header as 16bit value. + * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here. + */ segs = current->mm->map_count; #ifdef ELF_CORE_EXTRA_PHDRS segs += ELF_CORE_EXTRA_PHDRS; diff --git a/include/linux/sched.h b/include/linux/sched.h index 4d0754269884..0085d758d645 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -349,8 +349,20 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); struct nsproxy; struct user_namespace; -/* Maximum number of active map areas.. This is a random (large) number */ -#define DEFAULT_MAX_MAP_COUNT 65536 +/* + * Default maximum number of active map areas, this limits the number of vmas + * per mm struct. Users can overwrite this number by sysctl but there is a + * problem. + * + * When a program's coredump is generated as ELF format, a section is created + * per a vma. In ELF, the number of sections is represented in unsigned short. + * This means the number of sections should be smaller than 65535 at coredump. + * Because the kernel adds some informative sections to a image of program at + * generating coredump, we need some margin. 
The number of extra sections is + * 1-3 now and depends on arch. We use "5" as safe margin, here. + */ +#define MAPCOUNT_ELF_CORE_MARGIN (5) +#define DEFAULT_MAX_MAP_COUNT (USHORT_MAX - MAPCOUNT_ELF_CORE_MARGIN) extern int sysctl_max_map_count; -- cgit v1.2.3 From b55f627feeb9d48fdbde3835e18afbc76712e49b Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 30 Jun 2009 11:41:26 -0700 Subject: spi: new spi->mode bits Add two new spi_device.mode bits to accomodate more protocol options, and pass them through to usermode drivers: * SPI_NO_CS ... a second 3-wire variant, where the chipselect line is removed instead of a data line; transfers are still full duplex. This obviously has STRONG protocol implications since the chipselect transitions can't be used to synchronize state transitions with the SPI master. * SPI_READY ... defines open drain signal that's pulled low to pause the clock. This defines a 5-wire variant (normal 4-wire SPI plus READY) and two 4-wire variants (READY plus each of the 3-wire flavors). Such hardware flow control can be a big win. There are ADC converters and flash chips that expose READY signals, but not many host controllers support it today. The spi_bitbang code should be changed to use SPI_NO_CS instead of its current nonportable hack. That's a mode most hardware can easily support (unlike SPI_READY). Signed-off-by: David Brownell Cc: "Paulraj, Sandeep" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/spi/spidev_test.c | 10 +++++++++- drivers/spi/spidev.c | 17 +++++++++++------ include/linux/spi/spi.h | 2 ++ include/linux/spi/spidev.h | 2 ++ 4 files changed, 24 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/Documentation/spi/spidev_test.c b/Documentation/spi/spidev_test.c index cf0e3ce0d526..c1a5aad3c75a 100644 --- a/Documentation/spi/spidev_test.c +++ b/Documentation/spi/spidev_test.c @@ -99,11 +99,13 @@ void parse_opts(int argc, char *argv[]) { "lsb", 0, 0, 'L' }, { "cs-high", 0, 0, 'C' }, { "3wire", 0, 0, '3' }, + { "no-cs", 0, 0, 'N' }, + { "ready", 0, 0, 'R' }, { NULL, 0, 0, 0 }, }; int c; - c = getopt_long(argc, argv, "D:s:d:b:lHOLC3", lopts, NULL); + c = getopt_long(argc, argv, "D:s:d:b:lHOLC3NR", lopts, NULL); if (c == -1) break; @@ -139,6 +141,12 @@ void parse_opts(int argc, char *argv[]) case '3': mode |= SPI_3WIRE; break; + case 'N': + mode |= SPI_NO_CS; + break; + case 'R': + mode |= SPI_READY; + break; default: print_usage(argv[0]); break; diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 5d869c4d3eb2..606e7a40a8da 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -58,15 +58,20 @@ static unsigned long minors[N_SPI_MINORS / BITS_PER_LONG]; /* Bit masks for spi_device.mode management. Note that incorrect - * settings for CS_HIGH and 3WIRE can cause *lots* of trouble for other - * devices on a shared bus: CS_HIGH, because this device will be - * active when it shouldn't be; 3WIRE, because when active it won't - * behave as it should. + * settings for some settings can cause *lots* of trouble for other + * devices on a shared bus: * - * REVISIT should changing those two modes be privileged? + * - CS_HIGH ... this device will be active when it shouldn't be + * - 3WIRE ... when active, it won't behave as it should + * - NO_CS ... there will be no explicit message boundaries; this + * is completely incompatible with the shared bus model + * - READY ... transfers may proceed when they shouldn't. + * + * REVISIT should changing those flags be privileged? 
*/ #define SPI_MODE_MASK (SPI_CPHA | SPI_CPOL | SPI_CS_HIGH \ - | SPI_LSB_FIRST | SPI_3WIRE | SPI_LOOP) + | SPI_LSB_FIRST | SPI_3WIRE | SPI_LOOP \ + | SPI_NO_CS | SPI_READY) struct spidev_data { dev_t devt; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 9c4cd27f4685..743c933ac4e7 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -80,6 +80,8 @@ struct spi_device { #define SPI_LSB_FIRST 0x08 /* per-word bits-on-wire */ #define SPI_3WIRE 0x10 /* SI/SO signals shared */ #define SPI_LOOP 0x20 /* loopback mode */ +#define SPI_NO_CS 0x40 /* 1 dev/bus, no chipselect */ +#define SPI_READY 0x80 /* slave pulls low to pause */ u8 bits_per_word; int irq; void *controller_state; diff --git a/include/linux/spi/spidev.h b/include/linux/spi/spidev.h index 95251ccd5a07..bf0570a84f7a 100644 --- a/include/linux/spi/spidev.h +++ b/include/linux/spi/spidev.h @@ -40,6 +40,8 @@ #define SPI_LSB_FIRST 0x08 #define SPI_3WIRE 0x10 #define SPI_LOOP 0x20 +#define SPI_NO_CS 0x40 +#define SPI_READY 0x80 /*---------------------------------------------------------------------------*/ -- cgit v1.2.3 From 70d6027ff2bc8bab180273b77e7ab3e8a62cca51 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 30 Jun 2009 11:41:27 -0700 Subject: spi: add spi_master flag word Add a new spi_master.flags word listing constraints relevant to that controller. Define the first constraint bit: a half duplex restriction. Include that constraint in the OMAP1 MicroWire controller driver. Have the mmc_spi host be the first customer of this flag. Its coding relies heavily on full duplex transfers, so it must fail when the underlying controller driver won't perform them. (The spi_write_then_read routine could use it too: use the temporarily-withdrawn full-duplex speedup unless this flag is set, in which case the existing code applies. Similarly, any spi_master implementing only SPI_3WIRE should set the flag.) Signed-off-by: David Brownell Cc: Marek Szyprowski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/host/mmc_spi.c | 6 ++++++ drivers/spi/omap_uwire.c | 2 ++ include/linux/spi/spi.h | 4 ++++ 3 files changed, 12 insertions(+) (limited to 'include') diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 240608cc7ae9..a461017ce5ce 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1313,6 +1313,12 @@ static int mmc_spi_probe(struct spi_device *spi) struct mmc_spi_host *host; int status; + /* We rely on full duplex transfers, mostly to reduce + * per-transfer overheads (by making fewer transfers). + */ + if (spi->master->flags & SPI_MASTER_HALF_DUPLEX) + return -EINVAL; + /* MMC and SD specs only seem to care that sampling is on the * rising edge ... meaning SPI modes 0 or 3. So either SPI mode * should be legit. 
We'll use mode 0 since the steady state is 0, diff --git a/drivers/spi/omap_uwire.c b/drivers/spi/omap_uwire.c index aa90ddb37066..8980a5640bd9 100644 --- a/drivers/spi/omap_uwire.c +++ b/drivers/spi/omap_uwire.c @@ -514,6 +514,8 @@ static int __init uwire_probe(struct platform_device *pdev) /* the spi->mode bits understood by this driver: */ master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH; + master->flags = SPI_MASTER_HALF_DUPLEX; + master->bus_num = 2; /* "official" */ master->num_chipselect = 4; master->setup = uwire_setup; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 743c933ac4e7..c47c4b4da97e 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -250,6 +250,10 @@ struct spi_master { /* spi_device.mode flags understood by this controller driver */ u16 mode_bits; + /* other constraints relevant to this driver */ + u16 flags; +#define SPI_MASTER_HALF_DUPLEX BIT(0) /* can't do full duplex */ + /* Setup mode and clock, etc (spi driver may call many times). * * IMPORTANT: this may be called when transfers to another -- cgit v1.2.3 From 537a1bf059fa312355696fa6db80726e655e7f17 Mon Sep 17 00:00:00 2001 From: Krzysztof Helt Date: Tue, 30 Jun 2009 11:41:29 -0700 Subject: fbdev: add mutex for fb_mmap locking Add a mutex to avoid a circular locking problem between the mm layer semaphore and fbdev ioctl mutex through the fb_mmap() call. Also, add mutex to all places where smem_start and smem_len fields change so the mutex inside the fb_mmap() is actually used. Changing of these fields before calling the framebuffer_register() are not mutexed. This is 2.6.31 material. It removes one lockdep (fb_mmap() and register_framebuffer()) but there is still another one (fb_release() and register_framebuffer()). It also cleans up handling of the smem_start and smem_len fields used by mutexed section of the fb_mmap(). Signed-off-by: Krzysztof Helt Cc: Peter Zijlstra Cc: "Rafael J. 
Wysocki" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/atafb.c | 7 ++++++- drivers/video/atmel_lcdfb.c | 2 ++ drivers/video/fbmem.c | 13 +++++-------- drivers/video/fsl-diu-fb.c | 14 +++++++++----- drivers/video/i810/i810_main.c | 2 ++ drivers/video/matrox/matroxfb_base.c | 3 +++ drivers/video/matrox/matroxfb_crtc2.c | 5 ++++- drivers/video/mx3fb.c | 17 +++++++++++------ drivers/video/omap/omapfb_main.c | 4 ++++ drivers/video/platinumfb.c | 2 ++ drivers/video/pxafb.c | 2 ++ drivers/video/sh7760fb.c | 19 ++++++------------- drivers/video/sis/sis_main.c | 2 ++ drivers/video/sm501fb.c | 21 +++++++++++++-------- drivers/video/w100fb.c | 2 ++ include/linux/fb.h | 1 + 16 files changed, 74 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/drivers/video/atafb.c b/drivers/video/atafb.c index 018850c116c6..497ff8af03ed 100644 --- a/drivers/video/atafb.c +++ b/drivers/video/atafb.c @@ -2414,7 +2414,10 @@ static int atafb_get_fix(struct fb_fix_screeninfo *fix, struct fb_info *info) if (err) return err; memset(fix, 0, sizeof(struct fb_fix_screeninfo)); - return fbhw->encode_fix(fix, &par); + mutex_lock(&info->mm_lock); + err = fbhw->encode_fix(fix, &par); + mutex_unlock(&info->mm_lock); + return err; } static int atafb_get_var(struct fb_var_screeninfo *var, struct fb_info *info) @@ -2743,7 +2746,9 @@ static int atafb_set_par(struct fb_info *info) /* Decode wanted screen parameters */ fbhw->decode_var(&info->var, par); + mutex_lock(&info->mm_lock); fbhw->encode_fix(&info->fix, par); + mutex_unlock(&info->mm_lock); /* Set new videomode */ ata_set_par(par); diff --git a/drivers/video/atmel_lcdfb.c b/drivers/video/atmel_lcdfb.c index 5afd64482f55..cb88394ba995 100644 --- a/drivers/video/atmel_lcdfb.c +++ b/drivers/video/atmel_lcdfb.c @@ -270,7 +270,9 @@ static int atmel_lcdfb_alloc_video_memory(struct atmel_lcdfb_info *sinfo) smem_len = (var->xres_virtual * var->yres_virtual * ((var->bits_per_pixel + 7) / 8)); + mutex_lock(&info->mm_lock); info->fix.smem_len = max(smem_len, sinfo->smem_len); + mutex_unlock(&info->mm_lock); info->screen_base = dma_alloc_writecombine(info->device, info->fix.smem_len, (dma_addr_t *)&info->fix.smem_start, GFP_KERNEL); diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index f8a09bf8d0cd..53ea05645ff8 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1310,8 +1310,6 @@ static long fb_compat_ioctl(struct file *file, unsigned int cmd, static int fb_mmap(struct file *file, struct vm_area_struct * vma) -__acquires(&info->lock) -__releases(&info->lock) { int fbidx = iminor(file->f_path.dentry->d_inode); struct fb_info *info = registered_fb[fbidx]; @@ -1325,16 +1323,14 @@ __releases(&info->lock) off = vma->vm_pgoff << PAGE_SHIFT; if (!fb) return -ENODEV; + mutex_lock(&info->mm_lock); if (fb->fb_mmap) { int res; - mutex_lock(&info->lock); res = fb->fb_mmap(info, vma); - mutex_unlock(&info->lock); + mutex_unlock(&info->mm_lock); return res; } - mutex_lock(&info->lock); - /* frame buffer memory */ start = info->fix.smem_start; len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.smem_len); @@ -1342,13 +1338,13 @@ __releases(&info->lock) /* memory mapped io */ off -= len; if (info->var.accel_flags) { - mutex_unlock(&info->lock); + mutex_unlock(&info->mm_lock); return -EINVAL; } start = info->fix.mmio_start; len = PAGE_ALIGN((start & ~PAGE_MASK) + info->fix.mmio_len); } - mutex_unlock(&info->lock); + mutex_unlock(&info->mm_lock); start &= PAGE_MASK; if ((vma->vm_end - vma->vm_start + off) > len) return -EINVAL; @@ 
-1518,6 +1514,7 @@ register_framebuffer(struct fb_info *fb_info) break; fb_info->node = i; mutex_init(&fb_info->lock); + mutex_init(&fb_info->mm_lock); fb_info->dev = device_create(fb_class, fb_info->device, MKDEV(FB_MAJOR, i), NULL, "fb%d", i); diff --git a/drivers/video/fsl-diu-fb.c b/drivers/video/fsl-diu-fb.c index f153c581cbd7..0bf2190928d0 100644 --- a/drivers/video/fsl-diu-fb.c +++ b/drivers/video/fsl-diu-fb.c @@ -750,24 +750,26 @@ static void update_lcdc(struct fb_info *info) static int map_video_memory(struct fb_info *info) { phys_addr_t phys; + u32 smem_len = info->fix.line_length * info->var.yres_virtual; pr_debug("info->var.xres_virtual = %d\n", info->var.xres_virtual); pr_debug("info->var.yres_virtual = %d\n", info->var.yres_virtual); pr_debug("info->fix.line_length = %d\n", info->fix.line_length); + pr_debug("MAP_VIDEO_MEMORY: smem_len = %u\n", smem_len); - info->fix.smem_len = info->fix.line_length * info->var.yres_virtual; - pr_debug("MAP_VIDEO_MEMORY: smem_len = %d\n", info->fix.smem_len); - info->screen_base = fsl_diu_alloc(info->fix.smem_len, &phys); + info->screen_base = fsl_diu_alloc(smem_len, &phys); if (info->screen_base == NULL) { printk(KERN_ERR "Unable to allocate fb memory\n"); return -ENOMEM; } + mutex_lock(&info->mm_lock); info->fix.smem_start = (unsigned long) phys; + info->fix.smem_len = smem_len; + mutex_unlock(&info->mm_lock); info->screen_size = info->fix.smem_len; pr_debug("Allocated fb @ paddr=0x%08lx, size=%d.\n", - info->fix.smem_start, - info->fix.smem_len); + info->fix.smem_start, info->fix.smem_len); pr_debug("screen base %p\n", info->screen_base); return 0; @@ -776,9 +778,11 @@ static int map_video_memory(struct fb_info *info) static void unmap_video_memory(struct fb_info *info) { fsl_diu_free(info->screen_base, info->fix.smem_len); + mutex_lock(&info->mm_lock); info->screen_base = NULL; info->fix.smem_start = 0; info->fix.smem_len = 0; + mutex_unlock(&info->mm_lock); } /* diff --git a/drivers/video/i810/i810_main.c b/drivers/video/i810/i810_main.c index 2e940199fc89..71960672d721 100644 --- a/drivers/video/i810/i810_main.c +++ b/drivers/video/i810/i810_main.c @@ -1090,8 +1090,10 @@ static int encode_fix(struct fb_fix_screeninfo *fix, struct fb_info *info) memset(fix, 0, sizeof(struct fb_fix_screeninfo)); strcpy(fix->id, "I810"); + mutex_lock(&info->mm_lock); fix->smem_start = par->fb.physical; fix->smem_len = par->fb.size; + mutex_unlock(&info->mm_lock); fix->type = FB_TYPE_PACKED_PIXELS; fix->type_aux = 0; fix->xpanstep = 8; diff --git a/drivers/video/matrox/matroxfb_base.c b/drivers/video/matrox/matroxfb_base.c index 8e7a275df50c..59c3a2e14913 100644 --- a/drivers/video/matrox/matroxfb_base.c +++ b/drivers/video/matrox/matroxfb_base.c @@ -724,8 +724,10 @@ static void matroxfb_update_fix(WPMINFO2) struct fb_fix_screeninfo *fix = &ACCESS_FBINFO(fbcon).fix; DBG(__func__) + mutex_lock(&ACCESS_FBINFO(fbcon).mm_lock); fix->smem_start = ACCESS_FBINFO(video.base) + ACCESS_FBINFO(curr.ydstorg.bytes); fix->smem_len = ACCESS_FBINFO(video.len_usable) - ACCESS_FBINFO(curr.ydstorg.bytes); + mutex_unlock(&ACCESS_FBINFO(fbcon).mm_lock); } static int matroxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) @@ -2081,6 +2083,7 @@ static int matroxfb_probe(struct pci_dev* pdev, const struct pci_device_id* dumm spin_lock_init(&ACCESS_FBINFO(lock.accel)); init_rwsem(&ACCESS_FBINFO(crtc2.lock)); init_rwsem(&ACCESS_FBINFO(altout.lock)); + mutex_init(&ACCESS_FBINFO(fbcon).mm_lock); ACCESS_FBINFO(irq_flags) = 0; 
init_waitqueue_head(&ACCESS_FBINFO(crtc1.vsync.wait)); init_waitqueue_head(&ACCESS_FBINFO(crtc2.vsync.wait)); diff --git a/drivers/video/matrox/matroxfb_crtc2.c b/drivers/video/matrox/matroxfb_crtc2.c index 7ac4c5f6145d..909e10a11898 100644 --- a/drivers/video/matrox/matroxfb_crtc2.c +++ b/drivers/video/matrox/matroxfb_crtc2.c @@ -289,13 +289,16 @@ static int matroxfb_dh_release(struct fb_info* info, int user) { #undef m2info } -static void matroxfb_dh_init_fix(struct matroxfb_dh_fb_info *m2info) { +static void matroxfb_dh_init_fix(struct matroxfb_dh_fb_info *m2info) +{ struct fb_fix_screeninfo *fix = &m2info->fbcon.fix; strcpy(fix->id, "MATROX DH"); + mutex_lock(&m2info->fbcon.mm_lock); fix->smem_start = m2info->video.base; fix->smem_len = m2info->video.len_usable; + mutex_unlock(&m2info->fbcon.mm_lock); fix->ypanstep = 1; fix->ywrapstep = 0; fix->xpanstep = 8; /* TBD */ diff --git a/drivers/video/mx3fb.c b/drivers/video/mx3fb.c index b7af5256e887..567fb944bd2a 100644 --- a/drivers/video/mx3fb.c +++ b/drivers/video/mx3fb.c @@ -669,7 +669,7 @@ static uint32_t bpp_to_pixfmt(int bpp) } static int mx3fb_blank(int blank, struct fb_info *fbi); -static int mx3fb_map_video_memory(struct fb_info *fbi); +static int mx3fb_map_video_memory(struct fb_info *fbi, unsigned int mem_len); static int mx3fb_unmap_video_memory(struct fb_info *fbi); /** @@ -742,8 +742,7 @@ static int mx3fb_set_par(struct fb_info *fbi) if (fbi->fix.smem_start) mx3fb_unmap_video_memory(fbi); - fbi->fix.smem_len = mem_len; - if (mx3fb_map_video_memory(fbi) < 0) { + if (mx3fb_map_video_memory(fbi, mem_len) < 0) { mutex_unlock(&mx3_fbi->mutex); return -ENOMEM; } @@ -1198,6 +1197,7 @@ static int mx3fb_resume(struct platform_device *pdev) /** * mx3fb_map_video_memory() - allocates the DRAM memory for the frame buffer. * @fbi: framebuffer information pointer + * @mem_len: length of mapped memory * @return: Error code indicating success or failure * * This buffer is remapped into a non-cached, non-buffered, memory region to @@ -1205,23 +1205,26 @@ static int mx3fb_resume(struct platform_device *pdev) * area is remapped, all virtual memory access to the video memory should occur * at the new region. 
*/ -static int mx3fb_map_video_memory(struct fb_info *fbi) +static int mx3fb_map_video_memory(struct fb_info *fbi, unsigned int mem_len) { int retval = 0; dma_addr_t addr; fbi->screen_base = dma_alloc_writecombine(fbi->device, - fbi->fix.smem_len, + mem_len, &addr, GFP_DMA); if (!fbi->screen_base) { dev_err(fbi->device, "Cannot allocate %u bytes framebuffer memory\n", - fbi->fix.smem_len); + mem_len); retval = -EBUSY; goto err0; } + mutex_lock(&fbi->mm_lock); fbi->fix.smem_start = addr; + fbi->fix.smem_len = mem_len; + mutex_unlock(&fbi->mm_lock); dev_dbg(fbi->device, "allocated fb @ p=0x%08x, v=0x%p, size=%d.\n", (uint32_t) fbi->fix.smem_start, fbi->screen_base, fbi->fix.smem_len); @@ -1251,8 +1254,10 @@ static int mx3fb_unmap_video_memory(struct fb_info *fbi) fbi->screen_base, fbi->fix.smem_start); fbi->screen_base = 0; + mutex_lock(&fbi->mm_lock); fbi->fix.smem_start = 0; fbi->fix.smem_len = 0; + mutex_unlock(&fbi->mm_lock); return 0; } diff --git a/drivers/video/omap/omapfb_main.c b/drivers/video/omap/omapfb_main.c index 060d72fe57cb..4ea99bfc37b4 100644 --- a/drivers/video/omap/omapfb_main.c +++ b/drivers/video/omap/omapfb_main.c @@ -393,8 +393,10 @@ static void set_fb_fix(struct fb_info *fbi) rg = &plane->fbdev->mem_desc.region[plane->idx]; fbi->screen_base = rg->vaddr; + mutex_lock(&fbi->mm_lock); fix->smem_start = rg->paddr; fix->smem_len = rg->size; + mutex_unlock(&fbi->mm_lock); fix->type = FB_TYPE_PACKED_PIXELS; bpp = var->bits_per_pixel; @@ -886,8 +888,10 @@ static int omapfb_setup_mem(struct fb_info *fbi, struct omapfb_mem_info *mi) * plane memory is dealloce'd, the other * screen parameters in var / fix are invalid. */ + mutex_lock(&fbi->mm_lock); fbi->fix.smem_start = 0; fbi->fix.smem_len = 0; + mutex_unlock(&fbi->mm_lock); } } } diff --git a/drivers/video/platinumfb.c b/drivers/video/platinumfb.c index 03b3670130a0..bacfabd9ce16 100644 --- a/drivers/video/platinumfb.c +++ b/drivers/video/platinumfb.c @@ -141,7 +141,9 @@ static int platinumfb_set_par (struct fb_info *info) offset = 0x10; info->screen_base = pinfo->frame_buffer + init->fb_offset + offset; + mutex_lock(&info->mm_lock); info->fix.smem_start = (pinfo->frame_buffer_phys) + init->fb_offset + offset; + mutex_unlock(&info->mm_lock); info->fix.visual = (pinfo->cmode == CMODE_8) ? 
FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_DIRECTCOLOR; info->fix.line_length = vmode_attrs[pinfo->vmode-1].hres * (1<cmode) diff --git a/drivers/video/pxafb.c b/drivers/video/pxafb.c index 0889d50c3288..6506117c134b 100644 --- a/drivers/video/pxafb.c +++ b/drivers/video/pxafb.c @@ -815,8 +815,10 @@ static int overlayfb_map_video_memory(struct pxafb_layer *ofb) ofb->video_mem_phys = virt_to_phys(ofb->video_mem); ofb->video_mem_size = size; + mutex_lock(&ofb->fb.mm_lock); ofb->fb.fix.smem_start = ofb->video_mem_phys; ofb->fb.fix.smem_len = ofb->fb.fix.line_length * var->yres_virtual; + mutex_unlock(&ofb->fb.mm_lock); ofb->fb.screen_base = ofb->video_mem; return 0; } diff --git a/drivers/video/sh7760fb.c b/drivers/video/sh7760fb.c index 653bdfee3057..9f6d6e61f0cc 100644 --- a/drivers/video/sh7760fb.c +++ b/drivers/video/sh7760fb.c @@ -120,18 +120,6 @@ static int sh7760_setcolreg (u_int regno, return 0; } -static void encode_fix(struct fb_fix_screeninfo *fix, struct fb_info *info, - unsigned long stride) -{ - memset(fix, 0, sizeof(struct fb_fix_screeninfo)); - strcpy(fix->id, "sh7760-lcdc"); - - fix->smem_start = (unsigned long)info->screen_base; - fix->smem_len = info->screen_size; - - fix->line_length = stride; -} - static int sh7760fb_get_color_info(struct device *dev, u16 lddfr, int *bpp, int *gray) { @@ -334,7 +322,8 @@ static int sh7760fb_set_par(struct fb_info *info) iowrite32(ldsarl, par->base + LDSARL); /* mem for lower half of DSTN */ - encode_fix(&info->fix, info, stride); + info->fix.line_length = stride; + sh7760fb_check_var(&info->var, info); sh7760fb_blank(FB_BLANK_UNBLANK, info); /* panel on! */ @@ -435,6 +424,8 @@ static int sh7760fb_alloc_mem(struct fb_info *info) info->screen_base = fbmem; info->screen_size = vram; + info->fix.smem_start = (unsigned long)info->screen_base; + info->fix.smem_len = info->screen_size; return 0; } @@ -520,6 +511,8 @@ static int __devinit sh7760fb_probe(struct platform_device *pdev) info->var.transp.length = 0; info->var.transp.msb_right = 0; + strcpy(info->fix.id, "sh7760-lcdc"); + /* set the DON2 bit now, before cmap allocation, as it will randomize * palette memory. */ diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c index 7072d19080d5..fd33455389b8 100644 --- a/drivers/video/sis/sis_main.c +++ b/drivers/video/sis/sis_main.c @@ -1847,8 +1847,10 @@ sisfb_get_fix(struct fb_fix_screeninfo *fix, int con, struct fb_info *info) strcpy(fix->id, ivideo->myid); + mutex_lock(&info->mm_lock); fix->smem_start = ivideo->video_base + ivideo->video_offset; fix->smem_len = ivideo->sisfb_mem; + mutex_unlock(&info->mm_lock); fix->type = FB_TYPE_PACKED_PIXELS; fix->type_aux = 0; fix->visual = (ivideo->video_bpp == 8) ? 
FB_VISUAL_PSEUDOCOLOR : FB_VISUAL_TRUECOLOR; diff --git a/drivers/video/sm501fb.c b/drivers/video/sm501fb.c index eb5d73a06702..98f24f0ec00d 100644 --- a/drivers/video/sm501fb.c +++ b/drivers/video/sm501fb.c @@ -145,7 +145,7 @@ static inline void sm501fb_sync_regs(struct sm501fb_info *info) #define SM501_MEMF_ACCEL (8) static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem, - unsigned int why, size_t size) + unsigned int why, size_t size, u32 smem_len) { struct sm501fb_par *par; struct fb_info *fbi; @@ -172,7 +172,7 @@ static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem, if (ptr > 0) ptr &= ~(PAGE_SIZE - 1); - if (fbi && ptr < fbi->fix.smem_len) + if (fbi && ptr < smem_len) return -ENOMEM; break; @@ -197,7 +197,7 @@ static int sm501_alloc_mem(struct sm501fb_info *inf, struct sm501_mem *mem, case SM501_MEMF_ACCEL: fbi = inf->fb[HEAD_CRT]; - ptr = fbi ? fbi->fix.smem_len : 0; + ptr = fbi ? smem_len : 0; fbi = inf->fb[HEAD_PANEL]; if (fbi) { @@ -413,6 +413,7 @@ static int sm501fb_set_par_common(struct fb_info *info, unsigned int mem_type; unsigned int clock_type; unsigned int head_addr; + unsigned int smem_len; dev_dbg(fbi->dev, "%s: %dx%d, bpp = %d, virtual %dx%d\n", __func__, var->xres, var->yres, var->bits_per_pixel, @@ -453,18 +454,20 @@ static int sm501fb_set_par_common(struct fb_info *info, /* allocate fb memory within 501 */ info->fix.line_length = (var->xres_virtual * var->bits_per_pixel)/8; - info->fix.smem_len = info->fix.line_length * var->yres_virtual; + smem_len = info->fix.line_length * var->yres_virtual; dev_dbg(fbi->dev, "%s: line length = %u\n", __func__, info->fix.line_length); - if (sm501_alloc_mem(fbi, &par->screen, mem_type, - info->fix.smem_len)) { + if (sm501_alloc_mem(fbi, &par->screen, mem_type, smem_len, smem_len)) { dev_err(fbi->dev, "no memory available\n"); return -ENOMEM; } + mutex_lock(&info->mm_lock); info->fix.smem_start = fbi->fbmem_res->start + par->screen.sm_addr; + info->fix.smem_len = smem_len; + mutex_unlock(&info->mm_lock); info->screen_base = fbi->fbmem + par->screen.sm_addr; info->screen_size = info->fix.smem_len; @@ -637,7 +640,8 @@ static int sm501fb_set_par_crt(struct fb_info *info) if ((control & SM501_DC_CRT_CONTROL_SEL) == 0) { /* the head is displaying panel data... 
*/ - sm501_alloc_mem(fbi, &par->screen, SM501_MEMF_CRT, 0); + sm501_alloc_mem(fbi, &par->screen, SM501_MEMF_CRT, 0, + info->fix.smem_len); goto out_update; } @@ -1289,7 +1293,8 @@ static int sm501_init_cursor(struct fb_info *fbi, unsigned int reg_base) par->cursor_regs = info->regs + reg_base; - ret = sm501_alloc_mem(info, &par->cursor, SM501_MEMF_CURSOR, 1024); + ret = sm501_alloc_mem(info, &par->cursor, SM501_MEMF_CURSOR, 1024, + fbi->fix.smem_len); if (ret < 0) return ret; diff --git a/drivers/video/w100fb.c b/drivers/video/w100fb.c index d0674f1e3f10..8a141c2c637b 100644 --- a/drivers/video/w100fb.c +++ b/drivers/video/w100fb.c @@ -523,6 +523,7 @@ static int w100fb_set_par(struct fb_info *info) info->fix.ywrapstep = 0; info->fix.line_length = par->xres * BITS_PER_PIXEL / 8; + mutex_lock(&info->mm_lock); if ((par->xres*par->yres*BITS_PER_PIXEL/8) > (MEM_INT_SIZE+1)) { par->extmem_active = 1; info->fix.smem_len = par->mach->mem->size+1; @@ -530,6 +531,7 @@ static int w100fb_set_par(struct fb_info *info) par->extmem_active = 0; info->fix.smem_len = MEM_INT_SIZE+1; } + mutex_unlock(&info->mm_lock); w100fb_activate_var(par); } diff --git a/include/linux/fb.h b/include/linux/fb.h index dd68358996b7..f847df9e99b6 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -819,6 +819,7 @@ struct fb_info { int node; int flags; struct mutex lock; /* Lock for open/release/ioctl funcs */ + struct mutex mm_lock; /* Lock for fb_mmap and smem_* fields */ struct fb_var_screeninfo var; /* Current var */ struct fb_fix_screeninfo fix; /* Current fix */ struct fb_monspecs monspecs; /* Current Monitor specs */ -- cgit v1.2.3 From d9d62f3f2c6fa609883714f6fd6cd710a83d307f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:54:12 +0000 Subject: usbnet: Remove private stats structure Now that nothing uses the private stats structure we can remove it. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 5d44059f6d63..310e18a880ff 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -42,7 +42,6 @@ struct usbnet { /* protocol/interface state */ struct net_device *net; - struct net_device_stats stats; int msg_enable; unsigned long data [5]; u32 xid; -- cgit v1.2.3 From 7878cba9f0037f5599004b03a1260b32d9050360 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Jun 2009 15:37:49 +0200 Subject: block: Create bip slabs with embedded integrity vectors This patch restores stacking ability to the block layer integrity infrastructure by creating a set of dedicated bip slabs. Each bip slab has an embedded bio_vec array at the end. This cuts down on memory allocations and also simplifies the code compared to the original bvec version. Only the largest bip slab is backed by a mempool. The pool is contained in the bio_set so stacking drivers can ensure forward progress. Signed-off-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- drivers/md/dm.c | 4 +- fs/bio-integrity.c | 170 ++++++++++++++++++++++++++++++++++++++-------------- fs/bio.c | 11 +++- include/linux/bio.h | 22 +++++-- 5 files changed, 152 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index b06cf5c2a829..345d99da8d41 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2365,7 +2365,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, __bio_clone(bio, bio_src); if (bio_integrity(bio_src) && - bio_integrity_clone(bio, bio_src, gfp_mask)) + bio_integrity_clone(bio, bio_src, gfp_mask, bs)) goto free_and_out; if (bio_ctr && bio_ctr(bio, bio_src, data)) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3c6d4ee8921d..9acd54a5cffb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1017,7 +1017,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, clone->bi_flags |= 1 << BIO_CLONED; if (bio_integrity(bio)) { - bio_integrity_clone(clone, bio, GFP_NOIO); + bio_integrity_clone(clone, bio, GFP_NOIO, bs); bio_integrity_trim(clone, bio_sector_offset(bio, idx, offset), len); } @@ -1045,7 +1045,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, clone->bi_flags &= ~(1 << BIO_SEG_VALID); if (bio_integrity(bio)) { - bio_integrity_clone(clone, bio, GFP_NOIO); + bio_integrity_clone(clone, bio, GFP_NOIO, bs); if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) bio_integrity_trim(clone, diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 31c46a241bac..49a34e7f7306 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -1,7 +1,7 @@ /* * bio-integrity.c - bio data integrity extensions * - * Copyright (C) 2007, 2008 Oracle Corporation + * Copyright (C) 2007, 2008, 2009 Oracle Corporation * Written by: Martin K. Petersen * * This program is free software; you can redistribute it and/or @@ -25,63 +25,121 @@ #include #include -static struct kmem_cache *bio_integrity_slab __read_mostly; -static mempool_t *bio_integrity_pool; -static struct bio_set *integrity_bio_set; +struct integrity_slab { + struct kmem_cache *slab; + unsigned short nr_vecs; + char name[8]; +}; + +#define IS(x) { .nr_vecs = x, .name = "bip-"__stringify(x) } +struct integrity_slab bip_slab[BIOVEC_NR_POOLS] __read_mostly = { + IS(1), IS(4), IS(16), IS(64), IS(128), IS(BIO_MAX_PAGES), +}; +#undef IS + static struct workqueue_struct *kintegrityd_wq; +static inline unsigned int vecs_to_idx(unsigned int nr) +{ + switch (nr) { + case 1: + return 0; + case 2 ... 4: + return 1; + case 5 ... 16: + return 2; + case 17 ... 64: + return 3; + case 65 ... 128: + return 4; + case 129 ... BIO_MAX_PAGES: + return 5; + default: + BUG(); + } +} + +static inline int use_bip_pool(unsigned int idx) +{ + if (idx == BIOVEC_NR_POOLS) + return 1; + + return 0; +} + /** - * bio_integrity_alloc - Allocate integrity payload and attach it to bio + * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio * @bio: bio to attach integrity metadata to * @gfp_mask: Memory allocation mask * @nr_vecs: Number of integrity metadata scatter-gather elements + * @bs: bio_set to allocate from * * Description: This function prepares a bio for attaching integrity * metadata. nr_vecs specifies the maximum number of pages containing * integrity metadata that can be attached. 
*/ -struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, - gfp_t gfp_mask, - unsigned int nr_vecs) +struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, + gfp_t gfp_mask, + unsigned int nr_vecs, + struct bio_set *bs) { struct bio_integrity_payload *bip; - struct bio_vec *iv; - unsigned long idx; + unsigned int idx = vecs_to_idx(nr_vecs); BUG_ON(bio == NULL); + bip = NULL; - bip = mempool_alloc(bio_integrity_pool, gfp_mask); - if (unlikely(bip == NULL)) { - printk(KERN_ERR "%s: could not alloc bip\n", __func__); - return NULL; - } + /* Lower order allocations come straight from slab */ + if (!use_bip_pool(idx)) + bip = kmem_cache_alloc(bip_slab[idx].slab, gfp_mask); - memset(bip, 0, sizeof(*bip)); + /* Use mempool if lower order alloc failed or max vecs were requested */ + if (bip == NULL) { + bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask); - iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, integrity_bio_set); - if (unlikely(iv == NULL)) { - printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__); - mempool_free(bip, bio_integrity_pool); - return NULL; + if (unlikely(bip == NULL)) { + printk(KERN_ERR "%s: could not alloc bip\n", __func__); + return NULL; + } } - bip->bip_pool = idx; - bip->bip_vec = iv; + memset(bip, 0, sizeof(*bip)); + + bip->bip_slab = idx; bip->bip_bio = bio; bio->bi_integrity = bip; return bip; } +EXPORT_SYMBOL(bio_integrity_alloc_bioset); + +/** + * bio_integrity_alloc - Allocate integrity payload and attach it to bio + * @bio: bio to attach integrity metadata to + * @gfp_mask: Memory allocation mask + * @nr_vecs: Number of integrity metadata scatter-gather elements + * + * Description: This function prepares a bio for attaching integrity + * metadata. nr_vecs specifies the maximum number of pages containing + * integrity metadata that can be attached. + */ +struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, + gfp_t gfp_mask, + unsigned int nr_vecs) +{ + return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set); +} EXPORT_SYMBOL(bio_integrity_alloc); /** * bio_integrity_free - Free bio integrity payload * @bio: bio containing bip to be freed + * @bs: bio_set this bio was allocated from * * Description: Used to free the integrity portion of a bio. Usually * called from bio_free(). 
*/ -void bio_integrity_free(struct bio *bio) +void bio_integrity_free(struct bio *bio, struct bio_set *bs) { struct bio_integrity_payload *bip = bio->bi_integrity; @@ -92,8 +150,10 @@ void bio_integrity_free(struct bio *bio) && bip->bip_buf != NULL) kfree(bip->bip_buf); - bvec_free_bs(integrity_bio_set, bip->bip_vec, bip->bip_pool); - mempool_free(bip, bio_integrity_pool); + if (use_bip_pool(bip->bip_slab)) + mempool_free(bip, bs->bio_integrity_pool); + else + kmem_cache_free(bip_slab[bip->bip_slab].slab, bip); bio->bi_integrity = NULL; } @@ -114,7 +174,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) { + if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } @@ -647,8 +707,8 @@ void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors) bp->iv1 = bip->bip_vec[0]; bp->iv2 = bip->bip_vec[0]; - bp->bip1.bip_vec = &bp->iv1; - bp->bip2.bip_vec = &bp->iv2; + bp->bip1.bip_vec[0] = bp->iv1; + bp->bip2.bip_vec[0] = bp->iv2; bp->iv1.bv_len = sectors * bi->tuple_size; bp->iv2.bv_offset += sectors * bi->tuple_size; @@ -667,17 +727,19 @@ EXPORT_SYMBOL(bio_integrity_split); * @bio: New bio * @bio_src: Original bio * @gfp_mask: Memory allocation mask + * @bs: bio_set to allocate bip from * * Description: Called to allocate a bip when cloning a bio */ -int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) +int bio_integrity_clone(struct bio *bio, struct bio *bio_src, + gfp_t gfp_mask, struct bio_set *bs) { struct bio_integrity_payload *bip_src = bio_src->bi_integrity; struct bio_integrity_payload *bip; BUG_ON(bip_src == NULL); - bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); + bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs); if (bip == NULL) return -EIO; @@ -693,25 +755,43 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp_mask) } EXPORT_SYMBOL(bio_integrity_clone); -static int __init bio_integrity_init(void) +int bioset_integrity_create(struct bio_set *bs, int pool_size) { - kintegrityd_wq = create_workqueue("kintegrityd"); + unsigned int max_slab = vecs_to_idx(BIO_MAX_PAGES); + + bs->bio_integrity_pool = + mempool_create_slab_pool(pool_size, bip_slab[max_slab].slab); + if (!bs->bio_integrity_pool) + return -1; + + return 0; +} +EXPORT_SYMBOL(bioset_integrity_create); + +void bioset_integrity_free(struct bio_set *bs) +{ + if (bs->bio_integrity_pool) + mempool_destroy(bs->bio_integrity_pool); +} +EXPORT_SYMBOL(bioset_integrity_free); + +void __init bio_integrity_init(void) +{ + unsigned int i; + + kintegrityd_wq = create_workqueue("kintegrityd"); if (!kintegrityd_wq) panic("Failed to create kintegrityd\n"); - bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, - SLAB_HWCACHE_ALIGN|SLAB_PANIC); + for (i = 0 ; i < BIOVEC_NR_POOLS ; i++) { + unsigned int size; - bio_integrity_pool = mempool_create_slab_pool(BIO_POOL_SIZE, - bio_integrity_slab); - if (!bio_integrity_pool) - panic("bio_integrity: can't allocate bip pool\n"); + size = sizeof(struct bio_integrity_payload) + + bip_slab[i].nr_vecs * sizeof(struct bio_vec); - integrity_bio_set = bioset_create(BIO_POOL_SIZE, 0); - if (!integrity_bio_set) - panic("bio_integrity: can't allocate bio_set\n"); - - return 0; + bip_slab[i].slab = + kmem_cache_create(bip_slab[i].name, size, 0, + SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + } } 
-subsys_initcall(bio_integrity_init); diff --git a/fs/bio.c b/fs/bio.c index 24c914043532..1486b19fc431 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -238,7 +238,7 @@ void bio_free(struct bio *bio, struct bio_set *bs) bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); if (bio_integrity(bio)) - bio_integrity_free(bio); + bio_integrity_free(bio, bs); /* * If we have front padding, adjust the bio pointer before freeing @@ -341,7 +341,7 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) static void bio_kmalloc_destructor(struct bio *bio) { if (bio_integrity(bio)) - bio_integrity_free(bio); + bio_integrity_free(bio, fs_bio_set); kfree(bio); } @@ -472,7 +472,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) if (bio_integrity(bio)) { int ret; - ret = bio_integrity_clone(b, bio, gfp_mask); + ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set); if (ret < 0) { bio_put(b); @@ -1539,6 +1539,7 @@ void bioset_free(struct bio_set *bs) if (bs->bio_pool) mempool_destroy(bs->bio_pool); + bioset_integrity_free(bs); biovec_free_pools(bs); bio_put_slab(bs); @@ -1579,6 +1580,9 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) if (!bs->bio_pool) goto bad; + if (bioset_integrity_create(bs, pool_size)) + goto bad; + if (!biovec_create_pools(bs, pool_size)) return bs; @@ -1616,6 +1620,7 @@ static int __init init_bio(void) if (!bio_slabs) panic("bio: can't allocate bios\n"); + bio_integrity_init(); biovec_init_slabs(); fs_bio_set = bioset_create(BIO_POOL_SIZE, 0); diff --git a/include/linux/bio.h b/include/linux/bio.h index 2a04eb54c0dd..2892b710771c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -319,7 +319,6 @@ static inline int bio_has_allocated_vec(struct bio *bio) */ struct bio_integrity_payload { struct bio *bip_bio; /* parent bio */ - struct bio_vec *bip_vec; /* integrity data vector */ sector_t bip_sector; /* virtual start sector */ @@ -328,11 +327,12 @@ struct bio_integrity_payload { unsigned int bip_size; - unsigned short bip_pool; /* pool the ivec came from */ + unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_idx; /* current bip_vec index */ struct work_struct bip_work; /* I/O completion */ + struct bio_vec bip_vec[0]; /* embedded bvec array */ }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ @@ -430,6 +430,9 @@ struct bio_set { unsigned int front_pad; mempool_t *bio_pool; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + mempool_t *bio_integrity_pool; +#endif mempool_t *bvec_pool; }; @@ -634,8 +637,9 @@ static inline struct bio *bio_list_get(struct bio_list *bl) #define bio_integrity(bio) (bio->bi_integrity != NULL) +extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); -extern void bio_integrity_free(struct bio *); +extern void bio_integrity_free(struct bio *, struct bio_set *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern int bio_integrity_enabled(struct bio *bio); extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); @@ -645,21 +649,27 @@ extern void bio_integrity_endio(struct bio *, int); extern void bio_integrity_advance(struct bio *, unsigned int); extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); extern void bio_integrity_split(struct bio *, struct bio_pair *, int); -extern int bio_integrity_clone(struct bio *, struct bio *, 
gfp_t); +extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *); +extern int bioset_integrity_create(struct bio_set *, int); +extern void bioset_integrity_free(struct bio_set *); +extern void bio_integrity_init(void); #else /* CONFIG_BLK_DEV_INTEGRITY */ #define bio_integrity(a) (0) +#define bioset_integrity_create(a, b) (0) #define bio_integrity_prep(a) (0) #define bio_integrity_enabled(a) (0) -#define bio_integrity_clone(a, b, c) (0) -#define bio_integrity_free(a) do { } while (0) +#define bio_integrity_clone(a, b, c, d) (0) +#define bioset_integrity_free(a) do { } while (0) +#define bio_integrity_free(a, b) do { } while (0) #define bio_integrity_endio(a, b) do { } while (0) #define bio_integrity_advance(a, b) do { } while (0) #define bio_integrity_trim(a, b, c) do { } while (0) #define bio_integrity_split(a, b, c) do { } while (0) #define bio_integrity_set_tag(a, b, c) do { } while (0) #define bio_integrity_get_tag(a, b, c) do { } while (0) +#define bio_integrity_init(a) do { } while (0) #endif /* CONFIG_BLK_DEV_INTEGRITY */ -- cgit v1.2.3 From 018e0446890661504783f92388ecce7138c1566d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Jun 2009 16:27:10 +0200 Subject: block: get rid of queue-private command filter The initial patches to support this through sysfs export were broken and have been if 0'ed out in any release. So lets just kill the code and reclaim some space in struct request_queue, if anyone would later like to fixup the sysfs bits, the git history can easily restore the removed bits. Signed-off-by: Jens Axboe --- block/Makefile | 2 +- block/blk-core.c | 2 - block/bsg.c | 2 +- block/cmd-filter.c | 233 ------------------------------------------------- block/scsi_ioctl.c | 43 +++++++-- drivers/scsi/sg.c | 4 +- include/linux/blkdev.h | 15 +--- 7 files changed, 42 insertions(+), 259 deletions(-) delete mode 100644 block/cmd-filter.c (limited to 'include') diff --git a/block/Makefile b/block/Makefile index e9fa4dd690f2..6c54ed0ff755 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - ioctl.o genhd.o scsi_ioctl.o cmd-filter.o + ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/blk-core.c b/block/blk-core.c index 345d99da8d41..02b87134a167 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -595,8 +595,6 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) q->sg_reserved_size = INT_MAX; - blk_set_cmd_filter_defaults(&q->cmd_filter); - /* * all done */ diff --git a/block/bsg.c b/block/bsg.c index e7d475254248..5f184bb3ff9e 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -186,7 +186,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, return -EFAULT; if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) { - if (blk_verify_command(&q->cmd_filter, rq->cmd, has_write_perm)) + if (blk_verify_command(rq->cmd, has_write_perm)) return -EPERM; } else if (!capable(CAP_SYS_RAWIO)) return -EPERM; diff --git a/block/cmd-filter.c b/block/cmd-filter.c deleted file mode 100644 index 572bbc2f900d..000000000000 --- a/block/cmd-filter.c +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright 2004 Peter M. 
Jones - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public Licens - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- - * - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -int blk_verify_command(struct blk_cmd_filter *filter, - unsigned char *cmd, fmode_t has_write_perm) -{ - /* root can do any command. */ - if (capable(CAP_SYS_RAWIO)) - return 0; - - /* if there's no filter set, assume we're filtering everything out */ - if (!filter) - return -EPERM; - - /* Anybody who can open the device can do a read-safe command */ - if (test_bit(cmd[0], filter->read_ok)) - return 0; - - /* Write-safe commands require a writable open */ - if (test_bit(cmd[0], filter->write_ok) && has_write_perm) - return 0; - - return -EPERM; -} -EXPORT_SYMBOL(blk_verify_command); - -#if 0 -/* and now, the sysfs stuff */ -static ssize_t rcf_cmds_show(struct blk_cmd_filter *filter, char *page, - int rw) -{ - char *npage = page; - unsigned long *okbits; - int i; - - if (rw == READ) - okbits = filter->read_ok; - else - okbits = filter->write_ok; - - for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) { - if (test_bit(i, okbits)) { - npage += sprintf(npage, "0x%02x", i); - if (i < BLK_SCSI_MAX_CMDS - 1) - sprintf(npage++, " "); - } - } - - if (npage != page) - npage += sprintf(npage, "\n"); - - return npage - page; -} - -static ssize_t rcf_readcmds_show(struct blk_cmd_filter *filter, char *page) -{ - return rcf_cmds_show(filter, page, READ); -} - -static ssize_t rcf_writecmds_show(struct blk_cmd_filter *filter, - char *page) -{ - return rcf_cmds_show(filter, page, WRITE); -} - -static ssize_t rcf_cmds_store(struct blk_cmd_filter *filter, - const char *page, size_t count, int rw) -{ - unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits; - int cmd, set; - char *p, *status; - - if (rw == READ) { - memcpy(&okbits, filter->read_ok, sizeof(okbits)); - target_okbits = filter->read_ok; - } else { - memcpy(&okbits, filter->write_ok, sizeof(okbits)); - target_okbits = filter->write_ok; - } - - while ((p = strsep((char **)&page, " ")) != NULL) { - set = 1; - - if (p[0] == '+') { - p++; - } else if (p[0] == '-') { - set = 0; - p++; - } - - cmd = simple_strtol(p, &status, 16); - - /* either of these cases means invalid input, so do nothing. 
*/ - if ((status == p) || cmd >= BLK_SCSI_MAX_CMDS) - return -EINVAL; - - if (set) - __set_bit(cmd, okbits); - else - __clear_bit(cmd, okbits); - } - - memcpy(target_okbits, okbits, sizeof(okbits)); - return count; -} - -static ssize_t rcf_readcmds_store(struct blk_cmd_filter *filter, - const char *page, size_t count) -{ - return rcf_cmds_store(filter, page, count, READ); -} - -static ssize_t rcf_writecmds_store(struct blk_cmd_filter *filter, - const char *page, size_t count) -{ - return rcf_cmds_store(filter, page, count, WRITE); -} - -struct rcf_sysfs_entry { - struct attribute attr; - ssize_t (*show)(struct blk_cmd_filter *, char *); - ssize_t (*store)(struct blk_cmd_filter *, const char *, size_t); -}; - -static struct rcf_sysfs_entry rcf_readcmds_entry = { - .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR }, - .show = rcf_readcmds_show, - .store = rcf_readcmds_store, -}; - -static struct rcf_sysfs_entry rcf_writecmds_entry = { - .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR }, - .show = rcf_writecmds_show, - .store = rcf_writecmds_store, -}; - -static struct attribute *default_attrs[] = { - &rcf_readcmds_entry.attr, - &rcf_writecmds_entry.attr, - NULL, -}; - -#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr) - -static ssize_t -rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page) -{ - struct rcf_sysfs_entry *entry = to_rcf(attr); - struct blk_cmd_filter *filter; - - filter = container_of(kobj, struct blk_cmd_filter, kobj); - if (entry->show) - return entry->show(filter, page); - - return 0; -} - -static ssize_t -rcf_attr_store(struct kobject *kobj, struct attribute *attr, - const char *page, size_t length) -{ - struct rcf_sysfs_entry *entry = to_rcf(attr); - struct blk_cmd_filter *filter; - - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - - if (!entry->store) - return -EINVAL; - - filter = container_of(kobj, struct blk_cmd_filter, kobj); - return entry->store(filter, page, length); -} - -static struct sysfs_ops rcf_sysfs_ops = { - .show = rcf_attr_show, - .store = rcf_attr_store, -}; - -static struct kobj_type rcf_ktype = { - .sysfs_ops = &rcf_sysfs_ops, - .default_attrs = default_attrs, -}; - -int blk_register_filter(struct gendisk *disk) -{ - int ret; - struct blk_cmd_filter *filter = &disk->queue->cmd_filter; - - ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, - &disk_to_dev(disk)->kobj, - "%s", "cmd_filter"); - if (ret < 0) - return ret; - - return 0; -} -EXPORT_SYMBOL(blk_register_filter); - -void blk_unregister_filter(struct gendisk *disk) -{ - struct blk_cmd_filter *filter = &disk->queue->cmd_filter; - - kobject_put(&filter->kobj); -} -EXPORT_SYMBOL(blk_unregister_filter); -#endif diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 5f8e798ede4e..f0e0ce0a607d 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -32,6 +32,11 @@ #include #include +struct blk_cmd_filter { + unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; + unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; +} blk_default_cmd_filter; + /* Command group 3 is reserved and should never be used. 
*/ const unsigned char scsi_command_size_tbl[8] = { @@ -105,7 +110,7 @@ static int sg_emulated_host(struct request_queue *q, int __user *p) return put_user(1, p); } -void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter) +static void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter) { /* Basic read-only commands */ __set_bit(TEST_UNIT_READY, filter->read_ok); @@ -187,14 +192,37 @@ void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter) __set_bit(GPCMD_SET_STREAMING, filter->write_ok); __set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok); } -EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults); + +int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm) +{ + struct blk_cmd_filter *filter = &blk_default_cmd_filter; + + /* root can do any command. */ + if (capable(CAP_SYS_RAWIO)) + return 0; + + /* if there's no filter set, assume we're filtering everything out */ + if (!filter) + return -EPERM; + + /* Anybody who can open the device can do a read-safe command */ + if (test_bit(cmd[0], filter->read_ok)) + return 0; + + /* Write-safe commands require a writable open */ + if (test_bit(cmd[0], filter->write_ok) && has_write_perm) + return 0; + + return -EPERM; +} +EXPORT_SYMBOL(blk_verify_command); static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, struct sg_io_hdr *hdr, fmode_t mode) { if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len)) return -EFAULT; - if (blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE)) + if (blk_verify_command(rq->cmd, mode & FMODE_WRITE)) return -EPERM; /* @@ -427,7 +455,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) goto error; - err = blk_verify_command(&q->cmd_filter, rq->cmd, mode & FMODE_WRITE); + err = blk_verify_command(rq->cmd, mode & FMODE_WRITE); if (err) goto error; @@ -645,5 +673,10 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod blk_put_queue(q); return err; } - EXPORT_SYMBOL(scsi_cmd_ioctl); + +int __init blk_scsi_ioctl_init(void) +{ + blk_set_cmd_filter_defaults(&blk_default_cmd_filter); + return 0; +} diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 8201387b4daa..ef142fd47a83 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -210,13 +210,11 @@ static void sg_put_dev(Sg_device *sdp); static int sg_allow_access(struct file *filp, unsigned char *cmd) { struct sg_fd *sfp = (struct sg_fd *)filp->private_data; - struct request_queue *q = sfp->parentdp->device->request_queue; if (sfp->parentdp->device->type == TYPE_SCANNER) return 0; - return blk_verify_command(&q->cmd_filter, - cmd, filp->f_mode & FMODE_WRITE); + return blk_verify_command(cmd, filp->f_mode & FMODE_WRITE); } static int diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8963d9149b5f..49ae07951d55 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -301,12 +301,6 @@ struct blk_queue_tag { #define BLK_SCSI_MAX_CMDS (256) #define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) -struct blk_cmd_filter { - unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; - unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; - struct kobject kobj; -}; - struct queue_limits { unsigned long bounce_pfn; unsigned long seg_boundary_mask; @@ -445,7 +439,6 @@ struct request_queue #if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device bsg_dev; #endif - struct blk_cmd_filter cmd_filter; }; #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ @@ -998,13 +991,7 
@@ static inline int sb_issue_discard(struct super_block *sb, return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); } -/* -* command filter functions -*/ -extern int blk_verify_command(struct blk_cmd_filter *filter, - unsigned char *cmd, fmode_t has_write_perm); -extern void blk_unregister_filter(struct gendisk *disk); -extern void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter); +extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); #define MAX_PHYS_SEGMENTS 128 #define MAX_HW_SEGMENTS 128 -- cgit v1.2.3 From ee8076ed3e1cdd0cd1e61318386932669c90b92f Mon Sep 17 00:00:00 2001 From: Andres Salomon Date: Thu, 2 Jul 2009 09:45:18 -0400 Subject: power_supply: Add a charge_type property, and use it for olpc driver This adds a new sysfs file called 'charge_type' which displays the type of charging (unknown, n/a, trickle charge, or fast charging). This allows things like battery diagnostics to determine what the battery/EC is doing without resorting to changing the 'status' sysfs output. Signed-off-by: Andres Salomon Acked-by: Mark Brown Signed-off-by: Anton Vorontsov --- Documentation/power/power_supply_class.txt | 5 +++++ drivers/power/olpc_battery.c | 9 +++++++++ drivers/power/power_supply_sysfs.c | 6 ++++++ include/linux/power_supply.h | 8 ++++++++ 4 files changed, 28 insertions(+) (limited to 'include') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index 709d95571d7b..9f16c5178b66 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -76,6 +76,11 @@ STATUS - this attribute represents operating status (charging, full, discharging (i.e. powering a load), etc.). This corresponds to BATTERY_STATUS_* values, as defined in battery.h. +CHARGE_TYPE - batteries can typically charge at different rates. +This defines trickle and fast charges. For batteries that +are already charged or discharging, 'n/a' can be displayed (or +'unknown', if the status is not known). + HEALTH - represents health of the battery, values corresponds to POWER_SUPPLY_HEALTH_*, defined in battery.h. 
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c index 602bbd008f78..8fefe5a73558 100644 --- a/drivers/power/olpc_battery.c +++ b/drivers/power/olpc_battery.c @@ -233,6 +233,14 @@ static int olpc_bat_get_property(struct power_supply *psy, if (ret) return ret; break; + case POWER_SUPPLY_PROP_CHARGE_TYPE: + if (ec_byte & BAT_STAT_TRICKLE) + val->intval = POWER_SUPPLY_CHARGE_TYPE_TRICKLE; + else if (ec_byte & BAT_STAT_CHARGING) + val->intval = POWER_SUPPLY_CHARGE_TYPE_FAST; + else + val->intval = POWER_SUPPLY_CHARGE_TYPE_NONE; + break; case POWER_SUPPLY_PROP_PRESENT: val->intval = !!(ec_byte & (BAT_STAT_PRESENT | BAT_STAT_TRICKLE)); @@ -325,6 +333,7 @@ static int olpc_bat_get_property(struct power_supply *psy, static enum power_supply_property olpc_bat_props[] = { POWER_SUPPLY_PROP_STATUS, + POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_TECHNOLOGY, diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index 9deabbde6fd6..08144393d64b 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -43,6 +43,9 @@ static ssize_t power_supply_show_property(struct device *dev, static char *status_text[] = { "Unknown", "Charging", "Discharging", "Not charging", "Full" }; + static char *charge_type[] = { + "Unknown", "N/A", "Trickle", "Fast" + }; static char *health_text[] = { "Unknown", "Good", "Overheat", "Dead", "Over voltage", "Unspecified failure", "Cold", @@ -70,6 +73,8 @@ static ssize_t power_supply_show_property(struct device *dev, if (off == POWER_SUPPLY_PROP_STATUS) return sprintf(buf, "%s\n", status_text[value.intval]); + else if (off == POWER_SUPPLY_PROP_CHARGE_TYPE) + return sprintf(buf, "%s\n", charge_type[value.intval]); else if (off == POWER_SUPPLY_PROP_HEALTH) return sprintf(buf, "%s\n", health_text[value.intval]); else if (off == POWER_SUPPLY_PROP_TECHNOLOGY) @@ -86,6 +91,7 @@ static ssize_t power_supply_show_property(struct device *dev, static struct device_attribute power_supply_attrs[] = { /* Properties of type `int' */ POWER_SUPPLY_ATTR(status), + POWER_SUPPLY_ATTR(charge_type), POWER_SUPPLY_ATTR(health), POWER_SUPPLY_ATTR(present), POWER_SUPPLY_ATTR(online), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 0ab6aa171241..4c7c6fc35487 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -38,6 +38,13 @@ enum { POWER_SUPPLY_STATUS_FULL, }; +enum { + POWER_SUPPLY_CHARGE_TYPE_UNKNOWN = 0, + POWER_SUPPLY_CHARGE_TYPE_NONE, + POWER_SUPPLY_CHARGE_TYPE_TRICKLE, + POWER_SUPPLY_CHARGE_TYPE_FAST, +}; + enum { POWER_SUPPLY_HEALTH_UNKNOWN = 0, POWER_SUPPLY_HEALTH_GOOD, @@ -70,6 +77,7 @@ enum { enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, + POWER_SUPPLY_PROP_CHARGE_TYPE, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, -- cgit v1.2.3 From 887b5ea3683ce04966e35fa2e5fe215bedcde73c Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 1 Jul 2009 13:38:26 +0000 Subject: if_ether: add define for 1588 aka Timesync This patch adds ETH_P_1588 protocol ID define. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- include/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index ae3a1871413d..70fdba2bbf71 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -78,6 +78,7 @@ #define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ #define ETH_P_AOE 0x88A2 /* ATA over Ethernet */ #define ETH_P_TIPC 0x88CA /* TIPC */ +#define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETH_P_FIP 0x8914 /* FCoE Initialization Protocol */ #define ETH_P_EDSA 0xDADA /* Ethertype DSA [ NOT AN OFFICIALLY REGISTERED ID ] */ -- cgit v1.2.3 From 03b042bf1dc14a268a3d65d38b4ec2a4261e8477 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 25 Jun 2009 09:08:16 -0700 Subject: rcu: Add synchronize_sched_expedited() primitive This adds the synchronize_sched_expedited() primitive that implements the "big hammer" expedited RCU grace periods. This primitive is placed in kernel/sched.c rather than kernel/rcupdate.c due to its need to interact closely with the migration_thread() kthread. The idea is to wake up this kthread with req->task set to NULL, in response to which the kthread reports the quiescent state resulting from the kthread having been scheduled. Because this patch needs to fallback to the slow versions of the primitives in response to some races with CPU onlining and offlining, a new synchronize_rcu_bh() primitive is added as well. Signed-off-by: Paul E. McKenney Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Cc: davem@davemloft.net Cc: dada1@cosmosbay.com Cc: zbr@ioremap.net Cc: jeff.chua.linux@gmail.com Cc: paulus@samba.org Cc: laijs@cn.fujitsu.com Cc: jengelh@medozas.de Cc: r000n@r000n.net Cc: benh@kernel.crashing.org Cc: mathieu.desnoyers@polymtl.ca LKML-Reference: <12459460982947-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 25 ++++----- include/linux/rcupreempt.h | 10 ++++ include/linux/rcutree.h | 12 ++++- kernel/rcupdate.c | 25 +++++++++ kernel/sched.c | 129 ++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 186 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 0cdfdb622faa..3c89d6a2591f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -51,7 +51,19 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; -/* Internal to kernel, but needed by rcupreempt.h. 
*/ +/* Exported common interfaces */ +extern void synchronize_rcu(void); +extern void synchronize_rcu_bh(void); +extern void rcu_barrier(void); +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); +extern void synchronize_sched_expedited(void); +extern int sched_expedited_torture_stats(char *page); + +/* Internal to kernel */ +extern void rcu_init(void); +extern void rcu_scheduler_starting(void); +extern int rcu_needs_cpu(int cpu); extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) @@ -257,15 +269,4 @@ extern void call_rcu(struct rcu_head *head, extern void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *head)); -/* Exported common interfaces */ -extern void synchronize_rcu(void); -extern void rcu_barrier(void); -extern void rcu_barrier_bh(void); -extern void rcu_barrier_sched(void); - -/* Internal to kernel */ -extern void rcu_init(void); -extern void rcu_scheduler_starting(void); -extern int rcu_needs_cpu(int cpu); - #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index fce522782ffa..f164ac9b7807 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -74,6 +74,16 @@ extern int rcu_needs_cpu(int cpu); extern void __synchronize_sched(void); +static inline void synchronize_rcu_expedited(void) +{ + synchronize_rcu(); /* Placeholder for new rcupreempt implementation. */ +} + +static inline void synchronize_rcu_bh_expedited(void) +{ + synchronize_rcu_bh(); /* Placeholder for new rcupreempt impl. */ +} + extern void __rcu_init(void); extern void rcu_init_sched(void); extern void rcu_check_callbacks(int cpu, int user); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 5a5153806c42..d4dfd2489633 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -286,8 +286,14 @@ static inline void __rcu_read_unlock_bh(void) #define call_rcu_sched(head, func) call_rcu(head, func) -static inline void rcu_init_sched(void) +static inline void synchronize_rcu_expedited(void) +{ + synchronize_sched_expedited(); +} + +static inline void synchronize_rcu_bh_expedited(void) { + synchronize_sched_expedited(); } extern void __rcu_init(void); @@ -297,6 +303,10 @@ extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); +static inline void rcu_init_sched(void) +{ +} + #ifdef CONFIG_NO_HZ void rcu_enter_nohz(void); void rcu_exit_nohz(void); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index a967c9feb90a..eae29c25fb14 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -98,6 +98,30 @@ void synchronize_rcu(void) } EXPORT_SYMBOL_GPL(synchronize_rcu); +/** + * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed. + * + * Control will return to the caller some time after a full rcu_bh grace + * period has elapsed, in other words after all currently executing rcu_bh + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(), + * and may be nested. + */ +void synchronize_rcu_bh(void) +{ + struct rcu_synchronize rcu; + + if (rcu_blocking_is_gp()) + return; + + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu_bh(&rcu.head, wakeme_after_rcu); + /* Wait for it. 
*/ + wait_for_completion(&rcu.completion); +} +EXPORT_SYMBOL_GPL(synchronize_rcu_bh); + static void rcu_barrier_callback(struct rcu_head *notused) { if (atomic_dec_and_test(&rcu_barrier_cpu_count)) @@ -129,6 +153,7 @@ static void rcu_barrier_func(void *type) static inline void wait_migrated_callbacks(void) { wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count)); + smp_mb(); /* In case we didn't sleep. */ } /* diff --git a/kernel/sched.c b/kernel/sched.c index 7c9098d186e6..9ae80bec1c1e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7024,6 +7024,11 @@ fail: return ret; } +#define RCU_MIGRATION_IDLE 0 +#define RCU_MIGRATION_NEED_QS 1 +#define RCU_MIGRATION_GOT_QS 2 +#define RCU_MIGRATION_MUST_SYNC 3 + /* * migration_thread - this is a highprio system thread that performs * thread migration by bumping thread off CPU then 'pushing' onto @@ -7031,6 +7036,7 @@ fail: */ static int migration_thread(void *data) { + int badcpu; int cpu = (long)data; struct rq *rq; @@ -7065,8 +7071,17 @@ static int migration_thread(void *data) req = list_entry(head->next, struct migration_req, list); list_del_init(head->next); - spin_unlock(&rq->lock); - __migrate_task(req->task, cpu, req->dest_cpu); + if (req->task != NULL) { + spin_unlock(&rq->lock); + __migrate_task(req->task, cpu, req->dest_cpu); + } else if (likely(cpu == (badcpu = smp_processor_id()))) { + req->dest_cpu = RCU_MIGRATION_GOT_QS; + spin_unlock(&rq->lock); + } else { + req->dest_cpu = RCU_MIGRATION_MUST_SYNC; + spin_unlock(&rq->lock); + WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); + } local_irq_enable(); complete(&req->done); @@ -10554,3 +10569,113 @@ struct cgroup_subsys cpuacct_subsys = { .subsys_id = cpuacct_subsys_id, }; #endif /* CONFIG_CGROUP_CPUACCT */ + +#ifndef CONFIG_SMP + +int rcu_expedited_torture_stats(char *page) +{ + return 0; +} +EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats); + +void synchronize_sched_expedited(void) +{ +} +EXPORT_SYMBOL_GPL(synchronize_sched_expedited); + +#else /* #ifndef CONFIG_SMP */ + +static DEFINE_PER_CPU(struct migration_req, rcu_migration_req); +static DEFINE_MUTEX(rcu_sched_expedited_mutex); + +#define RCU_EXPEDITED_STATE_POST -2 +#define RCU_EXPEDITED_STATE_IDLE -1 + +static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; + +int rcu_expedited_torture_stats(char *page) +{ + int cnt = 0; + int cpu; + + cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state); + for_each_online_cpu(cpu) { + cnt += sprintf(&page[cnt], " %d:%d", + cpu, per_cpu(rcu_migration_req, cpu).dest_cpu); + } + cnt += sprintf(&page[cnt], "\n"); + return cnt; +} +EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats); + +static long synchronize_sched_expedited_count; + +/* + * Wait for an rcu-sched grace period to elapse, but use "big hammer" + * approach to force grace period to end quickly. This consumes + * significant time on all CPUs, and is thus not recommended for + * any sort of common-case code. + * + * Note that it is illegal to call this function while holding any + * lock that is acquired by a CPU-hotplug notifier. Failing to + * observe this restriction will result in deadlock. + */ +void synchronize_sched_expedited(void) +{ + int cpu; + unsigned long flags; + bool need_full_sync = 0; + struct rq *rq; + struct migration_req *req; + long snap; + int trycount = 0; + + smp_mb(); /* ensure prior mod happens before capturing snap. 
*/ + snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1; + get_online_cpus(); + while (!mutex_trylock(&rcu_sched_expedited_mutex)) { + put_online_cpus(); + if (trycount++ < 10) + udelay(trycount * num_online_cpus()); + else { + synchronize_sched(); + return; + } + if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) { + smp_mb(); /* ensure test happens before caller kfree */ + return; + } + get_online_cpus(); + } + rcu_expedited_state = RCU_EXPEDITED_STATE_POST; + for_each_online_cpu(cpu) { + rq = cpu_rq(cpu); + req = &per_cpu(rcu_migration_req, cpu); + init_completion(&req->done); + req->task = NULL; + req->dest_cpu = RCU_MIGRATION_NEED_QS; + spin_lock_irqsave(&rq->lock, flags); + list_add(&req->list, &rq->migration_queue); + spin_unlock_irqrestore(&rq->lock, flags); + wake_up_process(rq->migration_thread); + } + for_each_online_cpu(cpu) { + rcu_expedited_state = cpu; + req = &per_cpu(rcu_migration_req, cpu); + rq = cpu_rq(cpu); + wait_for_completion(&req->done); + spin_lock_irqsave(&rq->lock, flags); + if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) + need_full_sync = 1; + req->dest_cpu = RCU_MIGRATION_IDLE; + spin_unlock_irqrestore(&rq->lock, flags); + } + rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; + mutex_unlock(&rcu_sched_expedited_mutex); + put_online_cpus(); + if (need_full_sync) + synchronize_sched(); +} +EXPORT_SYMBOL_GPL(synchronize_sched_expedited); + +#endif /* #else #ifndef CONFIG_SMP */ -- cgit v1.2.3 From 788e5abc5441e9046dd91c995c6f1f75bbd144bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:58 +0900 Subject: percpu: drop @unit_size from embed first chunk allocator The only extra feature @unit_size provides is making dead space at the end of the first chunk which doesn't have any valid usecase. Drop the parameter. This will increase consistency with the generalized 4k allocator. James Bottomley spotted a missing conversion for the default setup_per_cpu_areas() which caused build breakage on all archs which use it.
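For context, this is the shape the generic setup_per_cpu_areas() call takes once the parameter is gone; it is a condensed restatement of the mm/percpu.c hunk further down rather than new code, with the allocator now picking the unit size on its own:

	unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE,
					   PERCPU_DYNAMIC_RESERVE);
	if (unit_size < 0)
		panic("Failed to initialize percpu areas.");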
[ Impact: drop unused code path ] Signed-off-by: Tejun Heo Cc: James Bottomley Cc: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 2 +- include/linux/percpu.h | 2 +- mm/percpu.c | 18 ++++++------------ 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 29a3eef7cf4a..14728206fb52 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -342,7 +342,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) return -EINVAL; return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); + reserve - PERCPU_FIRST_CHUNK_RESERVE); } /* diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e5000343dd61..83bff053bd1c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -69,7 +69,7 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size); + ssize_t dyn_size); /* * Use this to get to a cpu's version of the per-cpu object diff --git a/mm/percpu.c b/mm/percpu.c index 19dd83b5cbdc..fc6babe6e554 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1207,7 +1207,6 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes, -1 for auto - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto * * This is a helper to ease setting up embedded first percpu chunk and * can be called where pcpu_setup_first_chunk() is expected. @@ -1219,9 +1218,9 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) * page size. * * When @dyn_size is positive, dynamic area might be larger than - * specified to fill page alignment. Also, when @dyn_size is auto, - * @dyn_size does not fill the whole first chunk but only what's - * necessary for page alignment after static and reserved areas. + * specified to fill page alignment. When @dyn_size is auto, + * @dyn_size is just big enough to fill page alignment after static + * and reserved areas. * * If the needed size is smaller than the minimum or specified unit * size, the leftover is returned to the bootmem allocator. @@ -1231,7 +1230,7 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) * percpu access on success, -errno on failure. */ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size) + ssize_t dyn_size) { size_t chunk_size; unsigned int cpu; @@ -1242,12 +1241,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, if (dyn_size != 0) dyn_size = pcpue_size - static_size - reserved_size; - if (unit_size >= 0) { - BUG_ON(unit_size < pcpue_size); - pcpue_unit_size = unit_size; - } else - pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - + pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); chunk_size = pcpue_unit_size * num_possible_cpus(); pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, @@ -1304,7 +1298,7 @@ void __init setup_per_cpu_areas(void) * what the legacy allocator did. 
*/ unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE, -1); + PERCPU_DYNAMIC_RESERVE); if (unit_size < 0) panic("Failed to initialized percpu areas."); -- cgit v1.2.3 From d4b95f80399471e4bce5e992700ff7f06ef91f6a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: x86,percpu: generalize 4k first chunk allocator Generalize and move x86 setup_pcpu_4k() into pcpu_4k_first_chunk(). setup_pcpu_4k() now is a simple wrapper around the generalized version. Other than taking size parameters and using arch supplied callbacks to allocate/free memory, pcpu_4k_first_chunk() is identical to the original implementation. This simplifies arch code and will help converting more archs to dynamic percpu allocator. While at it, s/pcpu_populate_pte_fn_t/pcpu_fc_populate_pte_fn_t/ for consistency. [ Impact: code reorganization and generalization ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/x86/kernel/setup_percpu.c | 78 ++++++++++---------------------------- include/linux/percpu.h | 12 +++++- mm/percpu.c | 85 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 113 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 14728206fb52..ab896b31e80b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -123,6 +123,19 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, #endif } +/* + * Helpers for first chunk memory allocation + */ +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size) +{ + return pcpu_alloc_bootmem(cpu, size, size); +} + +static void __init pcpu_fc_free(void *ptr, size_t size) +{ + free_bootmem(__pa(ptr), size); +} + /* * Large page remap allocator * @@ -346,22 +359,11 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) } /* - * 4k page allocator + * 4k allocator * - * This is the basic allocator. Static percpu area is allocated - * page-by-page and most of initialization is done by the generic - * setup function. + * Boring fallback 4k allocator. This allocator puts more pressure on + * PTE TLBs but other than that behaves nicely on both UMA and NUMA. 
*/ -static struct page **pcpu4k_pages __initdata; -static int pcpu4k_nr_static_pages __initdata; - -static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) -{ - if (pageno < pcpu4k_nr_static_pages) - return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; - return NULL; -} - static void __init pcpu4k_populate_pte(unsigned long addr) { populate_extra_pte(addr); @@ -369,51 +371,9 @@ static void __init pcpu4k_populate_pte(unsigned long addr) static ssize_t __init setup_pcpu_4k(size_t static_size) { - size_t pages_size; - unsigned int cpu; - int i, j; - ssize_t ret; - - pcpu4k_nr_static_pages = PFN_UP(static_size); - - /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() - * sizeof(pcpu4k_pages[0])); - pcpu4k_pages = alloc_bootmem(pages_size); - - /* allocate and copy */ - j = 0; - for_each_possible_cpu(cpu) - for (i = 0; i < pcpu4k_nr_static_pages; i++) { - void *ptr; - - ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); - if (!ptr) { - pr_warning("PERCPU: failed to allocate " - "4k page for cpu%u\n", cpu); - goto enomem; - } - - memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); - pcpu4k_pages[j++] = virt_to_page(ptr); - } - - /* we're ready, commit */ - pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", - pcpu4k_nr_static_pages, static_size); - - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, -1, - -1, NULL, pcpu4k_populate_pte); - goto out_free_ar; - -enomem: - while (--j >= 0) - free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); - ret = -ENOMEM; -out_free_ar: - free_bootmem(__pa(pcpu4k_pages), pages_size); - return ret; + return pcpu_4k_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpu4k_populate_pte); } /* for explicit first chunk allocator selection */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 83bff053bd1c..41b5bfab4195 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -59,18 +59,26 @@ extern void *pcpu_base_addr; typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); -typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); +typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); +typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, ssize_t dyn_size, ssize_t unit_size, void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn); + pcpu_fc_populate_pte_fn_t populate_pte_fn); extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size); +extern ssize_t __init pcpu_4k_first_chunk( + size_t static_size, size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn); + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. 
Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index fc6babe6e554..27b0f40a3ea8 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1037,7 +1037,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, ssize_t dyn_size, ssize_t unit_size, void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn) + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct first_vm; static int smap[2], dmap[2]; @@ -1270,6 +1270,89 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, pcpue_unit_size, pcpue_ptr, NULL); } +/* + * 4k page first chunk setup helper. + */ +static struct page **pcpu4k_pages __initdata; +static int pcpu4k_nr_static_pages __initdata; + +static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) +{ + if (pageno < pcpu4k_nr_static_pages) + return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; + return NULL; +} + +/** + * pcpu_4k_first_chunk - map the first chunk using PAGE_SIZE pages + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE + * @free_fn: funtion to free percpu page, always called with PAGE_SIZE + * @populate_pte_fn: function to populate pte + * + * This is a helper to ease setting up embedded first percpu chunk and + * can be called where pcpu_setup_first_chunk() is expected. + * + * This is the basic allocator. Static percpu area is allocated + * page-by-page into vmalloc area. + * + * RETURNS: + * The determined pcpu_unit_size which can be used to initialize + * percpu access on success, -errno on failure. + */ +ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) +{ + size_t pages_size; + unsigned int cpu; + int i, j; + ssize_t ret; + + pcpu4k_nr_static_pages = PFN_UP(static_size); + + /* unaligned allocations can't be freed, round up to page size */ + pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() * + sizeof(pcpu4k_pages[0])); + pcpu4k_pages = alloc_bootmem(pages_size); + + /* allocate and copy */ + j = 0; + for_each_possible_cpu(cpu) + for (i = 0; i < pcpu4k_nr_static_pages; i++) { + void *ptr; + + ptr = alloc_fn(cpu, PAGE_SIZE); + if (!ptr) { + pr_warning("PERCPU: failed to allocate " + "4k page for cpu%u\n", cpu); + goto enomem; + } + + memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); + pcpu4k_pages[j++] = virt_to_page(ptr); + } + + /* we're ready, commit */ + pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", + pcpu4k_nr_static_pages, static_size); + + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, + reserved_size, -1, + -1, NULL, populate_pte_fn); + goto out_free_ar; + +enomem: + while (--j >= 0) + free_fn(page_address(pcpu4k_pages[j]), PAGE_SIZE); + ret = -ENOMEM; +out_free_ar: + free_bootmem(__pa(pcpu4k_pages), pages_size); + return ret; +} + /* * Generic percpu area setup. * -- cgit v1.2.3 From 8c4bfc6e8801616ab2e01c38140b2159b388d2ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: x86,percpu: generalize lpage first chunk allocator Generalize and move x86 setup_pcpu_lpage() into pcpu_lpage_first_chunk(). setup_pcpu_lpage() now is a simple wrapper around the generalized version. 
Other than taking size parameters and using arch supplied callbacks to allocate/free/map memory, pcpu_lpage_first_chunk() is identical to the original implementation. This simplifies arch code and will help converting more archs to dynamic percpu allocator. While at it, factor out pcpu_calc_fc_sizes() which is common to pcpu_embed_first_chunk() and pcpu_lpage_first_chunk(). [ Impact: code reorganization and generalization ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/x86/include/asm/percpu.h | 9 -- arch/x86/kernel/setup_percpu.c | 169 +++------------------------------ arch/x86/mm/pageattr.c | 1 + include/linux/percpu.h | 27 ++++++ mm/percpu.c | 209 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 244 insertions(+), 171 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 103f1ddb0d85..a18c038a3079 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -156,15 +156,6 @@ do { \ /* We can use this directly for local CPU (faster). */ DECLARE_PER_CPU(unsigned long, this_cpu_off); -#ifdef CONFIG_NEED_MULTIPLE_NODES -void *pcpu_lpage_remapped(void *kaddr); -#else -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} -#endif - #endif /* !__ASSEMBLY__ */ #ifdef CONFIG_SMP diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ab896b31e80b..4f2e0ac9130b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -137,44 +137,21 @@ static void __init pcpu_fc_free(void *ptr, size_t size) } /* - * Large page remap allocator - * - * This allocator uses PMD page as unit. A PMD page is allocated for - * each cpu and each is remapped into vmalloc area using PMD mapping. - * As PMD page is quite large, only part of it is used for the first - * chunk. Unused part is returned to the bootmem allocator. - * - * So, the PMD pages are mapped twice - once to the physical mapping - * and to the vmalloc area for the first percpu chunk. The double - * mapping does add one more PMD TLB entry pressure but still is much - * better than only using 4k mappings while still being NUMA friendly. + * Large page remapping allocator */ #ifdef CONFIG_NEED_MULTIPLE_NODES -struct pcpul_ent { - unsigned int cpu; - void *ptr; -}; - -static size_t pcpul_size; -static struct pcpul_ent *pcpul_map; -static struct vm_struct pcpul_vm; - -static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) +static void __init pcpul_map(void *ptr, size_t size, void *addr) { - size_t off = (size_t)pageno << PAGE_SHIFT; + pmd_t *pmd, pmd_v; - if (off >= pcpul_size) - return NULL; - - return virt_to_page(pcpul_map[cpu].ptr + off); + pmd = populate_extra_pmd((unsigned long)addr); + pmd_v = pfn_pmd(page_to_pfn(virt_to_page(ptr)), PAGE_KERNEL_LARGE); + set_pmd(pmd, pmd_v); } static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { - size_t map_size, dyn_size; - unsigned int cpu; - int i, j; - ssize_t ret; + size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; @@ -198,134 +175,10 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) return -EINVAL; } - /* - * Currently supports only single page. Supporting multiple - * pages won't be too difficult if it ever becomes necessary. 
- */ - pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - if (pcpul_size > PMD_SIZE) { - pr_warning("PERCPU: static data is larger than large page, " - "can't use large page\n"); - return -EINVAL; - } - dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; - - /* allocate pointer array and alloc large pages */ - map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); - pcpul_map = alloc_bootmem(map_size); - - for_each_possible_cpu(cpu) { - pcpul_map[cpu].cpu = cpu; - pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, - PMD_SIZE); - if (!pcpul_map[cpu].ptr) { - pr_warning("PERCPU: failed to allocate large page " - "for cpu%u\n", cpu); - goto enomem; - } - - /* - * Only use pcpul_size bytes and give back the rest. - * - * Ingo: The 2MB up-rounding bootmem is needed to make - * sure the partial 2MB page is still fully RAM - it's - * not well-specified to have a PAT-incompatible area - * (unmapped RAM, device memory, etc.) in that hole. - */ - free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), - PMD_SIZE - pcpul_size); - - memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); - } - - /* allocate address and map */ - pcpul_vm.flags = VM_ALLOC; - pcpul_vm.size = num_possible_cpus() * PMD_SIZE; - vm_area_register_early(&pcpul_vm, PMD_SIZE); - - for_each_possible_cpu(cpu) { - pmd_t *pmd, pmd_v; - - pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + - cpu * PMD_SIZE); - pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), - PAGE_KERNEL_LARGE); - set_pmd(pmd, pmd_v); - } - - /* we're ready, commit */ - pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", pcpul_vm.addr, static_size); - - ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - PMD_SIZE, pcpul_vm.addr, NULL); - - /* sort pcpul_map array for pcpu_lpage_remapped() */ - for (i = 0; i < num_possible_cpus() - 1; i++) - for (j = i + 1; j < num_possible_cpus(); j++) - if (pcpul_map[i].ptr > pcpul_map[j].ptr) { - struct pcpul_ent tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } - - return ret; - -enomem: - for_each_possible_cpu(cpu) - if (pcpul_map[cpu].ptr) - free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); - free_bootmem(__pa(pcpul_map), map_size); - return -ENOMEM; -} - -/** - * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area - * @kaddr: the kernel address in question - * - * Determine whether @kaddr falls in the pcpul recycled area. This is - * used by pageattr to detect VM aliases and break up the pcpu PMD - * mapping such that the same physical page is not mapped under - * different attributes. - * - * The recycled area is always at the tail of a partially used PMD - * page. - * - * RETURNS: - * Address of corresponding remapped pcpu address if match is found; - * otherwise, NULL. - */ -void *pcpu_lpage_remapped(void *kaddr) -{ - void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); - unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; - int left = 0, right = num_possible_cpus() - 1; - int pos; - - /* pcpul in use at all? 
*/ - if (!pcpul_map) - return NULL; - - /* okay, perform binary search */ - while (left <= right) { - pos = (left + right) / 2; - - if (pcpul_map[pos].ptr < pmd_addr) - left = pos + 1; - else if (pcpul_map[pos].ptr > pmd_addr) - right = pos - 1; - else { - /* it shouldn't be in the area for the first chunk */ - WARN_ON(offset < pcpul_size); - - return pcpul_vm.addr + - pcpul_map[pos].cpu * PMD_SIZE + offset; - } - } - - return NULL; + return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + reserve - PERCPU_FIRST_CHUNK_RESERVE, + PMD_SIZE, + pcpu_fc_alloc, pcpu_fc_free, pcpul_map); } #else static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 1b734d7a8966..c106f7852424 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 41b5bfab4195..9f6bfd7d4b92 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -62,6 +62,7 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); +typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, @@ -79,6 +80,32 @@ extern ssize_t __init pcpu_4k_first_chunk( pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); +#ifdef CONFIG_NEED_MULTIPLE_NODES +extern ssize_t __init pcpu_lpage_first_chunk( + size_t static_size, size_t reserved_size, + ssize_t dyn_size, size_t lpage_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn); + +extern void *pcpu_lpage_remapped(void *kaddr); +#else +static inline ssize_t __init pcpu_lpage_first_chunk( + size_t static_size, size_t reserved_size, + ssize_t dyn_size, size_t lpage_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn) +{ + return -EINVAL; +} + +static inline void *pcpu_lpage_remapped(void *kaddr) +{ + return NULL; +} +#endif + /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index f3fe7bc7378f..17db527ee2e2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1190,6 +1190,19 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, return pcpu_unit_size; } +static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep) +{ + size_t size_sum; + + size_sum = PFN_ALIGN(static_size + reserved_size + + (*dyn_sizep >= 0 ? *dyn_sizep : 0)); + if (*dyn_sizep != 0) + *dyn_sizep = size_sum - static_size - reserved_size; + + return size_sum; +} + /* * Embedding first chunk setup helper. */ @@ -1241,10 +1254,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, unsigned int cpu; /* determine parameters and allocate */ - pcpue_size = PFN_ALIGN(static_size + reserved_size + - (dyn_size >= 0 ? 
dyn_size : 0)); - if (dyn_size != 0) - dyn_size = pcpue_size - static_size - reserved_size; + pcpue_size = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); chunk_size = pcpue_unit_size * num_possible_cpus(); @@ -1390,6 +1400,197 @@ out_free_ar: return ret; } +/* + * Large page remapping first chunk setup helper + */ +#ifdef CONFIG_NEED_MULTIPLE_NODES +struct pcpul_ent { + unsigned int cpu; + void *ptr; +}; + +static size_t pcpul_size; +static size_t pcpul_unit_size; +static struct pcpul_ent *pcpul_map; +static struct vm_struct pcpul_vm; + +static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) +{ + size_t off = (size_t)pageno << PAGE_SHIFT; + + if (off >= pcpul_size) + return NULL; + + return virt_to_page(pcpul_map[cpu].ptr + off); +} + +/** + * pcpu_lpage_first_chunk - remap the first percpu chunk using large page + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @lpage_size: the size of a large page + * @alloc_fn: function to allocate percpu lpage, always called with lpage_size + * @free_fn: function to free percpu memory, @size <= lpage_size + * @map_fn: function to map percpu lpage, always called with lpage_size + * + * This allocator uses large page as unit. A large page is allocated + * for each cpu and each is remapped into vmalloc area using large + * page mapping. As large page can be quite large, only part of it is + * used for the first chunk. Unused part is returned to the bootmem + * allocator. + * + * So, the large pages are mapped twice - once to the physical mapping + * and to the vmalloc area for the first percpu chunk. The double + * mapping does add one more large TLB entry pressure but still is + * much better than only using 4k mappings while still being NUMA + * friendly. + * + * RETURNS: + * The determined pcpu_unit_size which can be used to initialize + * percpu access on success, -errno on failure. + */ +ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, + ssize_t dyn_size, size_t lpage_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn) +{ + size_t size_sum; + size_t map_size; + unsigned int cpu; + int i, j; + ssize_t ret; + + /* + * Currently supports only single page. Supporting multiple + * pages won't be too difficult if it ever becomes necessary. + */ + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + + pcpul_unit_size = lpage_size; + pcpul_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + if (pcpul_size > pcpul_unit_size) { + pr_warning("PERCPU: static data is larger than large page, " + "can't use large page\n"); + return -EINVAL; + } + + /* allocate pointer array and alloc large pages */ + map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); + pcpul_map = alloc_bootmem(map_size); + + for_each_possible_cpu(cpu) { + void *ptr; + + ptr = alloc_fn(cpu, lpage_size); + if (!ptr) { + pr_warning("PERCPU: failed to allocate large page " + "for cpu%u\n", cpu); + goto enomem; + } + + /* + * Only use pcpul_size bytes and give back the rest. + * + * Ingo: The lpage_size up-rounding bootmem is needed + * to make sure the partial lpage is still fully RAM - + * it's not well-specified to have a incompatible area + * (unmapped RAM, device memory, etc.) in that hole. 
+ */ + free_fn(ptr + pcpul_size, lpage_size - pcpul_size); + + pcpul_map[cpu].cpu = cpu; + pcpul_map[cpu].ptr = ptr; + + memcpy(ptr, __per_cpu_load, static_size); + } + + /* allocate address and map */ + pcpul_vm.flags = VM_ALLOC; + pcpul_vm.size = num_possible_cpus() * pcpul_unit_size; + vm_area_register_early(&pcpul_vm, pcpul_unit_size); + + for_each_possible_cpu(cpu) + map_fn(pcpul_map[cpu].ptr, pcpul_unit_size, + pcpul_vm.addr + cpu * pcpul_unit_size); + + /* we're ready, commit */ + pr_info("PERCPU: Remapped at %p with large pages, static data " + "%zu bytes\n", pcpul_vm.addr, static_size); + + ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, + reserved_size, dyn_size, pcpul_unit_size, + pcpul_vm.addr, NULL); + + /* sort pcpul_map array for pcpu_lpage_remapped() */ + for (i = 0; i < num_possible_cpus() - 1; i++) + for (j = i + 1; j < num_possible_cpus(); j++) + if (pcpul_map[i].ptr > pcpul_map[j].ptr) { + struct pcpul_ent tmp = pcpul_map[i]; + pcpul_map[i] = pcpul_map[j]; + pcpul_map[j] = tmp; + } + + return ret; + +enomem: + for_each_possible_cpu(cpu) + if (pcpul_map[cpu].ptr) + free_fn(pcpul_map[cpu].ptr, pcpul_size); + free_bootmem(__pa(pcpul_map), map_size); + return -ENOMEM; +} + +/** + * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area + * @kaddr: the kernel address in question + * + * Determine whether @kaddr falls in the pcpul recycled area. This is + * used by pageattr to detect VM aliases and break up the pcpu large + * page mapping such that the same physical page is not mapped under + * different attributes. + * + * The recycled area is always at the tail of a partially used large + * page. + * + * RETURNS: + * Address of corresponding remapped pcpu address if match is found; + * otherwise, NULL. + */ +void *pcpu_lpage_remapped(void *kaddr) +{ + unsigned long unit_mask = pcpul_unit_size - 1; + void *lpage_addr = (void *)((unsigned long)kaddr & ~unit_mask); + unsigned long offset = (unsigned long)kaddr & unit_mask; + int left = 0, right = num_possible_cpus() - 1; + int pos; + + /* pcpul in use at all? */ + if (!pcpul_map) + return NULL; + + /* okay, perform binary search */ + while (left <= right) { + pos = (left + right) / 2; + + if (pcpul_map[pos].ptr < lpage_addr) + left = pos + 1; + else if (pcpul_map[pos].ptr > lpage_addr) + right = pos - 1; + else { + /* it shouldn't be in the area for the first chunk */ + WARN_ON(offset < pcpul_size); + + return pcpul_vm.addr + + pcpul_map[pos].cpu * pcpul_unit_size + offset; + } + } + + return NULL; +} +#endif + /* * Generic percpu area setup. * -- cgit v1.2.3 From 38a6be525460f52ac6f2de1c3f73c5615a8853cd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:10:59 +0900 Subject: percpu: simplify pcpu_setup_first_chunk() Now that all first chunk allocator helpers allocate and map the first chunk themselves, there's no need to have optional default alloc/map in pcpu_setup_first_chunk(). Drop @populate_pte_fn and only leave @dyn_size optional and make all other params mandatory. This makes it much easier to follow what pcpu_setup_first_chunk() is doing and what actual differences tweaking each parameter results in. 
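For reference, this is the prototype the change leaves behind, as it appears in the include/linux/percpu.h hunk below; only @dyn_size keeps its -1-for-auto convention, every other argument is now mandatory:

	extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
						    size_t static_size, size_t reserved_size,
						    ssize_t dyn_size, size_t unit_size,
						    void *base_addr);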
[ Impact: drop unused code path ] Signed-off-by: Tejun Heo Cc: Ingo Molnar --- arch/sparc/kernel/smp_64.c | 2 +- include/linux/percpu.h | 5 +-- mm/percpu.c | 104 +++++++++++++-------------------------------- 3 files changed, 33 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index fa44eaf8d897..ccad7b20ae75 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1528,7 +1528,7 @@ void __init setup_per_cpu_areas(void) pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size, PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr, NULL); + PCPU_CHUNK_SIZE, vm.addr); free_bootmem(__pa(pcpur_ptrs), ptrs_size); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9f6bfd7d4b92..ec64357e1762 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -66,9 +66,8 @@ typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size, - void *base_addr, - pcpu_fc_populate_pte_fn_t populate_pte_fn); + ssize_t dyn_size, size_t unit_size, + void *base_addr); extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, diff --git a/mm/percpu.c b/mm/percpu.c index 17db527ee2e2..21d938a10662 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -983,24 +983,22 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes - * @reserved_size: the size of reserved percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes, 0 for none * @dyn_size: free size for dynamic allocation in bytes, -1 for auto - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto - * @base_addr: mapped address, NULL for auto - * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE + * @base_addr: mapped address * * Initialize the first percpu chunk which contains the kernel static * perpcu area. This function is to be called from arch percpu area - * setup path. The first two parameters are mandatory. The rest are - * optional. + * setup path. * * @get_page_fn() should return pointer to percpu page given cpu * number and page number. It should at least return enough pages to * cover the static area. The returned pages for static area should - * have been initialized with valid data. If @unit_size is specified, - * it can also return pages after the static area. NULL return - * indicates end of pages for the cpu. Note that @get_page_fn() must - * return the same number of pages for all cpus. + * have been initialized with valid data. It can also return pages + * after the static area. NULL return indicates end of pages for the + * cpu. Note that @get_page_fn() must return the same number of pages + * for all cpus. * * @reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves @@ -1015,17 +1013,12 @@ EXPORT_SYMBOL_GPL(free_percpu); * non-negative value makes percpu leave alone the area beyond * @static_size + @reserved_size + @dyn_size. 
* - * @unit_size, if non-negative, specifies unit size and must be - * aligned to PAGE_SIZE and equal to or larger than @static_size + - * @reserved_size + if non-negative, @dyn_size. - * - * Non-null @base_addr means that the caller already allocated virtual - * region for the first chunk and mapped it. percpu must not mess - * with the chunk. Note that @base_addr with 0 @unit_size or non-NULL - * @populate_pte_fn doesn't make any sense. + * @unit_size specifies unit size and must be aligned to PAGE_SIZE and + * equal to or larger than @static_size + @reserved_size + if + * non-negative, @dyn_size. * - * @populate_pte_fn is used to populate the pagetable. NULL means the - * caller already populated the pagetable. + * The caller should have mapped the first chunk at @base_addr and + * copied static data to each unit. * * If the first chunk ends up with both reserved and dynamic areas, it * is served by two chunks - one to serve the core static and reserved @@ -1040,9 +1033,8 @@ EXPORT_SYMBOL_GPL(free_percpu); */ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t static_size, size_t reserved_size, - ssize_t dyn_size, ssize_t unit_size, - void *base_addr, - pcpu_fc_populate_pte_fn_t populate_pte_fn) + ssize_t dyn_size, size_t unit_size, + void *base_addr) { static struct vm_struct first_vm; static int smap[2], dmap[2]; @@ -1050,27 +1042,18 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, (dyn_size >= 0 ? dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu; - int nr_pages; - int err, i; + int i, nr_pages; /* santiy checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); - if (unit_size >= 0) { - BUG_ON(unit_size < size_sum); - BUG_ON(unit_size & ~PAGE_MASK); - BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); - } else - BUG_ON(base_addr); - BUG_ON(base_addr && populate_pte_fn); - - if (unit_size >= 0) - pcpu_unit_pages = unit_size >> PAGE_SHIFT; - else - pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, - PFN_UP(size_sum)); + BUG_ON(!base_addr); + BUG_ON(unit_size < size_sum); + BUG_ON(unit_size & ~PAGE_MASK); + BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) @@ -1079,6 +1062,10 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (dyn_size < 0) dyn_size = pcpu_unit_size - static_size - reserved_size; + first_vm.flags = VM_ALLOC; + first_vm.size = pcpu_chunk_size; + first_vm.addr = base_addr; + /* * Allocate chunk slots. The additional last slot is for * empty chunks. 
@@ -1101,6 +1088,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); schunk->page = schunk->page_ar; + schunk->immutable = true; if (reserved_size) { schunk->free_size = reserved_size; @@ -1124,31 +1112,13 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, dchunk->map = dmap; dchunk->map_alloc = ARRAY_SIZE(dmap); dchunk->page = schunk->page_ar; /* share page map with schunk */ + dchunk->immutable = true; dchunk->contig_hint = dchunk->free_size = dyn_size; dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; dchunk->map[dchunk->map_used++] = dchunk->free_size; } - /* allocate vm address */ - first_vm.flags = VM_ALLOC; - first_vm.size = pcpu_chunk_size; - - if (!base_addr) - vm_area_register_early(&first_vm, PAGE_SIZE); - else { - /* - * Pages already mapped. No need to remap into - * vmalloc area. In this case the first chunks can't - * be mapped or unmapped by percpu and are marked - * immutable. - */ - first_vm.addr = base_addr; - schunk->immutable = true; - if (dchunk) - dchunk->immutable = true; - } - /* assign pages */ nr_pages = -1; for_each_possible_cpu(cpu) { @@ -1168,19 +1138,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, BUG_ON(nr_pages != i); } - /* map them */ - if (populate_pte_fn) { - for_each_possible_cpu(cpu) - for (i = 0; i < nr_pages; i++) - populate_pte_fn(pcpu_chunk_addr(schunk, - cpu, i)); - - err = pcpu_map(schunk, 0, nr_pages); - if (err) - panic("failed to setup static percpu area, err=%d\n", - err); - } - /* link the first chunk in */ pcpu_first_chunk = dchunk ?: schunk; pcpu_chunk_relocate(pcpu_first_chunk, -1); @@ -1282,7 +1239,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, return pcpu_setup_first_chunk(pcpue_get_page, static_size, reserved_size, dyn_size, - pcpue_unit_size, pcpue_ptr, NULL); + pcpue_unit_size, pcpue_ptr); } /* @@ -1387,8 +1344,7 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, reserved_size, -1, - pcpu4k_unit_pages << PAGE_SHIFT, vm.addr, - NULL); + pcpu4k_unit_pages << PAGE_SHIFT, vm.addr); goto out_free_ar; enomem: @@ -1521,7 +1477,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, reserved_size, dyn_size, pcpul_unit_size, - pcpul_vm.addr, NULL); + pcpul_vm.addr); /* sort pcpul_map array for pcpu_lpage_remapped() */ for (i = 0; i < num_possible_cpus() - 1; i++) -- cgit v1.2.3 From ce3141a277ff6cc37e51008b8888dc2cb7456ef1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:11:00 +0900 Subject: percpu: drop pcpu_chunk->page[] percpu core doesn't need to track all the allocated pages. It needs to know whether certain pages are populated and a way to reverse map address to page when freeing. This patch drops pcpu_chunk->page[] and uses a populated bitmap and vmalloc_to_page() lookup instead. Using vmalloc_to_page() exclusively is also possible but complicates first chunk handling, inflates cache footprint and prevents non-standard memory allocation for percpu memory. pcpu_chunk->page[] was used to track each page's allocation and allowed asymmetric population which happens during the failure path; however, with a single bitmap for all units, this is no longer possible.
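To make that bookkeeping change concrete, here is a minimal sketch of the idea (illustrative only, with made-up demo_* names and a fixed page count assumed for the sketch; the real implementation is in the diff below): one populated bitmap replaces the per-unit page pointer array, and struct page is recovered on demand with vmalloc_to_page().

/*
 * Illustrative sketch, not part of the patch: simplified, hypothetical
 * names. One bit per page index replaces one struct page pointer per
 * page per unit; the page itself is looked up only when needed.
 */
#include <linux/bitops.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>

#define DEMO_UNIT_PAGES 64	/* assumption made for the sketch */

struct demo_chunk {
	void *base_addr;	/* vmalloc address of unit 0 */
	unsigned long populated[BITS_TO_LONGS(DEMO_UNIT_PAGES)];
};

/* population state: a set bit means the page is backed, for all units */
static bool demo_page_populated(struct demo_chunk *chunk, int page_idx)
{
	return test_bit(page_idx, chunk->populated);
}

/* reverse map: recover struct page from the mapped address when freeing */
static struct page *demo_page(struct demo_chunk *chunk, int page_idx)
{
	return vmalloc_to_page(chunk->base_addr + page_idx * PAGE_SIZE);
}

The trade-off is exactly the one noted above: population state is kept per page index for all units at once, so units can no longer be populated asymmetrically.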
Bite the bullet and rewrite (de)populate functions so that things are done in clearly separated steps such that asymmetric population doesn't happen. This makes the (de)population process much more modular and will also ease implementing non-standard memory usage in the future (e.g. large pages). This makes @get_page_fn parameter to pcpu_setup_first_chunk() unnecessary. The parameter is dropped and all first chunk helpers are updated accordingly. Please note that despite the volume most changes to first chunk helpers are symbol renames for variables which don't need to be referenced outside of the helper anymore. This change reduces memory usage and cache footprint of pcpu_chunk. Now only #unit_pages bits are necessary per chunk. [ Impact: reduced memory usage and cache footprint for bookkeeping ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/sparc/kernel/smp_64.c | 42 ++-- include/linux/percpu.h | 3 +- mm/percpu.c | 604 ++++++++++++++++++++++++++++----------------- 3 files changed, 400 insertions(+), 249 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index ccad7b20ae75..f2f22ee97a7a 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1415,19 +1415,6 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, #endif } -static size_t pcpur_size __initdata; -static void **pcpur_ptrs __initdata; - -static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpur_size) - return NULL; - - return virt_to_page(pcpur_ptrs[cpu] + off); -} - #define PCPU_CHUNK_SIZE (4UL * 1024UL * 1024UL) static void __init pcpu_map_range(unsigned long start, unsigned long end, @@ -1491,25 +1478,26 @@ void __init setup_per_cpu_areas(void) size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; static struct vm_struct vm; unsigned long delta, cpu; - size_t pcpu_unit_size; + size_t size_sum, pcpu_unit_size; size_t ptrs_size; + void **ptrs; - pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE; + size_sum = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + PERCPU_DYNAMIC_RESERVE); + dyn_size = size_sum - static_size - PERCPU_MODULE_RESERVE; - ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); - pcpur_ptrs = alloc_bootmem(ptrs_size); + ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(ptrs[0])); + ptrs = alloc_bootmem(ptrs_size); for_each_possible_cpu(cpu) { - pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE, - PCPU_CHUNK_SIZE); + ptrs[cpu] = pcpu_alloc_bootmem(cpu, PCPU_CHUNK_SIZE, + PCPU_CHUNK_SIZE); - free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), - PCPU_CHUNK_SIZE - pcpur_size); + free_bootmem(__pa(ptrs[cpu] + size_sum), + PCPU_CHUNK_SIZE - size_sum); - memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); + memcpy(ptrs[cpu], __per_cpu_load, static_size); } /* allocate address and map */ @@ -1523,14 +1511,14 @@ void __init setup_per_cpu_areas(void) start += cpu * PCPU_CHUNK_SIZE; end = start + PCPU_CHUNK_SIZE; - pcpu_map_range(start, end, virt_to_page(pcpur_ptrs[cpu])); + pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); } - pcpu_unit_size = pcpu_setup_first_chunk(pcpur_get_page, static_size, + pcpu_unit_size = pcpu_setup_first_chunk(static_size, PERCPU_MODULE_RESERVE, dyn_size, PCPU_CHUNK_SIZE, vm.addr); - free_bootmem(__pa(pcpur_ptrs), ptrs_size); + free_bootmem(__pa(ptrs), 
ptrs_size); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { diff --git a/include/linux/percpu.h b/include/linux/percpu.h index ec64357e1762..63c8b7a23e66 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -58,13 +58,12 @@ extern void *pcpu_base_addr; -typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); -extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, +extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, void *base_addr); diff --git a/mm/percpu.c b/mm/percpu.c index 639fce4d2caf..21756814d99f 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -94,8 +94,7 @@ struct pcpu_chunk { int map_alloc; /* # of map entries allocated */ int *map; /* allocation map */ bool immutable; /* no [de]population allowed */ - struct page **page; /* points to page array */ - struct page *page_ar[]; /* #cpus * UNIT_PAGES */ + unsigned long populated[]; /* populated bitmap */ }; static int pcpu_unit_pages __read_mostly; @@ -129,9 +128,9 @@ static int pcpu_reserved_chunk_limit; * Synchronization rules. * * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former - * protects allocation/reclaim paths, chunks and chunk->page arrays. - * The latter is a spinlock and protects the index data structures - - * chunk slots, chunks and area maps in chunks. + * protects allocation/reclaim paths, chunks, populated bitmap and + * vmalloc mapping. The latter is a spinlock and protects the index + * data structures - chunk slots, chunks and area maps in chunks. * * During allocation, pcpu_alloc_mutex is kept locked all the time and * pcpu_lock is grabbed and released as necessary. All actual memory @@ -188,16 +187,13 @@ static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); } -static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, - unsigned int cpu, int page_idx) +static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, + unsigned int cpu, int page_idx) { - return &chunk->page[pcpu_page_idx(cpu, page_idx)]; -} + /* must not be used on pre-mapped chunk */ + WARN_ON(chunk->immutable); -static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, - int page_idx) -{ - return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; + return vmalloc_to_page((void *)pcpu_chunk_addr(chunk, cpu, page_idx)); } /* set the pointer to a chunk in a page struct */ @@ -212,6 +208,34 @@ static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page) return (struct pcpu_chunk *)page->index; } +static void pcpu_next_unpop(struct pcpu_chunk *chunk, int *rs, int *re, int end) +{ + *rs = find_next_zero_bit(chunk->populated, end, *rs); + *re = find_next_bit(chunk->populated, end, *rs + 1); +} + +static void pcpu_next_pop(struct pcpu_chunk *chunk, int *rs, int *re, int end) +{ + *rs = find_next_bit(chunk->populated, end, *rs); + *re = find_next_zero_bit(chunk->populated, end, *rs + 1); +} + +/* + * (Un)populated page region iterators. Iterate over (un)populated + * page regions betwen @start and @end in @chunk. @rs and @re should + * be integer variables and will be set to start and end page index of + * the current region. 
+ */ +#define pcpu_for_each_unpop_region(chunk, rs, re, start, end) \ + for ((rs) = (start), pcpu_next_unpop((chunk), &(rs), &(re), (end)); \ + (rs) < (re); \ + (rs) = (re) + 1, pcpu_next_unpop((chunk), &(rs), &(re), (end))) + +#define pcpu_for_each_pop_region(chunk, rs, re, start, end) \ + for ((rs) = (start), pcpu_next_pop((chunk), &(rs), &(re), (end)); \ + (rs) < (re); \ + (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end))) + /** * pcpu_mem_alloc - allocate memory * @size: bytes to allocate @@ -545,42 +569,197 @@ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) } /** - * pcpu_unmap - unmap pages out of a pcpu_chunk + * pcpu_get_pages_and_bitmap - get temp pages array and bitmap + * @chunk: chunk of interest + * @bitmapp: output parameter for bitmap + * @may_alloc: may allocate the array + * + * Returns pointer to array of pointers to struct page and bitmap, + * both of which can be indexed with pcpu_page_idx(). The returned + * array is cleared to zero and *@bitmapp is copied from + * @chunk->populated. Note that there is only one array and bitmap + * and access exclusion is the caller's responsibility. + * + * CONTEXT: + * pcpu_alloc_mutex and does GFP_KERNEL allocation if @may_alloc. + * Otherwise, don't care. + * + * RETURNS: + * Pointer to temp pages array on success, NULL on failure. + */ +static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, + unsigned long **bitmapp, + bool may_alloc) +{ + static struct page **pages; + static unsigned long *bitmap; + size_t pages_size = num_possible_cpus() * pcpu_unit_pages * + sizeof(pages[0]); + size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * + sizeof(unsigned long); + + if (!pages || !bitmap) { + if (may_alloc && !pages) + pages = pcpu_mem_alloc(pages_size); + if (may_alloc && !bitmap) + bitmap = pcpu_mem_alloc(bitmap_size); + if (!pages || !bitmap) + return NULL; + } + + memset(pages, 0, pages_size); + bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages); + + *bitmapp = bitmap; + return pages; +} + +/** + * pcpu_free_pages - free pages which were allocated for @chunk + * @chunk: chunk pages were allocated for + * @pages: array of pages to be freed, indexed by pcpu_page_idx() + * @populated: populated bitmap + * @page_start: page index of the first page to be freed + * @page_end: page index of the last page to be freed + 1 + * + * Free pages [@page_start and @page_end) in @pages for all units. + * The pages were allocated for @chunk. + */ +static void pcpu_free_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) +{ + unsigned int cpu; + int i; + + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page *page = pages[pcpu_page_idx(cpu, i)]; + + if (page) + __free_page(page); + } + } +} + +/** + * pcpu_alloc_pages - allocates pages for @chunk + * @chunk: target chunk + * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() + * @populated: populated bitmap + * @page_start: page index of the first page to be allocated + * @page_end: page index of the last page to be allocated + 1 + * + * Allocate pages [@page_start,@page_end) into @pages for all units. + * The allocation is for @chunk. Percpu core doesn't care about the + * content of @pages and will pass it verbatim to pcpu_map_pages(). 
+ */ +static int pcpu_alloc_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) +{ + const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; + unsigned int cpu; + int i; + + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; + + *pagep = alloc_pages_node(cpu_to_node(cpu), gfp, 0); + if (!*pagep) { + pcpu_free_pages(chunk, pages, populated, + page_start, page_end); + return -ENOMEM; + } + } + } + return 0; +} + +/** + * pcpu_pre_unmap_flush - flush cache prior to unmapping + * @chunk: chunk the regions to be flushed belongs to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages in [@page_start,@page_end) of @chunk are about to be + * unmapped. Flush cache. As each flushing trial can be very + * expensive, issue flush on the whole region at once rather than + * doing it for each cpu. This could be an overkill but is more + * scalable. + */ +static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + + flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); +} + +static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) +{ + unmap_kernel_range_noflush(addr, nr_pages << PAGE_SHIFT); +} + +/** + * pcpu_unmap_pages - unmap pages out of a pcpu_chunk * @chunk: chunk of interest + * @pages: pages array which can be used to pass information to free + * @populated: populated bitmap * @page_start: page index of the first page to unmap * @page_end: page index of the last page to unmap + 1 - * @flush_tlb: whether to flush tlb or not * * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. - * If @flush is true, vcache is flushed before unmapping and tlb - * after. + * Corresponding elements in @pages were cleared by the caller and can + * be used to carry information to pcpu_free_pages() which will be + * called after all unmaps are finished. The caller should call + * proper pre/post flush functions. */ -static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, - bool flush_tlb) +static void pcpu_unmap_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; unsigned int cpu; + int i; - /* unmap must not be done on immutable chunk */ - WARN_ON(chunk->immutable); + for_each_possible_cpu(cpu) { + for (i = page_start; i < page_end; i++) { + struct page *page; - /* - * Each flushing trial can be very expensive, issue flush on - * the whole region at once rather than doing it for each cpu. - * This could be an overkill but is more scalable. 
- */ - flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + page = pcpu_chunk_page(chunk, cpu, i); + WARN_ON(!page); + pages[pcpu_page_idx(cpu, i)] = page; + } + __pcpu_unmap_pages(pcpu_chunk_addr(chunk, cpu, page_start), + page_end - page_start); + } - for_each_possible_cpu(cpu) - unmap_kernel_range_noflush( - pcpu_chunk_addr(chunk, cpu, page_start), - (page_end - page_start) << PAGE_SHIFT); - - /* ditto as flush_cache_vunmap() */ - if (flush_tlb) - flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + for (i = page_start; i < page_end; i++) + __clear_bit(i, populated); +} + +/** + * pcpu_post_unmap_tlb_flush - flush TLB after unmapping + * @chunk: pcpu_chunk the regions to be flushed belong to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages [@page_start,@page_end) of @chunk have been unmapped. Flush + * TLB for the regions. This can be skipped if the area is to be + * returned to vmalloc as vmalloc will handle TLB flushing lazily. + * + * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once + * for the whole region. + */ +static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + + flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), + pcpu_chunk_addr(chunk, last, page_end)); } static int __pcpu_map_pages(unsigned long addr, struct page **pages, @@ -591,35 +770,76 @@ static int __pcpu_map_pages(unsigned long addr, struct page **pages, } /** - * pcpu_map - map pages into a pcpu_chunk + * pcpu_map_pages - map pages into a pcpu_chunk * @chunk: chunk of interest + * @pages: pages array containing pages to be mapped + * @populated: populated bitmap * @page_start: page index of the first page to map * @page_end: page index of the last page to map + 1 * - * For each cpu, map pages [@page_start,@page_end) into @chunk. - * vcache is flushed afterwards. + * For each cpu, map pages [@page_start,@page_end) into @chunk. The + * caller is responsible for calling pcpu_post_map_flush() after all + * mappings are complete. + * + * This function is responsible for setting corresponding bits in + * @chunk->populated bitmap and whatever is necessary for reverse + * lookup (addr -> chunk). 
*/ -static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) +static int pcpu_map_pages(struct pcpu_chunk *chunk, + struct page **pages, unsigned long *populated, + int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - unsigned int cpu; - int err; - - /* map must not be done on immutable chunk */ - WARN_ON(chunk->immutable); + unsigned int cpu, tcpu; + int i, err; for_each_possible_cpu(cpu) { err = __pcpu_map_pages(pcpu_chunk_addr(chunk, cpu, page_start), - pcpu_chunk_pagep(chunk, cpu, page_start), + &pages[pcpu_page_idx(cpu, page_start)], page_end - page_start); if (err < 0) - return err; + goto err; } + /* mapping successful, link chunk and mark populated */ + for (i = page_start; i < page_end; i++) { + for_each_possible_cpu(cpu) + pcpu_set_page_chunk(pages[pcpu_page_idx(cpu, i)], + chunk); + __set_bit(i, populated); + } + + return 0; + +err: + for_each_possible_cpu(tcpu) { + if (tcpu == cpu) + break; + __pcpu_unmap_pages(pcpu_chunk_addr(chunk, tcpu, page_start), + page_end - page_start); + } + return err; +} + +/** + * pcpu_post_map_flush - flush cache after mapping + * @chunk: pcpu_chunk the regions to be flushed belong to + * @page_start: page index of the first page to be flushed + * @page_end: page index of the last page to be flushed + 1 + * + * Pages [@page_start,@page_end) of @chunk have been mapped. Flush + * cache. + * + * As with pcpu_pre_unmap_flush(), TLB flushing also is done at once + * for the whole region. + */ +static void pcpu_post_map_flush(struct pcpu_chunk *chunk, + int page_start, int page_end) +{ + unsigned int last = num_possible_cpus() - 1; + /* flush at once, please read comments in pcpu_unmap() */ flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), pcpu_chunk_addr(chunk, last, page_end)); - return 0; } /** @@ -636,39 +856,45 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) * CONTEXT: * pcpu_alloc_mutex. */ -static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, - bool flush) +static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size) { int page_start = PFN_DOWN(off); int page_end = PFN_UP(off + size); - int unmap_start = -1; - int uninitialized_var(unmap_end); - unsigned int cpu; - int i; + struct page **pages; + unsigned long *populated; + int rs, re; + + /* quick path, check whether it's empty already */ + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + if (rs == page_start && re == page_end) + return; + break; + } - for (i = page_start; i < page_end; i++) { - for_each_possible_cpu(cpu) { - struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); + /* immutable chunks can't be depopulated */ + WARN_ON(chunk->immutable); - if (!*pagep) - continue; + /* + * If control reaches here, there must have been at least one + * successful population attempt so the temp pages array must + * be available now. + */ + pages = pcpu_get_pages_and_bitmap(chunk, &populated, false); + BUG_ON(!pages); - __free_page(*pagep); + /* unmap and free */ + pcpu_pre_unmap_flush(chunk, page_start, page_end); - /* - * If it's partial depopulation, it might get - * populated or depopulated again. Mark the - * page gone. - */ - *pagep = NULL; + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) + pcpu_unmap_pages(chunk, pages, populated, rs, re); - unmap_start = unmap_start < 0 ? 
i : unmap_start; - unmap_end = i + 1; - } - } + /* no need to flush tlb, vmalloc will handle it lazily */ + + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) + pcpu_free_pages(chunk, pages, populated, rs, re); - if (unmap_start >= 0) - pcpu_unmap(chunk, unmap_start, unmap_end, flush); + /* commit new bitmap */ + bitmap_copy(chunk->populated, populated, pcpu_unit_pages); } /** @@ -685,50 +911,61 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, */ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) { - const gfp_t alloc_mask = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; int page_start = PFN_DOWN(off); int page_end = PFN_UP(off + size); - int map_start = -1; - int uninitialized_var(map_end); + int free_end = page_start, unmap_end = page_start; + struct page **pages; + unsigned long *populated; unsigned int cpu; - int i; + int rs, re, rc; - for (i = page_start; i < page_end; i++) { - if (pcpu_chunk_page_occupied(chunk, i)) { - if (map_start >= 0) { - if (pcpu_map(chunk, map_start, map_end)) - goto err; - map_start = -1; - } - continue; - } + /* quick path, check whether all pages are already there */ + pcpu_for_each_pop_region(chunk, rs, re, page_start, page_end) { + if (rs == page_start && re == page_end) + goto clear; + break; + } - map_start = map_start < 0 ? i : map_start; - map_end = i + 1; + /* need to allocate and map pages, this chunk can't be immutable */ + WARN_ON(chunk->immutable); - for_each_possible_cpu(cpu) { - struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); + pages = pcpu_get_pages_and_bitmap(chunk, &populated, true); + if (!pages) + return -ENOMEM; - *pagep = alloc_pages_node(cpu_to_node(cpu), - alloc_mask, 0); - if (!*pagep) - goto err; - pcpu_set_page_chunk(*pagep, chunk); - } + /* alloc and map */ + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + rc = pcpu_alloc_pages(chunk, pages, populated, rs, re); + if (rc) + goto err_free; + free_end = re; } - if (map_start >= 0 && pcpu_map(chunk, map_start, map_end)) - goto err; + pcpu_for_each_unpop_region(chunk, rs, re, page_start, page_end) { + rc = pcpu_map_pages(chunk, pages, populated, rs, re); + if (rc) + goto err_unmap; + unmap_end = re; + } + pcpu_post_map_flush(chunk, page_start, page_end); + /* commit new bitmap */ + bitmap_copy(chunk->populated, populated, pcpu_unit_pages); +clear: for_each_possible_cpu(cpu) memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, size); - return 0; -err: - /* likely under heavy memory pressure, give memory back */ - pcpu_depopulate_chunk(chunk, off, size, true); - return -ENOMEM; + +err_unmap: + pcpu_pre_unmap_flush(chunk, page_start, unmap_end); + pcpu_for_each_unpop_region(chunk, rs, re, page_start, unmap_end) + pcpu_unmap_pages(chunk, pages, populated, rs, re); + pcpu_post_unmap_tlb_flush(chunk, page_start, unmap_end); +err_free: + pcpu_for_each_unpop_region(chunk, rs, re, page_start, free_end) + pcpu_free_pages(chunk, pages, populated, rs, re); + return rc; } static void free_pcpu_chunk(struct pcpu_chunk *chunk) @@ -752,7 +989,6 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); chunk->map_alloc = PCPU_DFL_MAP_ALLOC; chunk->map[chunk->map_used++] = pcpu_unit_size; - chunk->page = chunk->page_ar; chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); if (!chunk->vm) { @@ -933,7 +1169,7 @@ static void pcpu_reclaim(struct work_struct *work) mutex_unlock(&pcpu_alloc_mutex); list_for_each_entry_safe(chunk, next, &todo, list) { 
- pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); + pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size); free_pcpu_chunk(chunk); } } @@ -981,7 +1217,6 @@ EXPORT_SYMBOL_GPL(free_percpu); /** * pcpu_setup_first_chunk - initialize the first percpu chunk - * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes, 0 for none * @dyn_size: free size for dynamic allocation in bytes, -1 for auto @@ -992,14 +1227,6 @@ EXPORT_SYMBOL_GPL(free_percpu); * perpcu area. This function is to be called from arch percpu area * setup path. * - * @get_page_fn() should return pointer to percpu page given cpu - * number and page number. It should at least return enough pages to - * cover the static area. The returned pages for static area should - * have been initialized with valid data. It can also return pages - * after the static area. NULL return indicates end of pages for the - * cpu. Note that @get_page_fn() must return the same number of pages - * for all cpus. - * * @reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves * the first chunk such that it's available only through reserved @@ -1031,8 +1258,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * The determined pcpu_unit_size which can be used to initialize * percpu access. */ -size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t reserved_size, +size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, void *base_addr) { @@ -1041,8 +1267,7 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, size_t size_sum = static_size + reserved_size + (dyn_size >= 0 ? 
dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; - unsigned int cpu; - int i, nr_pages; + int i; /* santiy checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || @@ -1056,8 +1281,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; - pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) - + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); + pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); if (dyn_size < 0) dyn_size = pcpu_unit_size - static_size - reserved_size; @@ -1087,8 +1312,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, schunk->vm = &first_vm; schunk->map = smap; schunk->map_alloc = ARRAY_SIZE(smap); - schunk->page = schunk->page_ar; schunk->immutable = true; + bitmap_fill(schunk->populated, pcpu_unit_pages); if (reserved_size) { schunk->free_size = reserved_size; @@ -1106,38 +1331,19 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, /* init dynamic chunk if necessary */ if (dyn_size) { - dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); + dchunk = alloc_bootmem(pcpu_chunk_struct_size); INIT_LIST_HEAD(&dchunk->list); dchunk->vm = &first_vm; dchunk->map = dmap; dchunk->map_alloc = ARRAY_SIZE(dmap); - dchunk->page = schunk->page_ar; /* share page map with schunk */ dchunk->immutable = true; + bitmap_fill(dchunk->populated, pcpu_unit_pages); dchunk->contig_hint = dchunk->free_size = dyn_size; dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; dchunk->map[dchunk->map_used++] = dchunk->free_size; } - /* assign pages */ - nr_pages = -1; - for_each_possible_cpu(cpu) { - for (i = 0; i < pcpu_unit_pages; i++) { - struct page *page = get_page_fn(cpu, i); - - if (!page) - break; - *pcpu_chunk_pagep(schunk, cpu, i) = page; - } - - BUG_ON(i < PFN_UP(static_size)); - - if (nr_pages < 0) - nr_pages = i; - else - BUG_ON(nr_pages != i); - } - /* link the first chunk in */ pcpu_first_chunk = dchunk ?: schunk; pcpu_chunk_relocate(pcpu_first_chunk, -1); @@ -1160,23 +1366,6 @@ static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, return size_sum; } -/* - * Embedding first chunk setup helper. 
- */ -static void *pcpue_ptr __initdata; -static size_t pcpue_size __initdata; -static size_t pcpue_unit_size __initdata; - -static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpue_size) - return NULL; - - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); -} - /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @static_size: the size of static percpu area in bytes @@ -1207,18 +1396,19 @@ static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, ssize_t dyn_size) { - size_t chunk_size; + size_t size_sum, unit_size, chunk_size; + void *base; unsigned int cpu; /* determine parameters and allocate */ - pcpue_size = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); - pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - chunk_size = pcpue_unit_size * num_possible_cpus(); + unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + chunk_size = unit_size * num_possible_cpus(); - pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, - __pa(MAX_DMA_ADDRESS)); - if (!pcpue_ptr) { + base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, + __pa(MAX_DMA_ADDRESS)); + if (!base) { pr_warning("PERCPU: failed to allocate %zu bytes for " "embedding\n", chunk_size); return -ENOMEM; @@ -1226,33 +1416,18 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, /* return the leftover and copy */ for_each_possible_cpu(cpu) { - void *ptr = pcpue_ptr + cpu * pcpue_unit_size; + void *ptr = base + cpu * unit_size; - free_bootmem(__pa(ptr + pcpue_size), - pcpue_unit_size - pcpue_size); + free_bootmem(__pa(ptr + size_sum), unit_size - size_sum); memcpy(ptr, __per_cpu_load, static_size); } /* we're ready, commit */ pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", - pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); + size_sum >> PAGE_SHIFT, base, static_size); - return pcpu_setup_first_chunk(pcpue_get_page, static_size, - reserved_size, dyn_size, - pcpue_unit_size, pcpue_ptr); -} - -/* - * 4k page first chunk setup helper. 
- */ -static struct page **pcpu4k_pages __initdata; -static int pcpu4k_unit_pages __initdata; - -static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) -{ - if (pageno < pcpu4k_unit_pages) - return pcpu4k_pages[cpu * pcpu4k_unit_pages + pageno]; - return NULL; + return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, + unit_size, base); } /** @@ -1279,23 +1454,25 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; + int unit_pages; size_t pages_size; + struct page **pages; unsigned int cpu; int i, j; ssize_t ret; - pcpu4k_unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, - PCPU_MIN_UNIT_SIZE)); + unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, + PCPU_MIN_UNIT_SIZE)); /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(pcpu4k_unit_pages * num_possible_cpus() * - sizeof(pcpu4k_pages[0])); - pcpu4k_pages = alloc_bootmem(pages_size); + pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * + sizeof(pages[0])); + pages = alloc_bootmem(pages_size); /* allocate pages */ j = 0; for_each_possible_cpu(cpu) - for (i = 0; i < pcpu4k_unit_pages; i++) { + for (i = 0; i < unit_pages; i++) { void *ptr; ptr = alloc_fn(cpu, PAGE_SIZE); @@ -1304,25 +1481,24 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, "4k page for cpu%u\n", cpu); goto enomem; } - pcpu4k_pages[j++] = virt_to_page(ptr); + pages[j++] = virt_to_page(ptr); } /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; - vm.size = num_possible_cpus() * pcpu4k_unit_pages << PAGE_SHIFT; + vm.size = num_possible_cpus() * unit_pages << PAGE_SHIFT; vm_area_register_early(&vm, PAGE_SIZE); for_each_possible_cpu(cpu) { unsigned long unit_addr = (unsigned long)vm.addr + - (cpu * pcpu4k_unit_pages << PAGE_SHIFT); + (cpu * unit_pages << PAGE_SHIFT); - for (i = 0; i < pcpu4k_unit_pages; i++) + for (i = 0; i < unit_pages; i++) populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ - ret = __pcpu_map_pages(unit_addr, - &pcpu4k_pages[cpu * pcpu4k_unit_pages], - pcpu4k_unit_pages); + ret = __pcpu_map_pages(unit_addr, &pages[cpu * unit_pages], + unit_pages); if (ret < 0) panic("failed to map percpu area, err=%zd\n", ret); @@ -1340,19 +1516,18 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, /* we're ready, commit */ pr_info("PERCPU: %d 4k pages per cpu, static data %zu bytes\n", - pcpu4k_unit_pages, static_size); + unit_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - reserved_size, -1, - pcpu4k_unit_pages << PAGE_SHIFT, vm.addr); + ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, + unit_pages << PAGE_SHIFT, vm.addr); goto out_free_ar; enomem: while (--j >= 0) - free_fn(page_address(pcpu4k_pages[j]), PAGE_SIZE); + free_fn(page_address(pages[j]), PAGE_SIZE); ret = -ENOMEM; out_free_ar: - free_bootmem(__pa(pcpu4k_pages), pages_size); + free_bootmem(__pa(pages), pages_size); return ret; } @@ -1370,16 +1545,6 @@ static size_t pcpul_unit_size; static struct pcpul_ent *pcpul_map; static struct vm_struct pcpul_vm; -static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) -{ - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpul_size) - return NULL; - - return virt_to_page(pcpul_map[cpu].ptr + off); -} - /** * pcpu_lpage_first_chunk - remap the first percpu chunk 
using large page * @static_size: the size of static percpu area in bytes @@ -1475,9 +1640,8 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, pr_info("PERCPU: Remapped at %p with large pages, static data " "%zu bytes\n", pcpul_vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, - reserved_size, dyn_size, pcpul_unit_size, - pcpul_vm.addr); + ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, + pcpul_unit_size, pcpul_vm.addr); /* sort pcpul_map array for pcpu_lpage_remapped() */ for (i = 0; i < num_possible_cpus() - 1; i++) -- cgit v1.2.3 From 2f39e637ea240efb74cf807d31c93a71a0b89174 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:11:00 +0900 Subject: percpu: allow non-linear / sparse cpu -> unit mapping Currently cpu and unit are always identity mapped. To allow more efficient large page support on NUMA and lazy allocation for possible but offline cpus, cpu -> unit mapping needs to be non-linear and/or sparse. This can be easily implemented by adding a cpu -> unit mapping array and using it whenever looking up the matching unit for a cpu. The only unusal conversion is in pcpu_chunk_addr_search(). The passed in address is unit0 based and unit0 might not be in use so it needs to be converted to address of an in-use unit. This is easily done by adding the unit offset for the current processor. [ Impact: allows non-linear/sparse cpu -> unit mapping, no visible change yet ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/sparc/kernel/smp_64.c | 2 +- include/linux/percpu.h | 3 +- mm/percpu.c | 129 +++++++++++++++++++++++++++++++++------------ 3 files changed, 97 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index f2f22ee97a7a..6970333b48b8 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1516,7 +1516,7 @@ void __init setup_per_cpu_areas(void) pcpu_unit_size = pcpu_setup_first_chunk(static_size, PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr); + PCPU_CHUNK_SIZE, vm.addr, NULL); free_bootmem(__pa(ptrs), ptrs_size); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 63c8b7a23e66..1e0e8878dc2a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -57,6 +57,7 @@ #endif extern void *pcpu_base_addr; +extern const int *pcpu_unit_map; typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); @@ -66,7 +67,7 @@ typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, - void *base_addr); + void *base_addr, const int *unit_map); extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, diff --git a/mm/percpu.c b/mm/percpu.c index 21756814d99f..2196fae24f00 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -8,12 +8,13 @@ * * This is percpu allocator which can handle both static and dynamic * areas. Percpu areas are allocated in chunks in vmalloc area. Each - * chunk is consisted of num_possible_cpus() units and the first chunk - * is used for static percpu variables in the kernel image (special - * boot time alloc/init handling necessary as these areas need to be - * brought up before allocation services are running). Unit grows as - * necessary and all units grow or shrink in unison. 
When a chunk is - * filled up, another chunk is allocated. ie. in vmalloc area + * chunk is consisted of boot-time determined number of units and the + * first chunk is used for static percpu variables in the kernel image + * (special boot time alloc/init handling necessary as these areas + * need to be brought up before allocation services are running). + * Unit grows as necessary and all units grow or shrink in unison. + * When a chunk is filled up, another chunk is allocated. ie. in + * vmalloc area * * c0 c1 c2 * ------------------- ------------------- ------------ @@ -22,11 +23,13 @@ * * Allocation is done in offset-size areas of single unit space. Ie, * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, - * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring - * percpu base registers pcpu_unit_size apart. + * c1:u1, c1:u2 and c1:u3. On UMA, units corresponds directly to + * cpus. On NUMA, the mapping can be non-linear and even sparse. + * Percpu access can be done by configuring percpu base registers + * according to cpu to unit mapping and pcpu_unit_size. * - * There are usually many small percpu allocations many of them as - * small as 4 bytes. The allocator organizes chunks into lists + * There are usually many small percpu allocations many of them being + * as small as 4 bytes. The allocator organizes chunks into lists * according to free size and tries to allocate from the fullest one. * Each chunk keeps the maximum contiguous area size hint which is * guaranteed to be eqaul to or larger than the maximum contiguous @@ -99,14 +102,22 @@ struct pcpu_chunk { static int pcpu_unit_pages __read_mostly; static int pcpu_unit_size __read_mostly; +static int pcpu_nr_units __read_mostly; static int pcpu_chunk_size __read_mostly; static int pcpu_nr_slots __read_mostly; static size_t pcpu_chunk_struct_size __read_mostly; +/* cpus with the lowest and highest unit numbers */ +static unsigned int pcpu_first_unit_cpu __read_mostly; +static unsigned int pcpu_last_unit_cpu __read_mostly; + /* the address of the first chunk which starts with the kernel static area */ void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); +/* cpu -> unit map */ +const int *pcpu_unit_map __read_mostly; + /* * The first chunk which always exists. Note that unlike other * chunks, this one can be allocated and mapped in several different @@ -177,7 +188,7 @@ static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) static int pcpu_page_idx(unsigned int cpu, int page_idx) { - return cpu * pcpu_unit_pages + page_idx; + return pcpu_unit_map[cpu] * pcpu_unit_pages + page_idx; } static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, @@ -321,6 +332,14 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) return pcpu_first_chunk; } + /* + * The address is relative to unit0 which might be unused and + * thus unmapped. Offset the address to the unit space of the + * current processor before looking it up in the vmalloc + * space. Note that any possible cpu id can be used here, so + * there's no need to worry about preemption or cpu hotplug. 
+ */ + addr += pcpu_unit_map[smp_processor_id()] * pcpu_unit_size; return pcpu_get_page_chunk(vmalloc_to_page(addr)); } @@ -593,8 +612,7 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk, { static struct page **pages; static unsigned long *bitmap; - size_t pages_size = num_possible_cpus() * pcpu_unit_pages * - sizeof(pages[0]); + size_t pages_size = pcpu_nr_units * pcpu_unit_pages * sizeof(pages[0]); size_t bitmap_size = BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); @@ -692,10 +710,9 @@ static int pcpu_alloc_pages(struct pcpu_chunk *chunk, static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - - flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + flush_cache_vunmap( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } static void __pcpu_unmap_pages(unsigned long addr, int nr_pages) @@ -756,10 +773,9 @@ static void pcpu_unmap_pages(struct pcpu_chunk *chunk, static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - - flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + flush_tlb_kernel_range( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } static int __pcpu_map_pages(unsigned long addr, struct page **pages, @@ -835,11 +851,9 @@ err: static void pcpu_post_map_flush(struct pcpu_chunk *chunk, int page_start, int page_end) { - unsigned int last = num_possible_cpus() - 1; - - /* flush at once, please read comments in pcpu_unmap() */ - flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), - pcpu_chunk_addr(chunk, last, page_end)); + flush_cache_vmap( + pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start), + pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end)); } /** @@ -953,8 +967,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) bitmap_copy(chunk->populated, populated, pcpu_unit_pages); clear: for_each_possible_cpu(cpu) - memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, - size); + memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); return 0; err_unmap: @@ -1088,6 +1101,7 @@ area_found: mutex_unlock(&pcpu_alloc_mutex); + /* return address relative to unit0 */ return __addr_to_pcpu_ptr(chunk->vm->addr + off); fail_unlock: @@ -1222,6 +1236,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE * @base_addr: mapped address + * @unit_map: cpu -> unit map, NULL for sequential mapping * * Initialize the first percpu chunk which contains the kernel static * perpcu area. This function is to be called from arch percpu area @@ -1260,16 +1275,17 @@ EXPORT_SYMBOL_GPL(free_percpu); */ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, ssize_t dyn_size, size_t unit_size, - void *base_addr) + void *base_addr, const int *unit_map) { static struct vm_struct first_vm; static int smap[2], dmap[2]; size_t size_sum = static_size + reserved_size + (dyn_size >= 0 ? 
dyn_size : 0); struct pcpu_chunk *schunk, *dchunk = NULL; + unsigned int cpu, tcpu; int i; - /* santiy checks */ + /* sanity checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); @@ -1278,9 +1294,52 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, BUG_ON(unit_size & ~PAGE_MASK); BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + /* determine number of units and verify and initialize pcpu_unit_map */ + if (unit_map) { + int first_unit = INT_MAX, last_unit = INT_MIN; + + for_each_possible_cpu(cpu) { + int unit = unit_map[cpu]; + + BUG_ON(unit < 0); + for_each_possible_cpu(tcpu) { + if (tcpu == cpu) + break; + /* the mapping should be one-to-one */ + BUG_ON(unit_map[tcpu] == unit); + } + + if (unit < first_unit) { + pcpu_first_unit_cpu = cpu; + first_unit = unit; + } + if (unit > last_unit) { + pcpu_last_unit_cpu = cpu; + last_unit = unit; + } + } + pcpu_nr_units = last_unit + 1; + pcpu_unit_map = unit_map; + } else { + int *identity_map; + + /* #units == #cpus, identity mapped */ + identity_map = alloc_bootmem(num_possible_cpus() * + sizeof(identity_map[0])); + + for_each_possible_cpu(cpu) + identity_map[cpu] = cpu; + + pcpu_first_unit_cpu = 0; + pcpu_last_unit_cpu = pcpu_nr_units - 1; + pcpu_nr_units = num_possible_cpus(); + pcpu_unit_map = identity_map; + } + + /* determine basic parameters */ pcpu_unit_pages = unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; - pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; + pcpu_chunk_size = pcpu_nr_units * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); @@ -1349,7 +1408,7 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, pcpu_chunk_relocate(pcpu_first_chunk, -1); /* we're done */ - pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); + pcpu_base_addr = schunk->vm->addr; return pcpu_unit_size; } @@ -1427,7 +1486,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, size_sum >> PAGE_SHIFT, base, static_size); return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, base); + unit_size, base, NULL); } /** @@ -1519,7 +1578,7 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, unit_pages, static_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, - unit_pages << PAGE_SHIFT, vm.addr); + unit_pages << PAGE_SHIFT, vm.addr, NULL); goto out_free_ar; enomem: @@ -1641,7 +1700,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, "%zu bytes\n", pcpul_vm.addr, static_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - pcpul_unit_size, pcpul_vm.addr); + pcpul_unit_size, pcpul_vm.addr, NULL); /* sort pcpul_map array for pcpu_lpage_remapped() */ for (i = 0; i < num_possible_cpus() - 1; i++) -- cgit v1.2.3 From a530b7958612bafe2027e21359083dba84f0b3b4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 4 Jul 2009 08:11:00 +0900 Subject: percpu: teach large page allocator about NUMA Large page first chunk allocator is primarily used for NUMA machines; however, its NUMA handling is extremely simplistic. Regardless of their proximity, each cpu is put into separate large page just to return most of the allocated space back wasting large amount of vmalloc space and increasing cache footprint. This patch teachs NUMA details to large page allocator. 
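The heart of the change, described in detail below, is a cpu -> unit map derived from cpu distances. A rough sketch of just the grouping step (hypothetical demo_* helper, simplified from what pcpu_lpage_build_unit_map() does in the patch) looks like this: cpus that are within LOCAL_DISTANCE of each other end up in the same group and may therefore share a large page.

/*
 * Sketch only, not the patch's code: assign each possible cpu to a
 * group such that all members of a group are mutually within
 * LOCAL_DISTANCE (i.e. on the same node). The distance callback is a
 * stand-in for the pcpu_fc_cpu_distance_fn_t used by the real code.
 */
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/topology.h>

static int __init demo_group_cpus(int *group_map,
				  int (*distance)(unsigned int, unsigned int))
{
	int nr_groups = 0;
	unsigned int cpu, tcpu;

	for_each_possible_cpu(cpu) {
		int group = 0;
	next_group:
		for_each_possible_cpu(tcpu) {
			if (tcpu == cpu)
				break;	/* only compare against earlier cpus */
			if (group_map[tcpu] == group &&
			    (distance(cpu, tcpu) > LOCAL_DISTANCE ||
			     distance(tcpu, cpu) > LOCAL_DISTANCE)) {
				group++;	/* remote cpu in this group, try the next one */
				goto next_group;
			}
		}
		group_map[cpu] = group;
		nr_groups = max(nr_groups, group + 1);
	}
	return nr_groups;
}

Units of cpus in the same group are then packed into the same large page(s), subject to the wastage limit enforced by the real function.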
Given processor proximity information, pcpu_lpage_build_unit_map() will find fitting cpu -> unit mapping in which cpus in LOCAL_DISTANCE share the same large page and not too much virtual address space is wasted. This greatly reduces the unit and thus chunk size and wastes much less address space for the first chunk. For example, on 4/4 NUMA machine, the original code occupied 16MB of virtual space for the first chunk while the new code only uses 4MB - one 2MB page for each node. [ Impact: much better space efficiency on NUMA machines ] Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: Jan Beulich Cc: Andi Kleen Cc: David Miller --- arch/x86/kernel/setup_percpu.c | 72 +++++++-- include/linux/percpu.h | 24 ++- mm/percpu.c | 358 ++++++++++++++++++++++++++++++++--------- 3 files changed, 359 insertions(+), 95 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 4f2e0ac9130b..7501bb14bd51 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -149,36 +149,73 @@ static void __init pcpul_map(void *ptr, size_t size, void *addr) set_pmd(pmd, pmd_v); } +static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) +{ + if (early_cpu_to_node(from) == early_cpu_to_node(to)) + return LOCAL_DISTANCE; + else + return REMOTE_DISTANCE; +} + static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; + size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; + size_t unit_map_size, unit_size; + int *unit_map; + int nr_units; + ssize_t ret; + + /* on non-NUMA, embedding is better */ + if (!chosen && !pcpu_need_numa()) + return -EINVAL; + + /* need PSE */ + if (!cpu_has_pse) { + pr_warning("PERCPU: lpage allocator requires PSE\n"); + return -EINVAL; + } + /* allocate and build unit_map */ + unit_map_size = num_possible_cpus() * sizeof(int); + unit_map = alloc_bootmem_nopanic(unit_map_size); + if (!unit_map) { + pr_warning("PERCPU: failed to allocate unit_map\n"); + return -ENOMEM; + } + + ret = pcpu_lpage_build_unit_map(static_size, + PERCPU_FIRST_CHUNK_RESERVE, + &dyn_size, &unit_size, PMD_SIZE, + unit_map, pcpu_lpage_cpu_distance); + if (ret < 0) { + pr_warning("PERCPU: failed to build unit_map\n"); + goto out_free; + } + nr_units = ret; + + /* do the parameters look okay? 
*/ if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = num_possible_cpus() * PMD_SIZE; - - /* on non-NUMA, embedding is better */ - if (!pcpu_need_numa()) - return -EINVAL; + size_t tot_size = nr_units * unit_size; /* don't consume more than 20% of vmalloc area */ if (tot_size > vm_size / 5) { pr_info("PERCPU: too large chunk size %zuMB for " "large page remap\n", tot_size >> 20); - return -EINVAL; + ret = -EINVAL; + goto out_free; } } - /* need PSE */ - if (!cpu_has_pse) { - pr_warning("PERCPU: lpage allocator requires PSE\n"); - return -EINVAL; - } - - return pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE, - PMD_SIZE, - pcpu_fc_alloc, pcpu_fc_free, pcpul_map); + ret = pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + dyn_size, unit_size, PMD_SIZE, + unit_map, nr_units, + pcpu_fc_alloc, pcpu_fc_free, pcpul_map); +out_free: + if (ret < 0) + free_bootmem(__pa(unit_map), unit_map_size); + return ret; } #else static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) @@ -299,7 +336,8 @@ void __init setup_per_cpu_areas(void) /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; + per_cpu_offset(cpu) = + delta + pcpu_unit_map[cpu] * pcpu_unit_size; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1e0e8878dc2a..8ce91af4aa19 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -62,6 +62,7 @@ extern const int *pcpu_unit_map; typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); +typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk( @@ -80,18 +81,37 @@ extern ssize_t __init pcpu_4k_first_chunk( pcpu_fc_populate_pte_fn_t populate_pte_fn); #ifdef CONFIG_NEED_MULTIPLE_NODES +extern int __init pcpu_lpage_build_unit_map( + size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep, size_t *unit_sizep, + size_t lpage_size, int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn); + extern ssize_t __init pcpu_lpage_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t lpage_size, + size_t dyn_size, size_t unit_size, + size_t lpage_size, const int *unit_map, + int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn); extern void *pcpu_lpage_remapped(void *kaddr); #else +static inline int pcpu_lpage_build_unit_map( + size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep, size_t *unit_sizep, + size_t lpage_size, int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +{ + return -EINVAL; +} + static inline ssize_t __init pcpu_lpage_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t lpage_size, + size_t dyn_size, size_t unit_size, + size_t lpage_size, const int *unit_map, + int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) diff --git a/mm/percpu.c b/mm/percpu.c index 2196fae24f00..b3d0bcff8c7c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -59,6 +59,7 @@ #include #include #include 
+#include #include #include #include @@ -1594,75 +1595,259 @@ out_free_ar: * Large page remapping first chunk setup helper */ #ifdef CONFIG_NEED_MULTIPLE_NODES + +/** + * pcpu_lpage_build_unit_map - build unit_map for large page remapping + * @static_size: the size of static percpu area in bytes + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_sizep: in/out parameter for dynamic size, -1 for auto + * @unit_sizep: out parameter for unit size + * @unit_map: unit_map to be filled + * @cpu_distance_fn: callback to determine distance between cpus + * + * This function builds cpu -> unit map and determine other parameters + * considering needed percpu size, large page size and distances + * between CPUs in NUMA. + * + * CPUs which are of LOCAL_DISTANCE both ways are grouped together and + * may share units in the same large page. The returned configuration + * is guaranteed to have CPUs on different nodes on different large + * pages and >=75% usage of allocated virtual address space. + * + * RETURNS: + * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and + * returns the number of units to be allocated. -errno on failure. + */ +int __init pcpu_lpage_build_unit_map(size_t static_size, size_t reserved_size, + ssize_t *dyn_sizep, size_t *unit_sizep, + size_t lpage_size, int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +{ + static int group_map[NR_CPUS] __initdata; + static int group_cnt[NR_CPUS] __initdata; + int group_cnt_max = 0; + size_t size_sum, min_unit_size, alloc_size; + int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ + int last_allocs; + unsigned int cpu, tcpu; + int group, unit; + + /* + * Determine min_unit_size, alloc_size and max_upa such that + * alloc_size is multiple of lpage_size and is the smallest + * which can accomodate 4k aligned segments which are equal to + * or larger than min_unit_size. + */ + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); + min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + + alloc_size = roundup(min_unit_size, lpage_size); + upa = alloc_size / min_unit_size; + while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + upa--; + max_upa = upa; + + /* group cpus according to their proximity */ + for_each_possible_cpu(cpu) { + group = 0; + next_group: + for_each_possible_cpu(tcpu) { + if (cpu == tcpu) + break; + if (group_map[tcpu] == group && + (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || + cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { + group++; + goto next_group; + } + } + group_map[cpu] = group; + group_cnt[group]++; + group_cnt_max = max(group_cnt_max, group_cnt[group]); + } + + /* + * Expand unit size until address space usage goes over 75% + * and then as much as possible without using more address + * space. + */ + last_allocs = INT_MAX; + for (upa = max_upa; upa; upa--) { + int allocs = 0, wasted = 0; + + if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + continue; + + for (group = 0; group_cnt[group]; group++) { + int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); + allocs += this_allocs; + wasted += this_allocs * upa - group_cnt[group]; + } + + /* + * Don't accept if wastage is over 25%. The + * greater-than comparison ensures upa==1 always + * passes the following check. 
+ */ + if (wasted > num_possible_cpus() / 3) + continue; + + /* and then don't consume more memory */ + if (allocs > last_allocs) + break; + last_allocs = allocs; + best_upa = upa; + } + *unit_sizep = alloc_size / best_upa; + + /* assign units to cpus accordingly */ + unit = 0; + for (group = 0; group_cnt[group]; group++) { + for_each_possible_cpu(cpu) + if (group_map[cpu] == group) + unit_map[cpu] = unit++; + unit = roundup(unit, best_upa); + } + + return unit; /* unit contains aligned number of units */ +} + struct pcpul_ent { - unsigned int cpu; void *ptr; + void *map_addr; }; static size_t pcpul_size; -static size_t pcpul_unit_size; +static size_t pcpul_lpage_size; +static int pcpul_nr_lpages; static struct pcpul_ent *pcpul_map; -static struct vm_struct pcpul_vm; + +static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, + unsigned int *cpup) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) + if (unit_map[cpu] == unit) { + if (cpup) + *cpup = cpu; + return true; + } + + return false; +} + +static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, + size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units) +{ + int width = 1, v = nr_units; + char empty_str[] = "--------"; + int upl, lpl; /* units per lpage, lpage per line */ + unsigned int cpu; + int lpage, unit; + + while (v /= 10) + width++; + empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; + + upl = max_t(int, lpage_size / unit_size, 1); + lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); + + printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, + static_size, reserved_size, dyn_size, unit_size, lpage_size); + + for (lpage = 0, unit = 0; unit < nr_units; unit++) { + if (!(unit % upl)) { + if (!(lpage++ % lpl)) { + printk("\n"); + printk("%spcpu-lpage: ", lvl); + } else + printk("| "); + } + if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + printk("%0*d ", width, cpu); + else + printk("%s ", empty_str); + } + printk("\n"); +} /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes - * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @dyn_size: free size for dynamic allocation in bytes + * @unit_size: unit size in bytes * @lpage_size: the size of a large page + * @unit_map: cpu -> unit mapping + * @nr_units: the number of units * @alloc_fn: function to allocate percpu lpage, always called with lpage_size * @free_fn: function to free percpu memory, @size <= lpage_size * @map_fn: function to map percpu lpage, always called with lpage_size * - * This allocator uses large page as unit. A large page is allocated - * for each cpu and each is remapped into vmalloc area using large - * page mapping. As large page can be quite large, only part of it is - * used for the first chunk. Unused part is returned to the bootmem - * allocator. - * - * So, the large pages are mapped twice - once to the physical mapping - * and to the vmalloc area for the first percpu chunk. The double - * mapping does add one more large TLB entry pressure but still is - * much better than only using 4k mappings while still being NUMA - * friendly. + * This allocator uses large page to build and map the first chunk. + * Unlike other helpers, the caller should always specify @dyn_size + * and @unit_size. 
These parameters along with @unit_map and + * @nr_units can be determined using pcpu_lpage_build_unit_map(). + * This two stage initialization is to allow arch code to evaluate the + * parameters before committing to it. + * + * Large pages are allocated as directed by @unit_map and other + * parameters and mapped to vmalloc space. Unused holes are returned + * to the page allocator. Note that these holes end up being actively + * mapped twice - once to the physical mapping and to the vmalloc area + * for the first percpu chunk. Depending on architecture, this might + * cause problem when changing page attributes of the returned area. + * These double mapped areas can be detected using + * pcpu_lpage_remapped(). * * RETURNS: * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t lpage_size, + size_t dyn_size, size_t unit_size, + size_t lpage_size, const int *unit_map, + int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) { - size_t size_sum; + static struct vm_struct vm; + size_t chunk_size = unit_size * nr_units; size_t map_size; unsigned int cpu; - int i, j; ssize_t ret; + int i, j, unit; - /* - * Currently supports only single page. Supporting multiple - * pages won't be too difficult if it ever becomes necessary. - */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + pcpul_lpage_dump_cfg(KERN_DEBUG, static_size, reserved_size, dyn_size, + unit_size, lpage_size, unit_map, nr_units); - pcpul_unit_size = lpage_size; - pcpul_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - if (pcpul_size > pcpul_unit_size) { - pr_warning("PERCPU: static data is larger than large page, " - "can't use large page\n"); - return -EINVAL; - } + BUG_ON(chunk_size % lpage_size); + + pcpul_size = static_size + reserved_size + dyn_size; + pcpul_lpage_size = lpage_size; + pcpul_nr_lpages = chunk_size / lpage_size; /* allocate pointer array and alloc large pages */ - map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0])); + map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]); pcpul_map = alloc_bootmem(map_size); - for_each_possible_cpu(cpu) { + /* allocate all pages */ + for (i = 0; i < pcpul_nr_lpages; i++) { + size_t offset = i * lpage_size; + int first_unit = offset / unit_size; + int last_unit = (offset + lpage_size - 1) / unit_size; void *ptr; + /* find out which cpu is mapped to this unit */ + for (unit = first_unit; unit <= last_unit; unit++) + if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + goto found; + continue; + found: ptr = alloc_fn(cpu, lpage_size); if (!ptr) { pr_warning("PERCPU: failed to allocate large page " @@ -1670,53 +1855,79 @@ ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, goto enomem; } - /* - * Only use pcpul_size bytes and give back the rest. - * - * Ingo: The lpage_size up-rounding bootmem is needed - * to make sure the partial lpage is still fully RAM - - * it's not well-specified to have a incompatible area - * (unmapped RAM, device memory, etc.) in that hole. 
- */ - free_fn(ptr + pcpul_size, lpage_size - pcpul_size); - - pcpul_map[cpu].cpu = cpu; - pcpul_map[cpu].ptr = ptr; + pcpul_map[i].ptr = ptr; + } - memcpy(ptr, __per_cpu_load, static_size); + /* return unused holes */ + for (unit = 0; unit < nr_units; unit++) { + size_t start = unit * unit_size; + size_t end = start + unit_size; + size_t off, next; + + /* don't free used part of occupied unit */ + if (pcpul_unit_to_cpu(unit, unit_map, NULL)) + start += pcpul_size; + + /* unit can span more than one page, punch the holes */ + for (off = start; off < end; off = next) { + void *ptr = pcpul_map[off / lpage_size].ptr; + next = min(roundup(off + 1, lpage_size), end); + if (ptr) + free_fn(ptr + off % lpage_size, next - off); + } } - /* allocate address and map */ - pcpul_vm.flags = VM_ALLOC; - pcpul_vm.size = num_possible_cpus() * pcpul_unit_size; - vm_area_register_early(&pcpul_vm, pcpul_unit_size); + /* allocate address, map and copy */ + vm.flags = VM_ALLOC; + vm.size = chunk_size; + vm_area_register_early(&vm, unit_size); + + for (i = 0; i < pcpul_nr_lpages; i++) { + if (!pcpul_map[i].ptr) + continue; + pcpul_map[i].map_addr = vm.addr + i * lpage_size; + map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr); + } for_each_possible_cpu(cpu) - map_fn(pcpul_map[cpu].ptr, pcpul_unit_size, - pcpul_vm.addr + cpu * pcpul_unit_size); + memcpy(vm.addr + unit_map[cpu] * unit_size, __per_cpu_load, + static_size); /* we're ready, commit */ pr_info("PERCPU: Remapped at %p with large pages, static data " - "%zu bytes\n", pcpul_vm.addr, static_size); + "%zu bytes\n", vm.addr, static_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - pcpul_unit_size, pcpul_vm.addr, NULL); - - /* sort pcpul_map array for pcpu_lpage_remapped() */ - for (i = 0; i < num_possible_cpus() - 1; i++) - for (j = i + 1; j < num_possible_cpus(); j++) - if (pcpul_map[i].ptr > pcpul_map[j].ptr) { - struct pcpul_ent tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } + unit_size, vm.addr, unit_map); + + /* + * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped + * lpages are pushed to the end and trimmed. + */ + for (i = 0; i < pcpul_nr_lpages - 1; i++) + for (j = i + 1; j < pcpul_nr_lpages; j++) { + struct pcpul_ent tmp; + + if (!pcpul_map[j].ptr) + continue; + if (pcpul_map[i].ptr && + pcpul_map[i].ptr < pcpul_map[j].ptr) + continue; + + tmp = pcpul_map[i]; + pcpul_map[i] = pcpul_map[j]; + pcpul_map[j] = tmp; + } + + while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr) + pcpul_nr_lpages--; return ret; enomem: - for_each_possible_cpu(cpu) - if (pcpul_map[cpu].ptr) - free_fn(pcpul_map[cpu].ptr, pcpul_size); + for (i = 0; i < pcpul_nr_lpages; i++) + if (pcpul_map[i].ptr) + free_fn(pcpul_map[i].ptr, lpage_size); free_bootmem(__pa(pcpul_map), map_size); return -ENOMEM; } @@ -1739,10 +1950,10 @@ enomem: */ void *pcpu_lpage_remapped(void *kaddr) { - unsigned long unit_mask = pcpul_unit_size - 1; - void *lpage_addr = (void *)((unsigned long)kaddr & ~unit_mask); - unsigned long offset = (unsigned long)kaddr & unit_mask; - int left = 0, right = num_possible_cpus() - 1; + unsigned long lpage_mask = pcpul_lpage_size - 1; + void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask); + unsigned long offset = (unsigned long)kaddr & lpage_mask; + int left = 0, right = pcpul_nr_lpages - 1; int pos; /* pcpul in use at all? 
*/ @@ -1757,13 +1968,8 @@ void *pcpu_lpage_remapped(void *kaddr) left = pos + 1; else if (pcpul_map[pos].ptr > lpage_addr) right = pos - 1; - else { - /* it shouldn't be in the area for the first chunk */ - WARN_ON(offset < pcpul_size); - - return pcpul_vm.addr + - pcpul_map[pos].cpu * pcpul_unit_size + offset; - } + else + return pcpul_map[pos].map_addr + offset; } return NULL; -- cgit v1.2.3 From 96ccd4a43a4d80c80be636cd025a69959cf47424 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2009 12:47:52 +0200 Subject: genirq: Remove obsolete defines and typedefs The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t) have been kept around for migration reasons. The last users are gone, remove them. Signed-off-by: Thomas Gleixner --- Documentation/feature-removal-schedule.txt | 9 --------- include/linux/irq.h | 7 ------- 2 files changed, 16 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index f8cd450be9aa..2af78126b053 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -394,15 +394,6 @@ Who: Thomas Gleixner ----------------------------- -What: obsolete generic irq defines and typedefs -When: 2.6.30 -Why: The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t) - have been kept around for migration reasons. After more than two years - it's time to remove them finally -Who: Thomas Gleixner - ---------------------------- - What: fakephp and associated sysfs files in /sys/bus/pci/slots/ When: 2011 Why: In 2.6.27, the semantics of /sys/bus/pci/slots was redefined to diff --git a/include/linux/irq.h b/include/linux/irq.h index cb2e77a3f7f7..6956df9961ab 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -219,13 +219,6 @@ static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node); -/* - * Migration helpers for obsolete names, they will go away: - */ -#define hw_interrupt_type irq_chip -#define no_irq_type no_irq_chip -typedef struct irq_desc irq_desc_t; - /* * Pick up the arch-dependent methods: */ -- cgit v1.2.3 From 509dd025a4ac6f32921211557cfd434b81d8d7a7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 30 Jun 2009 16:07:26 -0300 Subject: V4L/DVB (12148): move V4L2_PIX_FMT_SGRBG8 to the proper place Instead of defining a new pif format on an internal header, move it to the V4L2 API header. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/gspca/stv06xx/stv06xx.h | 4 ---- include/linux/videodev2.h | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/media/video/gspca/stv06xx/stv06xx.h b/drivers/media/video/gspca/stv06xx/stv06xx.h index 9df7137fe67e..992ce530f138 100644 --- a/drivers/media/video/gspca/stv06xx/stv06xx.h +++ b/drivers/media/video/gspca/stv06xx/stv06xx.h @@ -36,10 +36,6 @@ #define STV_ISOC_ENDPOINT_ADDR 0x81 -#ifndef V4L2_PIX_FMT_SGRBG8 -#define V4L2_PIX_FMT_SGRBG8 v4l2_fourcc('G', 'R', 'B', 'G') -#endif - #define STV_REG23 0x0423 /* Control registers of the STV0600 ASIC */ diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 8a025d510904..95846d988011 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -318,6 +318,8 @@ struct v4l2_pix_format { /* see http://www.siliconimaging.com/RGB%20Bayer.htm */ #define V4L2_PIX_FMT_SBGGR8 v4l2_fourcc('B', 'A', '8', '1') /* 8 BGBG.. GRGR.. 
*/ #define V4L2_PIX_FMT_SGBRG8 v4l2_fourcc('G', 'B', 'R', 'G') /* 8 GBGB.. RGRG.. */ +#define V4L2_PIX_FMT_SGRBG8 v4l2_fourcc('G', 'R', 'B', 'G') /* 8 GRGR.. BGBG.. */ + /* * 10bit raw bayer, expanded to 16 bits * xxxxrrrrrrrrrrxxxxgggggggggg xxxxggggggggggxxxxbbbbbbbbbb... -- cgit v1.2.3 From 7dfba00d05f3c7db9510f3b54a472981cf1521af Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 29 Jun 2009 05:41:26 -0300 Subject: V4L/DVB (12135): Add a driver for mt9v011 sensor Adds driver for mt9v011 based on its datasheet, available at: http://download.micron.com/pdf/datasheets/imaging/MT9V011.pdf The driver was tested with a webcam that will be added on a next patch. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/video/Kconfig | 8 + drivers/media/video/Makefile | 1 + drivers/media/video/mt9v011.c | 355 ++++++++++++++++++++++++++++++++++++++++ drivers/media/video/mt9v011.h | 35 ++++ include/media/v4l2-chip-ident.h | 3 + 5 files changed, 402 insertions(+) create mode 100644 drivers/media/video/mt9v011.c create mode 100644 drivers/media/video/mt9v011.h (limited to 'include') diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index 061e147f6f26..84b6fc15519d 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -312,6 +312,14 @@ config VIDEO_OV7670 OV7670 VGA camera. It currently only works with the M88ALP01 controller. +config VIDEO_MT9V011 + tristate "Micron mt9v011 sensor support" + depends on I2C && VIDEO_V4L2 + ---help--- + This is a Video4Linux2 sensor-level driver for the Micron + mt0v011 1.3 Mpixel camera. It currently only works with the + em28xx driver. + config VIDEO_TCM825X tristate "TCM825x camera sensor support" depends on I2C && VIDEO_V4L2 diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile index 7fb3add1b387..9f2e3214a482 100644 --- a/drivers/media/video/Makefile +++ b/drivers/media/video/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_VIDEO_UPD64083) += upd64083.o obj-$(CONFIG_VIDEO_OV7670) += ov7670.o obj-$(CONFIG_VIDEO_TCM825X) += tcm825x.o obj-$(CONFIG_VIDEO_TVEEPROM) += tveeprom.o +obj-$(CONFIG_VIDEO_MT9V011) += mt9v011.o obj-$(CONFIG_SOC_CAMERA_MT9M001) += mt9m001.o obj-$(CONFIG_SOC_CAMERA_MT9M111) += mt9m111.o diff --git a/drivers/media/video/mt9v011.c b/drivers/media/video/mt9v011.c new file mode 100644 index 000000000000..4389197cedd7 --- /dev/null +++ b/drivers/media/video/mt9v011.c @@ -0,0 +1,355 @@ +/* + * mt9v011 -Micron 1/4-Inch VGA Digital Image Sensor + * + * Copyright (c) 2009 Mauro Carvalho Chehab (mchehab@redhat.com) + * This code is placed under the terms of the GNU General Public License v2 + */ + +#include +#include +#include +#include +#include "mt9v011.h" +#include +#include + +MODULE_DESCRIPTION("Micron mt9v011 sensor driver"); +MODULE_AUTHOR("Mauro Carvalho Chehab "); +MODULE_LICENSE("GPL"); + + +static int debug; +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0-2)"); + +/* supported controls */ +static struct v4l2_queryctrl mt9v011_qctrl[] = { + { + .id = V4L2_CID_GAIN, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Gain", + .minimum = 0, + .maximum = (1 << 10) - 1, + .step = 1, + .default_value = 0x0020, + .flags = 0, + }, { + .id = V4L2_CID_RED_BALANCE, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Red Balance", + .minimum = -1 << 9, + .maximum = (1 << 9) - 1, + .step = 1, + .default_value = 0, + .flags = 0, + }, { + .id = V4L2_CID_BLUE_BALANCE, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Blue Balance", + .minimum = -1 << 9, + .maximum = (1 << 9) - 1, + .step = 
1, + .default_value = 0, + .flags = 0, + }, +}; + +struct mt9v011 { + struct v4l2_subdev sd; + + u16 global_gain, red_bal, blue_bal; +}; + +static inline struct mt9v011 *to_mt9v011(struct v4l2_subdev *sd) +{ + return container_of(sd, struct mt9v011, sd); +} + +static int mt9v011_read(struct v4l2_subdev *sd, unsigned char addr) +{ + struct i2c_client *c = v4l2_get_subdevdata(sd); + __be16 buffer; + int rc, val; + + if (1 != (rc = i2c_master_send(c, &addr, 1))) + v4l2_dbg(0, debug, sd, + "i2c i/o error: rc == %d (should be 1)\n", rc); + + msleep(10); + + if (2 != (rc = i2c_master_recv(c, (char *)&buffer, 2))) + v4l2_dbg(0, debug, sd, + "i2c i/o error: rc == %d (should be 1)\n", rc); + + val = be16_to_cpu(buffer); + + v4l2_dbg(2, debug, sd, "mt9v011: read 0x%02x = 0x%04x\n", addr, val); + + return val; +} + +static void mt9v011_write(struct v4l2_subdev *sd, unsigned char addr, + u16 value) +{ + struct i2c_client *c = v4l2_get_subdevdata(sd); + unsigned char buffer[3]; + int rc; + + buffer[0] = addr; + buffer[1] = value >> 8; + buffer[2] = value & 0xff; + + v4l2_dbg(2, debug, sd, + "mt9v011: writing 0x%02x 0x%04x\n", buffer[0], value); + if (3 != (rc = i2c_master_send(c, buffer, 3))) + v4l2_dbg(0, debug, sd, + "i2c i/o error: rc == %d (should be 3)\n", rc); +} + + +struct i2c_reg_value { + unsigned char reg; + u16 value; +}; + +/* + * Values used at the original driver + * Some values are marked as Reserved at the datasheet + */ +static const struct i2c_reg_value mt9v011_init_default[] = { + /* guessed meaning - as mt9m111 */ + { R0D_MT9V011_RESET, 0x0001 }, + { R0D_MT9V011_RESET, 0x0000 }, + { R01_MT9V011_ROWSTART, 0x0008 }, + { R02_MT9V011_COLSTART, 0x0014 }, + { R03_MT9V011_HEIGHT, 0x01e0 }, + { R04_MT9V011_WIDTH, 0x0280 }, + { R05_MT9V011_HBLANK, 0x0001 }, + { R05_MT9V011_HBLANK, 0x0001 }, + { R0A_MT9V011_CLK_SPEED, 0x0000 }, + { R05_MT9V011_HBLANK, 0x000a }, + { 0x30, 0x0005 }, + { 0x34, 0x0100 }, + { 0x3d, 0x068f }, + { 0x40, 0x01e0 }, + { 0x52, 0x0100 }, + { 0x58, 0x0038 }, /* Datasheet default 0x0078 */ + { 0x59, 0x0723 }, /* Datasheet default 0x0703 */ + { 0x62, 0x041a }, /* Datasheet default 0x0418 */ + { R09_MT9V011_SHUTTER_WIDTH, 0x0418 }, + { R20_MT9V011_READ_MODE, 0x1100 }, +}; + +static void set_balance(struct v4l2_subdev *sd) +{ + struct mt9v011 *core = to_mt9v011(sd); + u16 green1_gain, green2_gain, blue_gain, red_gain; + + green1_gain = core->global_gain; + green2_gain = core->global_gain; + + blue_gain = core->global_gain + + core->global_gain * core->blue_bal / (1 << 9); + + red_gain = core->global_gain + + core->global_gain * core->blue_bal / (1 << 9); + + mt9v011_write(sd, R2B_MT9V011_GREEN_1_GAIN, green1_gain); + mt9v011_write(sd, R2E_MT9V011_GREEN_2_GAIN, green1_gain); + mt9v011_write(sd, R2C_MT9V011_BLUE_GAIN, blue_gain); + mt9v011_write(sd, R2D_MT9V011_RED_GAIN, red_gain); +} + +static int mt9v011_reset(struct v4l2_subdev *sd, u32 val) +{ + u16 version; + int i; + + version = mt9v011_read(sd, R00_MT9V011_CHIP_VERSION); + + if (version != MT9V011_VERSION) { + v4l2_info(sd, "*** unknown micron chip detected (0x%04x.\n", + version); + return -EINVAL; + + } + + for (i = 0; i < ARRAY_SIZE(mt9v011_init_default); i++) + mt9v011_write(sd, mt9v011_init_default[i].reg, + mt9v011_init_default[i].value); + + set_balance(sd); + + return 0; +}; + +static int mt9v011_g_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl) +{ + struct mt9v011 *core = to_mt9v011(sd); + + v4l2_dbg(1, debug, sd, "g_ctrl called\n"); + + switch (ctrl->id) { + case V4L2_CID_GAIN: + ctrl->value = 
core->global_gain; + return 0; + case V4L2_CID_RED_BALANCE: + ctrl->value = core->red_bal; + return 0; + case V4L2_CID_BLUE_BALANCE: + ctrl->value = core->blue_bal; + return 0; + } + return -EINVAL; +} + +static int mt9v011_s_ctrl(struct v4l2_subdev *sd, struct v4l2_control *ctrl) +{ + struct mt9v011 *core = to_mt9v011(sd); + u8 i, n; + n = ARRAY_SIZE(mt9v011_qctrl); + + for (i = 0; i < n; i++) { + if (ctrl->id != mt9v011_qctrl[i].id) + continue; + if (ctrl->value < mt9v011_qctrl[i].minimum || + ctrl->value > mt9v011_qctrl[i].maximum) + return -ERANGE; + v4l2_dbg(1, debug, sd, "s_ctrl: id=%d, value=%d\n", + ctrl->id, ctrl->value); + break; + } + + switch (ctrl->id) { + case V4L2_CID_GAIN: + core->global_gain = ctrl->value; + break; + case V4L2_CID_RED_BALANCE: + core->red_bal = ctrl->value; + break; + case V4L2_CID_BLUE_BALANCE: + core->blue_bal = ctrl->value; + break; + default: + return -EINVAL; + } + + set_balance(sd); + + return 0; +} + +#ifdef CONFIG_VIDEO_ADV_DEBUG +static int mt9v011_g_register(struct v4l2_subdev *sd, + struct v4l2_dbg_register *reg) +{ + struct i2c_client *client = v4l2_get_subdevdata(sd); + + if (!v4l2_chip_match_i2c_client(client, ®->match)) + return -EINVAL; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + reg->val = mt9v011_read(sd, reg->reg & 0xff); + reg->size = 2; + + return 0; +} + +static int mt9v011_s_register(struct v4l2_subdev *sd, + struct v4l2_dbg_register *reg) +{ + struct i2c_client *client = v4l2_get_subdevdata(sd); + + if (!v4l2_chip_match_i2c_client(client, ®->match)) + return -EINVAL; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + mt9v011_write(sd, reg->reg & 0xff, reg->val & 0xffff); + + return 0; +} +#endif + +static int mt9v011_g_chip_ident(struct v4l2_subdev *sd, + struct v4l2_dbg_chip_ident *chip) +{ + struct i2c_client *client = v4l2_get_subdevdata(sd); + + return v4l2_chip_ident_i2c_client(client, chip, V4L2_IDENT_MT9V011, + MT9V011_VERSION); +} + +static const struct v4l2_subdev_core_ops mt9v011_core_ops = { + .g_ctrl = mt9v011_g_ctrl, + .s_ctrl = mt9v011_s_ctrl, + .reset = mt9v011_reset, + .g_chip_ident = mt9v011_g_chip_ident, +#ifdef CONFIG_VIDEO_ADV_DEBUG + .g_register = mt9v011_g_register, + .s_register = mt9v011_s_register, +#endif +}; + +static const struct v4l2_subdev_ops mt9v011_ops = { + .core = &mt9v011_core_ops, +}; + + +/**************************************************************************** + I2C Client & Driver + ****************************************************************************/ + +static int mt9v011_probe(struct i2c_client *c, + const struct i2c_device_id *id) +{ + struct mt9v011 *core; + struct v4l2_subdev *sd; + + /* Check if the adapter supports the needed features */ + if (!i2c_check_functionality(c->adapter, + I2C_FUNC_SMBUS_READ_BYTE | I2C_FUNC_SMBUS_WRITE_BYTE_DATA)) + return -EIO; + + core = kzalloc(sizeof(struct mt9v011), GFP_KERNEL); + if (!core) + return -ENOMEM; + + core->global_gain = 0x0024; + + sd = &core->sd; + v4l2_i2c_subdev_init(sd, c, &mt9v011_ops); + v4l_info(c, "chip found @ 0x%02x (%s)\n", + c->addr << 1, c->adapter->name); + + return 0; +} + +static int mt9v011_remove(struct i2c_client *c) +{ + struct v4l2_subdev *sd = i2c_get_clientdata(c); + + v4l2_dbg(1, debug, sd, + "mt9v011.c: removing mt9v011 adapter on address 0x%x\n", + c->addr << 1); + + v4l2_device_unregister_subdev(sd); + kfree(to_mt9v011(sd)); + return 0; +} + +/* ----------------------------------------------------------------------- */ + +static const struct i2c_device_id mt9v011_id[] = { + { "mt9v011", 0 }, + { 
} +}; +MODULE_DEVICE_TABLE(i2c, mt9v011_id); + +static struct v4l2_i2c_driver_data v4l2_i2c_data = { + .name = "mt9v011", + .probe = mt9v011_probe, + .remove = mt9v011_remove, + .id_table = mt9v011_id, +}; diff --git a/drivers/media/video/mt9v011.h b/drivers/media/video/mt9v011.h new file mode 100644 index 000000000000..9e443ee30558 --- /dev/null +++ b/drivers/media/video/mt9v011.h @@ -0,0 +1,35 @@ +/* + * mt9v011 -Micron 1/4-Inch VGA Digital Image Sensor + * + * Copyright (c) 2009 Mauro Carvalho Chehab (mchehab@redhat.com) + * This code is placed under the terms of the GNU General Public License v2 + */ + +#ifndef MT9V011_H_ +#define MT9V011_H_ + +#define R00_MT9V011_CHIP_VERSION 0x00 +#define R01_MT9V011_ROWSTART 0x01 +#define R02_MT9V011_COLSTART 0x02 +#define R03_MT9V011_HEIGHT 0x03 +#define R04_MT9V011_WIDTH 0x04 +#define R05_MT9V011_HBLANK 0x05 +#define R06_MT9V011_VBLANK 0x06 +#define R07_MT9V011_OUT_CTRL 0x07 +#define R09_MT9V011_SHUTTER_WIDTH 0x09 +#define R0A_MT9V011_CLK_SPEED 0x0a +#define R0B_MT9V011_RESTART 0x0b +#define R0C_MT9V011_SHUTTER_DELAY 0x0c +#define R0D_MT9V011_RESET 0x0d +#define R1E_MT9V011_DIGITAL_ZOOM 0x1e +#define R20_MT9V011_READ_MODE 0x20 +#define R2B_MT9V011_GREEN_1_GAIN 0x2b +#define R2C_MT9V011_BLUE_GAIN 0x2c +#define R2D_MT9V011_RED_GAIN 0x2d +#define R2E_MT9V011_GREEN_2_GAIN 0x2e +#define R35_MT9V011_GLOBAL_GAIN 0x35 +#define RF1_MT9V011_CHIP_ENABLE 0xf1 + +#define MT9V011_VERSION 0x8243 + +#endif diff --git a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h index 4d7e2272c42f..11a4a2d3e364 100644 --- a/include/media/v4l2-chip-ident.h +++ b/include/media/v4l2-chip-ident.h @@ -155,6 +155,9 @@ enum { /* module cafe_ccic, just ident 8801 */ V4L2_IDENT_CAFE = 8801, + /* module mt9v011, just ident 8243 */ + V4L2_IDENT_MT9V011 = 8243, + /* module tw9910: just ident 9910 */ V4L2_IDENT_TW9910 = 9910, -- cgit v1.2.3 From 0e8635a8e1f2d4a9e1bfc6c3b21419a5921e674f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 20 Jun 2009 00:53:25 +0000 Subject: net: remove NET_RX_BAD and NET_RX_CN* defines almost no users in the tree; and the few that use them treat them like NET_RX_DROP. Signed-off-by: Florian Westphal Signed-off-by: David S. 
Miller --- drivers/net/rionet.c | 5 ----- drivers/net/wan/farsync.c | 19 ------------------- drivers/staging/otus/wrap_pkt.c | 3 --- include/linux/netdevice.h | 4 ---- net/decnet/dn_route.c | 2 +- net/lapb/lapb_iface.c | 2 +- 6 files changed, 2 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 8702e7acdee6..74cdb6f8f84f 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -114,11 +114,6 @@ static int rionet_rx_clean(struct net_device *ndev) if (error == NET_RX_DROP) { ndev->stats.rx_dropped++; - } else if (error == NET_RX_BAD) { - if (netif_msg_rx_err(rnet)) - printk(KERN_WARNING "%s: bad rx packet\n", - DRV_NAME); - ndev->stats.rx_errors++; } else { ndev->stats.rx_packets++; ndev->stats.rx_bytes += RIO_MAX_MSG_SIZE; diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 25c9ef6a1815..90c0a317d9d3 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -792,25 +792,6 @@ fst_process_rx_status(int rx_status, char *name) */ break; } - - case NET_RX_CN_LOW: - { - dbg(DBG_ASS, "%s: Receive Low Congestion\n", name); - break; - } - - case NET_RX_CN_MOD: - { - dbg(DBG_ASS, "%s: Receive Moderate Congestion\n", name); - break; - } - - case NET_RX_CN_HIGH: - { - dbg(DBG_ASS, "%s: Receive High Congestion\n", name); - break; - } - case NET_RX_DROP: { dbg(DBG_ASS, "%s: Received packet dropped\n", name); diff --git a/drivers/staging/otus/wrap_pkt.c b/drivers/staging/otus/wrap_pkt.c index 5db0004c8739..89a6b92f5972 100644 --- a/drivers/staging/otus/wrap_pkt.c +++ b/drivers/staging/otus/wrap_pkt.c @@ -156,10 +156,7 @@ void zfLnxRecvEth(zdev_t* dev, zbuf_t* buf, u16_t port) switch(netif_rx(buf)) #endif { - case NET_RX_BAD: case NET_RX_DROP: - case NET_RX_CN_MOD: - case NET_RX_CN_HIGH: break; default: macp->drv_stats.net_stats.rx_packets++; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d4a4d9867794..9f25ab2899de 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -72,10 +72,6 @@ struct wireless_dev; /* Backlog congestion levels */ #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ #define NET_RX_DROP 1 /* packet dropped */ -#define NET_RX_CN_LOW 2 /* storm alert, just in case */ -#define NET_RX_CN_MOD 3 /* Storm on its way! */ -#define NET_RX_CN_HIGH 4 /* The storm is here */ -#define NET_RX_BAD 5 /* packet dropped due to kernel error */ /* NET_XMIT_CN is special. It does not guarantee that this packet is lost. 
It * indicates that the device will soon be dropping packets, or already drops diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 1d6ca8a98dc6..9383d3e5a1ab 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -774,7 +774,7 @@ static int dn_rt_bug(struct sk_buff *skb) kfree_skb(skb); - return NET_RX_BAD; + return NET_RX_DROP; } static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index 2ba1bc4f3c3a..bda96d18fd98 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -407,7 +407,7 @@ int lapb_data_indication(struct lapb_cb *lapb, struct sk_buff *skb) return lapb->callbacks.data_indication(lapb->dev, skb); kfree_skb(skb); - return NET_RX_CN_HIGH; /* For now; must be != NET_RX_DROP */ + return NET_RX_SUCCESS; /* For now; must be != NET_RX_DROP */ } int lapb_data_transmit(struct lapb_cb *lapb, struct sk_buff *skb) -- cgit v1.2.3 From af794c74240d8d788058bdfee339512e7ac029b2 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Thu, 25 Jun 2009 04:42:19 +0000 Subject: cleanup: remove unused member in scm_cookie. This patch removes an unused member (seq) scm_cookie; besides initialized to 0 in the header file, it is not used. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- include/net/scm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/scm.h b/include/net/scm.h index f45bb6eca7d4..cf48c800e926 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -26,7 +26,6 @@ struct scm_cookie #ifdef CONFIG_SECURITY_NETWORK u32 secid; /* Passed security ID */ #endif - unsigned long seq; /* Connection seqno */ }; extern void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm); @@ -59,7 +58,6 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, scm->creds.gid = current_gid(); scm->creds.pid = task_tgid_vnr(p); scm->fp = NULL; - scm->seq = 0; unix_get_peersec_dgram(sock, scm); if (msg->msg_controllen <= 0) return 0; -- cgit v1.2.3 From 6650613d3387dcc30685e2781818ea7d0f840027 Mon Sep 17 00:00:00 2001 From: "oscar.medina@motorola.com" Date: Tue, 30 Jun 2009 03:25:39 +0000 Subject: tipc: Add socket options to get number of queued messages This patch allows a TIPC application to determine the number of messages currently waiting in a socket's receive queue (TIPC_SOCK_RECVQ_DEPTH) or in all TIPC socket receive queues (TIPC_NODE_RECVQ_DEPTH). Signed-off-by: Oscar Medina Signed-off-by: Allan Stephens Signed-off-by: David S. 
Miller --- include/linux/tipc.h | 2 ++ net/tipc/socket.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/linux/tipc.h b/include/linux/tipc.h index bea469455a0c..3d92396639de 100644 --- a/include/linux/tipc.h +++ b/include/linux/tipc.h @@ -209,5 +209,7 @@ struct sockaddr_tipc { #define TIPC_SRC_DROPPABLE 128 /* Default: 0 (resend congested msg) */ #define TIPC_DEST_DROPPABLE 129 /* Default: based on socket type */ #define TIPC_CONN_TIMEOUT 130 /* Default: 8000 (ms) */ +#define TIPC_NODE_RECVQ_DEPTH 131 /* Default: none (read only) */ +#define TIPC_SOCK_RECVQ_DEPTH 132 /* Default: none (read only) */ #endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1848693ebb82..e8254e809b79 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1748,6 +1748,12 @@ static int getsockopt(struct socket *sock, value = jiffies_to_msecs(sk->sk_rcvtimeo); /* no need to set "res", since already 0 at this point */ break; + case TIPC_NODE_RECVQ_DEPTH: + value = (u32)atomic_read(&tipc_queue_size); + break; + case TIPC_SOCK_RECVQ_DEPTH: + value = skb_queue_len(&sk->sk_receive_queue); + break; default: res = -EINVAL; } -- cgit v1.2.3 From e04af024b2e74249990587e76ec98220028c01c3 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 3 Jul 2009 20:11:58 +0000 Subject: net, netns_xt: shrink netns_xt members In case if kernel was compiled without ebtables support there is no need to keep ebt_table pointers in netns_xt structure. Make it config dependent. Signed-off-by: Cyrill Gorcunov Signed-off-by: David S. Miller --- include/net/netns/x_tables.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index 9554a644a8f8..591db7d657a3 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -8,8 +8,11 @@ struct ebt_table; struct netns_xt { struct list_head tables[NFPROTO_NUMPROTO]; +#if defined(CONFIG_BRIDGE_NF_EBTABLES) || \ + defined(CONFIG_BRIDGE_NF_EBTABLES_MODULE) struct ebt_table *broute_table; struct ebt_table *frame_filter; struct ebt_table *frame_nat; +#endif }; #endif -- cgit v1.2.3 From a65e7bfcd74e4c0939f235d2bf9f48ddb3a57991 Mon Sep 17 00:00:00 2001 From: Hui Zhu Date: Sun, 5 Jul 2009 12:08:15 -0700 Subject: elf: fix multithreaded program core dumping on arm Fix the multithread program core thread message error. This issue affects arches with neither has CORE_DUMP_USE_REGSET nor ELF_CORE_COPY_TASK_REGS, ARM is one of them. The thread message of core file is generated in elf_dump_thread_status. The register values is set by elf_core_copy_task_regs in this function. If an arch doesn't define ELF_CORE_COPY_TASK_REGS, elf_core_copy_task_regs() will do nothing. Then the core file will not have the register message of thread. So add elf_core_copy_regs to set regiser values if ELF_CORE_COPY_TASK_REGS doesn't define. The following is how to reproduce this issue: cat 1.c #include #include #include void td1(void * i) { while (1) { printf ("1\n"); sleep (1); } return; } void td2(void * i) { while (1) { printf ("2\n"); sleep (1); } return; } int main(int argc,char *argv[],char *envp[]) { pthread_t t1,t2; pthread_create(&t1, NULL, (void*)td1, NULL); pthread_create(&t2, NULL, (void*)td2, NULL); sleep (10); assert(0); return (0); } arm-xxx-gcc -g -lpthread 1.c -o 1 copy 1.c and 1 to a arm board. Goto this board. ulimit -c 1800000 ./1 # ./1 1 2 1 ... ... 1 1: 1.c:37: main: Assertion `0' failed. Aborted (core dumped) Then you can get a core file. 
gdb 1 core.xxx Without the patch: (gdb) info threads 3 process 909 0x00000000 in ?? () 2 process 908 0x00000000 in ?? () * 1 process 907 0x4a6e2238 in raise () from /lib/libc.so.6 You can found that the pc of 909 and 908 is 0x00000000. With the patch: (gdb) info threads 3 process 885 0x4a749974 in nanosleep () from /lib/libc.so.6 2 process 884 0x4a749974 in nanosleep () from /lib/libc.so.6 * 1 process 883 0x4a6e2238 in raise () from /lib/libc.so.6 The pc of 885 and 884 is right. Signed-off-by: Hui Zhu Cc: Amerigo Wang Cc: Al Viro Cc: David Howells Cc: Roland McGrath Cc: Jakub Jelinek Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/elfcore.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 7605c5e9589f..03ec16779802 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -125,6 +125,8 @@ static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* #ifdef ELF_CORE_COPY_TASK_REGS return ELF_CORE_COPY_TASK_REGS(t, elfregs); +#else + elf_core_copy_regs(elfregs, task_pt_regs(t)); #endif return 0; } -- cgit v1.2.3 From 82e3310ace59794ecf0f531eca94647b2863dfda Mon Sep 17 00:00:00 2001 From: Tobias Doerffel Date: Sun, 5 Jul 2009 12:08:23 -0700 Subject: linux/sysrq.h needs linux/errno.h In include/linux/sysrq.h the constant EINVAL is being used but is undefined if include/linux/errno.h is not included before. Fix this by adding #include at the beginning. Signed-off-by: Tobias Doerffel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sysrq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 98a1d8cfb73d..99adcdc0d3ca 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -14,6 +14,8 @@ #ifndef _LINUX_SYSRQ_H #define _LINUX_SYSRQ_H +#include + struct pt_regs; struct tty_struct; -- cgit v1.2.3 From f2ac72e8268d9559c3114d5a22679f91f80a2238 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 7 Jul 2009 12:30:33 +0800 Subject: crypto: api - Add new template create function This patch introduces the template->create function intended to replace the existing alloc function. The intention is for create to handle the registration directly, whereas currently the caller of alloc has to handle the registration. This allows type-specific code to be run prior to registration. 
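For illustration, a minimal sketch of a template wired up through the new hook (this is an assumption about intended usage, not code from this patch; "example", example_create() and example_free() are hypothetical names):

/*
 * Sketch only.  Unlike ->alloc, where cryptomgr registers the returned
 * instance, ->create registers the instance itself, so type-specific
 * setup can run before registration.
 */
static int example_create(struct crypto_template *tmpl, struct rtattr **tb)
{
	struct crypto_instance *inst;
	struct crypto_alg *alg;
	int err;

	alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_CIPHER,
				  CRYPTO_ALG_TYPE_MASK);
	if (IS_ERR(alg))
		return PTR_ERR(alg);

	inst = crypto_alloc_instance("example", alg);
	err = PTR_ERR(inst);
	if (IS_ERR(inst))
		goto out_put_alg;

	/* type-specific initialisation would go here, before registration */

	err = crypto_register_instance(tmpl, inst);
	if (err) {
		crypto_drop_spawn(crypto_instance_ctx(inst));
		kfree(inst);
	}

out_put_alg:
	crypto_mod_put(alg);
	return err;
}

static void example_free(struct crypto_instance *inst)
{
	crypto_drop_spawn(crypto_instance_ctx(inst));
	kfree(inst);
}

static struct crypto_template example_tmpl = {
	.name	= "example",
	.create	= example_create,	/* used in preference to .alloc */
	.free	= example_free,
	.module	= THIS_MODULE,
};
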
Signed-off-by: Herbert Xu --- crypto/algboss.c | 5 +++++ include/crypto/algapi.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/crypto/algboss.c b/crypto/algboss.c index 9908dd830c26..412241ce4cfa 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -68,6 +68,11 @@ static int cryptomgr_probe(void *data) goto err; do { + if (tmpl->create) { + err = tmpl->create(tmpl, param->tb); + continue; + } + inst = tmpl->alloc(param->tb); if (IS_ERR(inst)) err = PTR_ERR(inst); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 010545436efa..ce010a346420 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -52,6 +52,7 @@ struct crypto_template { struct crypto_instance *(*alloc)(struct rtattr **tb); void (*free)(struct crypto_instance *inst); + int (*create)(struct crypto_template *tmpl, struct rtattr **tb); char name[CRYPTO_MAX_ALG_NAME]; }; -- cgit v1.2.3 From 7afdbf23c3acdec3eaf1b94f87132fff3d81ce73 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 7 Jul 2009 10:23:43 +0200 Subject: signals: declare sys_rt_tgsigqueueinfo in syscalls.h sys_rt_tgsigqueueinfo needs to be declared in linux/syscalls.h so that architectures defining the system call table in C can reference it. Signed-off-by: Arnd Bergmann LKML-Reference: <200907071023.44008.arnd@arndb.de> Signed-off-by: Thomas Gleixner --- include/linux/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index fa4242cdade8..80de7003d8c2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -321,6 +321,8 @@ asmlinkage long sys_rt_sigtimedwait(const sigset_t __user *uthese, siginfo_t __user *uinfo, const struct timespec __user *uts, size_t sigsetsize); +asmlinkage long sys_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, + siginfo_t __user *uinfo); asmlinkage long sys_kill(int pid, int sig); asmlinkage long sys_tgkill(int tgid, int pid, int sig); asmlinkage long sys_tkill(int pid, int sig); -- cgit v1.2.3 From 70ec7bb91ad0d6cce84c8e17f8cbb608dda7b18c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 7 Jul 2009 14:07:37 +0800 Subject: crypto: api - Add crypto_alloc_instance2 This patch adds a new argument to crypto_alloc_instance which sets aside some space before the instance for use by algorithms such as shash that place type-specific data before crypto_alg. For compatibility the function has been renamed so that existing users aren't affected. 
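As a sketch of the intended usage (an assumption, not code from this patch; the wrapper type and names are hypothetical), a template type embeds the crypto_instance behind its own data and asks crypto_alloc_instance2() to reserve exactly that many head bytes:

/*
 * crypto_alloc_instance2() returns a pointer to the start of the whole
 * allocation (the head area); the embedded crypto_instance sits "head"
 * bytes further in and is what the crypto core operates on.
 */
struct example_instance {
	unsigned int type_private;	/* lives in the reserved head area */
	struct crypto_instance inst;
};

static struct example_instance *example_alloc_inst(struct crypto_alg *alg)
{
	struct example_instance *p;

	p = crypto_alloc_instance2("example", alg,
				   offsetof(struct example_instance, inst));
	if (IS_ERR(p))
		return p;

	p->type_private = 0;	/* type-specific data, set before registration */
	return p;
}
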
Signed-off-by: Herbert Xu --- crypto/algapi.c | 37 +++++++++++++++++++++++++++++++------ include/crypto/algapi.h | 2 ++ 2 files changed, 33 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/crypto/algapi.c b/crypto/algapi.c index 56c62e2858d5..429f1a003b06 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -627,17 +627,20 @@ int crypto_attr_u32(struct rtattr *rta, u32 *num) } EXPORT_SYMBOL_GPL(crypto_attr_u32); -struct crypto_instance *crypto_alloc_instance(const char *name, - struct crypto_alg *alg) +void *crypto_alloc_instance2(const char *name, struct crypto_alg *alg, + unsigned int head) { struct crypto_instance *inst; - struct crypto_spawn *spawn; + char *p; int err; - inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); - if (!inst) + p = kzalloc(head + sizeof(*inst) + sizeof(struct crypto_spawn), + GFP_KERNEL); + if (!p) return ERR_PTR(-ENOMEM); + inst = (void *)(p + head); + err = -ENAMETOOLONG; if (snprintf(inst->alg.cra_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", name, alg->cra_name) >= CRYPTO_MAX_ALG_NAME) @@ -647,6 +650,25 @@ struct crypto_instance *crypto_alloc_instance(const char *name, name, alg->cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; + return p; + +err_free_inst: + kfree(p); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(crypto_alloc_instance2); + +struct crypto_instance *crypto_alloc_instance(const char *name, + struct crypto_alg *alg) +{ + struct crypto_instance *inst; + struct crypto_spawn *spawn; + int err; + + inst = crypto_alloc_instance2(name, alg, 0); + if (IS_ERR(inst)) + goto out; + spawn = crypto_instance_ctx(inst); err = crypto_init_spawn(spawn, alg, inst, CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_ASYNC); @@ -658,7 +680,10 @@ struct crypto_instance *crypto_alloc_instance(const char *name, err_free_inst: kfree(inst); - return ERR_PTR(err); + inst = ERR_PTR(err); + +out: + return inst; } EXPORT_SYMBOL_GPL(crypto_alloc_instance); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index ce010a346420..99bb29723130 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -132,6 +132,8 @@ int crypto_check_attr_type(struct rtattr **tb, u32 type); const char *crypto_attr_alg_name(struct rtattr *rta); struct crypto_alg *crypto_attr_alg(struct rtattr *rta, u32 type, u32 mask); int crypto_attr_u32(struct rtattr *rta, u32 *num); +void *crypto_alloc_instance2(const char *name, struct crypto_alg *alg, + unsigned int head); struct crypto_instance *crypto_alloc_instance(const char *name, struct crypto_alg *alg); -- cgit v1.2.3 From 7a6d3c8b3049d07123628f2bf57127bba2cc878f Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Wed, 1 Jul 2009 17:28:41 -0700 Subject: fuse: make the number of max background requests and congestion threshold tunable The practical values for these limits depend on the design of the filesystem server so let userspace set them at initialization time. 
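For illustration (an assumption about server-side usage, not code from this patch), a filesystem daemon answering FUSE_INIT can now request its own limits through the two new fuse_init_out fields; leaving a field at zero keeps the kernel defaults (12 and 9):

#include <linux/fuse.h>

/* Example values only; a real server sizes these to its own design. */
static void example_fill_init_out(struct fuse_init_out *out)
{
	out->major = FUSE_KERNEL_VERSION;
	out->minor = FUSE_KERNEL_MINOR_VERSION;	/* must be >= 13 */

	out->max_background = 64;	/* outstanding background requests */
	out->congestion_threshold = 48;	/* ~75% of max_background */
}
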
Signed-off-by: Csaba Henk Signed-off-by: Miklos Szeredi --- fs/fuse/dev.c | 10 +++++----- fs/fuse/fuse_i.h | 12 ++++++------ fs/fuse/inode.c | 14 ++++++++++++++ include/linux/fuse.h | 9 +++++++-- 4 files changed, 32 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f58ecbc416c8..b152761c1bf6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -250,7 +250,7 @@ static void queue_request(struct fuse_conn *fc, struct fuse_req *req) static void flush_bg_queue(struct fuse_conn *fc) { - while (fc->active_background < FUSE_MAX_BACKGROUND && + while (fc->active_background < fc->max_background && !list_empty(&fc->bg_queue)) { struct fuse_req *req; @@ -280,11 +280,11 @@ __releases(&fc->lock) list_del(&req->intr_entry); req->state = FUSE_REQ_FINISHED; if (req->background) { - if (fc->num_background == FUSE_MAX_BACKGROUND) { + if (fc->num_background == fc->max_background) { fc->blocked = 0; wake_up_all(&fc->blocked_waitq); } - if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + if (fc->num_background == fc->congestion_threshold && fc->connected && fc->bdi_initialized) { clear_bdi_congested(&fc->bdi, READ); clear_bdi_congested(&fc->bdi, WRITE); @@ -410,9 +410,9 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, { req->background = 1; fc->num_background++; - if (fc->num_background == FUSE_MAX_BACKGROUND) + if (fc->num_background == fc->max_background) fc->blocked = 1; - if (fc->num_background == FUSE_CONGESTION_THRESHOLD && + if (fc->num_background == fc->congestion_threshold && fc->bdi_initialized) { set_bdi_congested(&fc->bdi, READ); set_bdi_congested(&fc->bdi, WRITE); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 52b641fc0faf..6bcfab04396f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -25,12 +25,6 @@ /** Max number of pages that can be used in a single read request */ #define FUSE_MAX_PAGES_PER_REQ 32 -/** Maximum number of outstanding background requests */ -#define FUSE_MAX_BACKGROUND 12 - -/** Congestion starts at 75% of maximum */ -#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100) - /** Bias for fi->writectr, meaning new writepages must not be sent */ #define FUSE_NOWRITE INT_MIN @@ -349,6 +343,12 @@ struct fuse_conn { /** rbtree of fuse_files waiting for poll events indexed by ph */ struct rb_root polled_files; + /** Maximum number of outstanding background requests */ + unsigned max_background; + + /** Number of background requests at which congestion starts */ + unsigned congestion_threshold; + /** Number of requests currently in the background */ unsigned num_background; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f91ccc4a189d..9aa6f46d0c32 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -32,6 +32,12 @@ DEFINE_MUTEX(fuse_mutex); #define FUSE_DEFAULT_BLKSIZE 512 +/** Maximum number of outstanding background requests */ +#define FUSE_DEFAULT_MAX_BACKGROUND 12 + +/** Congestion starts at 75% of maximum */ +#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) + struct fuse_mount_data { int fd; unsigned rootmode; @@ -517,6 +523,8 @@ void fuse_conn_init(struct fuse_conn *fc) INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); atomic_set(&fc->num_waiting, 0); + fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; + fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; fc->khctr = 0; fc->polled_files = RB_ROOT; fc->reqctr = 0; @@ -736,6 +744,12 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) else { unsigned 
long ra_pages; + if (arg->minor >= 13) { + if (arg->max_background) + fc->max_background = arg->max_background; + if (arg->congestion_threshold) + fc->congestion_threshold = arg->congestion_threshold; + } if (arg->minor >= 6) { ra_pages = arg->max_readahead / PAGE_CACHE_SIZE; if (arg->flags & FUSE_ASYNC_READ) diff --git a/include/linux/fuse.h b/include/linux/fuse.h index cf593bf9fd32..b3700f0ac268 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -30,6 +30,10 @@ * - add umask flag to input argument of open, mknod and mkdir * - add notification messages for invalidation of inodes and * directory entries + * + * 7.13 + * - make max number of background requests and congestion threshold + * tunables */ #ifndef _LINUX_FUSE_H @@ -41,7 +45,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 12 +#define FUSE_KERNEL_MINOR_VERSION 13 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -427,7 +431,8 @@ struct fuse_init_out { __u32 minor; __u32 max_readahead; __u32 flags; - __u32 unused; + __u16 max_background; + __u16 congestion_threshold; __u32 max_write; }; -- cgit v1.2.3 From 77ae365eca895061c8bf2b2e3ae1d9ea62869739 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 27 Mar 2009 11:00:29 -0400 Subject: ring-buffer: make lockless This patch converts the ring buffers into a completely lockless buffer recording system. The read side still takes locks since we still serialize readers. But the writers are the ones that must be lockless (those can happen in NMIs). The main change is to the "head_page" pointer. We write to the tail, and read from the head. The "head_page" pointer in the cpu buffer is now just a reference to where to look. The real head page is now kept in the head_page->list->prev->next pointer. That is, in the list head of the previous page we set flags. The list pages are allocated to be aligned such that the lowest significant bits are always zero pointing to the list. This gives us play to put in flags to their pointers. bit 0: set when the page is a head page bit 1: set when the writer is moving the page (for overwrite mode) cmpxchg is used to update the pointer. When the writer wraps the buffer and the tail meets the head, in overwrite mode, the writer must move the head page forward. It first uses cmpxchg to change the pointer flag from 1 to 2. Once this is done, the reader on another CPU will not take the page from the buffer. The writers need to protect against interrupts (we don't bother with disabling interrupts because NMIs are allowed to write too). After the writer sets the pointer flag to 2, it takes care to manage interrupts coming in. This is discribed in detail within the comments of the code. Changes in version 2: - Let reader reset entries value of header page. - Fix tail page passing commit page on reader page test. 
- Always increment entries and write counter in rb_tail_page_update - Add safety check in rb_set_commit_to_write to break out of infinite loop - add mask in rb_is_reader_page [ Impact: lock free writing to the ring buffer ] Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 1 - kernel/trace/ring_buffer.c | 886 ++++++++++++++++++++++++++++++++++++-------- kernel/trace/trace.c | 3 - 3 files changed, 738 insertions(+), 152 deletions(-) (limited to 'include') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 29f8599e6bea..7fca71693ae7 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -170,7 +170,6 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu); -unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu); u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7c0168ad6d51..e648ba4f70e0 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -322,6 +322,14 @@ struct buffer_data_page { unsigned char data[]; /* data of buffer page */ }; +/* + * Note, the buffer_page list must be first. The buffer pages + * are allocated in cache lines, which means that each buffer + * page will be at the beginning of a cache line, and thus + * the least significant bits will be zero. We use this to + * add flags in the list struct pointers, to make the ring buffer + * lockless. + */ struct buffer_page { struct list_head list; /* list of buffer pages */ local_t write; /* index for next write */ @@ -330,6 +338,21 @@ struct buffer_page { struct buffer_data_page *page; /* Actual data page */ }; +/* + * The buffer page counters, write and entries, must be reset + * atomically when crossing page boundaries. To synchronize this + * update, two counters are inserted into the number. One is + * the actual counter for the write position or count on the page. + * + * The other is a counter of updaters. Before an update happens + * the update partition of the counter is incremented. This will + * allow the updater to update the counter atomically. + * + * The counter is 20 bits, and the state data is 12. 
+ */ +#define RB_WRITE_MASK 0xfffff +#define RB_WRITE_INTCNT (1 << 20) + static void rb_init_page(struct buffer_data_page *bpage) { local_set(&bpage->commit, 0); @@ -403,7 +426,7 @@ int ring_buffer_print_page_header(struct trace_seq *s) struct ring_buffer_per_cpu { int cpu; struct ring_buffer *buffer; - spinlock_t reader_lock; /* serialize readers */ + spinlock_t reader_lock; /* serialize readers */ raw_spinlock_t lock; struct lock_class_key lock_key; struct list_head *pages; @@ -411,13 +434,12 @@ struct ring_buffer_per_cpu { struct buffer_page *tail_page; /* write to tail */ struct buffer_page *commit_page; /* committed pages */ struct buffer_page *reader_page; - unsigned long nmi_dropped; - unsigned long commit_overrun; - unsigned long overrun; - unsigned long read; + local_t commit_overrun; + local_t overrun; local_t entries; local_t committing; local_t commits; + unsigned long read; u64 write_stamp; u64 read_stamp; atomic_t record_disabled; @@ -489,6 +511,385 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); +/* + * Making the ring buffer lockless makes things tricky. + * Although writes only happen on the CPU that they are on, + * and they only need to worry about interrupts. Reads can + * happen on any CPU. + * + * The reader page is always off the ring buffer, but when the + * reader finishes with a page, it needs to swap its page with + * a new one from the buffer. The reader needs to take from + * the head (writes go to the tail). But if a writer is in overwrite + * mode and wraps, it must push the head page forward. + * + * Here lies the problem. + * + * The reader must be careful to replace only the head page, and + * not another one. As described at the top of the file in the + * ASCII art, the reader sets its old page to point to the next + * page after head. It then sets the page after head to point to + * the old reader page. But if the writer moves the head page + * during this operation, the reader could end up with the tail. + * + * We use cmpxchg to help prevent this race. We also do something + * special with the page before head. We set the LSB to 1. + * + * When the writer must push the page forward, it will clear the + * bit that points to the head page, move the head, and then set + * the bit that points to the new head page. + * + * We also don't want an interrupt coming in and moving the head + * page on another writer. Thus we use the second LSB to catch + * that too. Thus: + * + * head->list->prev->next bit 1 bit 0 + * ------- ------- + * Normal page 0 0 + * Points to head page 0 1 + * New head page 1 0 + * + * Note we can not trust the prev pointer of the head page, because: + * + * +----+ +-----+ +-----+ + * | |------>| T |---X--->| N | + * | |<------| | | | + * +----+ +-----+ +-----+ + * ^ ^ | + * | +-----+ | | + * +----------| R |----------+ | + * | |<-----------+ + * +-----+ + * + * Key: ---X--> HEAD flag set in pointer + * T Tail page + * R Reader page + * N Next page + * + * (see __rb_reserve_next() to see where this happens) + * + * What the above shows is that the reader just swapped out + * the reader page with a page in the buffer, but before it + * could make the new header point back to the new page added + * it was preempted by a writer. The writer moved forward onto + * the new page added by the reader and is about to move forward + * again. + * + * You can see, it is legitimate for the previous pointer of + * the head (or any page) not to point back to itself. 
But only + * temporarially. + */ + +#define RB_PAGE_NORMAL 0UL +#define RB_PAGE_HEAD 1UL +#define RB_PAGE_UPDATE 2UL + + +#define RB_FLAG_MASK 3UL + +/* PAGE_MOVED is not part of the mask */ +#define RB_PAGE_MOVED 4UL + +/* + * rb_list_head - remove any bit + */ +static struct list_head *rb_list_head(struct list_head *list) +{ + unsigned long val = (unsigned long)list; + + return (struct list_head *)(val & ~RB_FLAG_MASK); +} + +/* + * rb_is_head_page - test if the give page is the head page + * + * Because the reader may move the head_page pointer, we can + * not trust what the head page is (it may be pointing to + * the reader page). But if the next page is a header page, + * its flags will be non zero. + */ +static int inline +rb_is_head_page(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_page *page, struct list_head *list) +{ + unsigned long val; + + val = (unsigned long)list->next; + + if ((val & ~RB_FLAG_MASK) != (unsigned long)&page->list) + return RB_PAGE_MOVED; + + return val & RB_FLAG_MASK; +} + +/* + * rb_is_reader_page + * + * The unique thing about the reader page, is that, if the + * writer is ever on it, the previous pointer never points + * back to the reader page. + */ +static int rb_is_reader_page(struct buffer_page *page) +{ + struct list_head *list = page->list.prev; + + return rb_list_head(list->next) != &page->list; +} + +/* + * rb_set_list_to_head - set a list_head to be pointing to head. + */ +static void rb_set_list_to_head(struct ring_buffer_per_cpu *cpu_buffer, + struct list_head *list) +{ + unsigned long *ptr; + + ptr = (unsigned long *)&list->next; + *ptr |= RB_PAGE_HEAD; + *ptr &= ~RB_PAGE_UPDATE; +} + +/* + * rb_head_page_activate - sets up head page + */ +static void rb_head_page_activate(struct ring_buffer_per_cpu *cpu_buffer) +{ + struct buffer_page *head; + + head = cpu_buffer->head_page; + if (!head) + return; + + /* + * Set the previous list pointer to have the HEAD flag. + */ + rb_set_list_to_head(cpu_buffer, head->list.prev); +} + +static void rb_list_head_clear(struct list_head *list) +{ + unsigned long *ptr = (unsigned long *)&list->next; + + *ptr &= ~RB_FLAG_MASK; +} + +/* + * rb_head_page_dactivate - clears head page ptr (for free list) + */ +static void +rb_head_page_deactivate(struct ring_buffer_per_cpu *cpu_buffer) +{ + struct list_head *hd; + + /* Go through the whole list and clear any pointers found. 
*/ + rb_list_head_clear(cpu_buffer->pages); + + list_for_each(hd, cpu_buffer->pages) + rb_list_head_clear(hd); +} + +static int rb_head_page_set(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_page *head, + struct buffer_page *prev, + int old_flag, int new_flag) +{ + struct list_head *list; + unsigned long val = (unsigned long)&head->list; + unsigned long ret; + + list = &prev->list; + + val &= ~RB_FLAG_MASK; + + ret = (unsigned long)cmpxchg(&list->next, + val | old_flag, val | new_flag); + + /* check if the reader took the page */ + if ((ret & ~RB_FLAG_MASK) != val) + return RB_PAGE_MOVED; + + return ret & RB_FLAG_MASK; +} + +static int rb_head_page_set_update(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_page *head, + struct buffer_page *prev, + int old_flag) +{ + return rb_head_page_set(cpu_buffer, head, prev, + old_flag, RB_PAGE_UPDATE); +} + +static int rb_head_page_set_head(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_page *head, + struct buffer_page *prev, + int old_flag) +{ + return rb_head_page_set(cpu_buffer, head, prev, + old_flag, RB_PAGE_HEAD); +} + +static int rb_head_page_set_normal(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_page *head, + struct buffer_page *prev, + int old_flag) +{ + return rb_head_page_set(cpu_buffer, head, prev, + old_flag, RB_PAGE_NORMAL); +} + +static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_page **bpage) +{ + struct list_head *p = rb_list_head((*bpage)->list.next); + + *bpage = list_entry(p, struct buffer_page, list); +} + +static struct buffer_page * +rb_set_head_page(struct ring_buffer_per_cpu *cpu_buffer) +{ + struct buffer_page *head; + struct buffer_page *page; + struct list_head *list; + int i; + + if (RB_WARN_ON(cpu_buffer, !cpu_buffer->head_page)) + return NULL; + + /* sanity check */ + list = cpu_buffer->pages; + if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev->next) != list)) + return NULL; + + page = head = cpu_buffer->head_page; + /* + * It is possible that the writer moves the header behind + * where we started, and we miss in one loop. + * A second loop should grab the header, but we'll do + * three loops just because I'm paranoid. + */ + for (i = 0; i < 3; i++) { + do { + if (rb_is_head_page(cpu_buffer, page, page->list.prev)) { + cpu_buffer->head_page = page; + return page; + } + rb_inc_page(cpu_buffer, &page); + } while (page != head); + } + + RB_WARN_ON(cpu_buffer, 1); + + return NULL; +} + +static int rb_head_page_replace(struct buffer_page *old, + struct buffer_page *new) +{ + unsigned long *ptr = (unsigned long *)&old->list.prev->next; + unsigned long val; + unsigned long ret; + + val = *ptr & ~RB_FLAG_MASK; + val |= RB_PAGE_HEAD; + + ret = cmpxchg(ptr, val, &new->list); + + return ret == val; +} + +/* + * rb_tail_page_update - move the tail page forward + * + * Returns 1 if moved tail page, 0 if someone else did. + */ +static int rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_page *tail_page, + struct buffer_page *next_page) +{ + struct buffer_page *old_tail; + unsigned long old_entries; + unsigned long old_write; + int ret = 0; + + /* + * The tail page now needs to be moved forward. + * + * We need to reset the tail page, but without messing + * with possible erasing of data brought in by interrupts + * that have moved the tail page and are currently on it. + * + * We add a counter to the write field to denote this. 
+ */ + old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write); + old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries); + + /* + * Just make sure we have seen our old_write and synchronize + * with any interrupts that come in. + */ + barrier(); + + /* + * If the tail page is still the same as what we think + * it is, then it is up to us to update the tail + * pointer. + */ + if (tail_page == cpu_buffer->tail_page) { + /* Zero the write counter */ + unsigned long val = old_write & ~RB_WRITE_MASK; + unsigned long eval = old_entries & ~RB_WRITE_MASK; + + /* + * This will only succeed if an interrupt did + * not come in and change it. In which case, we + * do not want to modify it. + */ + local_cmpxchg(&next_page->write, old_write, val); + local_cmpxchg(&next_page->entries, old_entries, eval); + + /* + * No need to worry about races with clearing out the commit. + * it only can increment when a commit takes place. But that + * only happens in the outer most nested commit. + */ + local_set(&next_page->page->commit, 0); + + old_tail = cmpxchg(&cpu_buffer->tail_page, + tail_page, next_page); + + if (old_tail == tail_page) + ret = 1; + } + + return ret; +} + +static int rb_check_bpage(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_page *bpage) +{ + unsigned long val = (unsigned long)bpage; + + if (RB_WARN_ON(cpu_buffer, val & RB_FLAG_MASK)) + return 1; + + return 0; +} + +/** + * rb_check_list - make sure a pointer to a list has the last bits zero + */ +static int rb_check_list(struct ring_buffer_per_cpu *cpu_buffer, + struct list_head *list) +{ + if (RB_WARN_ON(cpu_buffer, rb_list_head(list->prev) != list->prev)) + return 1; + if (RB_WARN_ON(cpu_buffer, rb_list_head(list->next) != list->next)) + return 1; + return 0; +} + /** * check_pages - integrity check of buffer pages * @cpu_buffer: CPU buffer with pages to test @@ -501,11 +902,16 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) struct list_head *head = cpu_buffer->pages; struct buffer_page *bpage, *tmp; + rb_head_page_deactivate(cpu_buffer); + if (RB_WARN_ON(cpu_buffer, head->next->prev != head)) return -1; if (RB_WARN_ON(cpu_buffer, head->prev->next != head)) return -1; + if (rb_check_list(cpu_buffer, head)) + return -1; + list_for_each_entry_safe(bpage, tmp, head, list) { if (RB_WARN_ON(cpu_buffer, bpage->list.next->prev != &bpage->list)) @@ -513,8 +919,12 @@ static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) if (RB_WARN_ON(cpu_buffer, bpage->list.prev->next != &bpage->list)) return -1; + if (rb_check_list(cpu_buffer, &bpage->list)) + return -1; } + rb_head_page_activate(cpu_buffer); + return 0; } @@ -533,6 +943,9 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer, GFP_KERNEL, cpu_to_node(cpu_buffer->cpu)); if (!bpage) goto free_pages; + + rb_check_bpage(cpu_buffer, bpage); + list_add(&bpage->list, &pages); addr = __get_free_page(GFP_KERNEL); @@ -586,6 +999,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) if (!bpage) goto fail_free_buffer; + rb_check_bpage(cpu_buffer, bpage); + cpu_buffer->reader_page = bpage; addr = __get_free_page(GFP_KERNEL); if (!addr) @@ -603,6 +1018,8 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu) = list_entry(cpu_buffer->pages, struct buffer_page, list); cpu_buffer->tail_page = cpu_buffer->commit_page = cpu_buffer->head_page; + rb_head_page_activate(cpu_buffer); + return cpu_buffer; fail_free_reader: @@ -620,6 +1037,8 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) 
free_buffer_page(cpu_buffer->reader_page); + rb_head_page_deactivate(cpu_buffer); + if (head) { list_for_each_entry_safe(bpage, tmp, head, list) { list_del_init(&bpage->list); @@ -770,6 +1189,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages) atomic_inc(&cpu_buffer->record_disabled); synchronize_sched(); + rb_head_page_deactivate(cpu_buffer); + for (i = 0; i < nr_pages; i++) { if (RB_WARN_ON(cpu_buffer, list_empty(cpu_buffer->pages))) return; @@ -800,6 +1221,9 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, atomic_inc(&cpu_buffer->record_disabled); synchronize_sched(); + spin_lock_irq(&cpu_buffer->reader_lock); + rb_head_page_deactivate(cpu_buffer); + for (i = 0; i < nr_pages; i++) { if (RB_WARN_ON(cpu_buffer, list_empty(pages))) return; @@ -809,6 +1233,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer, list_add_tail(&bpage->list, cpu_buffer->pages); } rb_reset_cpu(cpu_buffer); + spin_unlock_irq(&cpu_buffer->reader_lock); rb_check_pages(cpu_buffer); @@ -958,22 +1383,15 @@ rb_reader_event(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->read); } -static inline struct ring_buffer_event * -rb_head_event(struct ring_buffer_per_cpu *cpu_buffer) -{ - return __rb_page_index(cpu_buffer->head_page, - cpu_buffer->head_page->read); -} - static inline struct ring_buffer_event * rb_iter_head_event(struct ring_buffer_iter *iter) { return __rb_page_index(iter->head_page, iter->head); } -static inline unsigned rb_page_write(struct buffer_page *bpage) +static inline unsigned long rb_page_write(struct buffer_page *bpage) { - return local_read(&bpage->write); + return local_read(&bpage->write) & RB_WRITE_MASK; } static inline unsigned rb_page_commit(struct buffer_page *bpage) @@ -981,6 +1399,11 @@ static inline unsigned rb_page_commit(struct buffer_page *bpage) return local_read(&bpage->page->commit); } +static inline unsigned long rb_page_entries(struct buffer_page *bpage) +{ + return local_read(&bpage->entries) & RB_WRITE_MASK; +} + /* Size is determined by what has been commited */ static inline unsigned rb_page_size(struct buffer_page *bpage) { @@ -993,19 +1416,6 @@ rb_commit_index(struct ring_buffer_per_cpu *cpu_buffer) return rb_page_commit(cpu_buffer->commit_page); } -static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer) -{ - return rb_page_commit(cpu_buffer->head_page); -} - -static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer, - struct buffer_page **bpage) -{ - struct list_head *p = (*bpage)->list.next; - - *bpage = list_entry(p, struct buffer_page, list); -} - static inline unsigned rb_event_index(struct ring_buffer_event *event) { @@ -1031,6 +1441,8 @@ rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, static void rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) { + unsigned long max_count; + /* * We only race with interrupts and NMIs on this CPU. * If we own the commit event, then we can commit @@ -1040,9 +1452,16 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) * assign the commit to the tail. 
*/ again: + max_count = cpu_buffer->buffer->pages * 100; + while (cpu_buffer->commit_page != cpu_buffer->tail_page) { - cpu_buffer->commit_page->page->commit = - cpu_buffer->commit_page->write; + if (RB_WARN_ON(cpu_buffer, !(--max_count))) + return; + if (RB_WARN_ON(cpu_buffer, + rb_is_reader_page(cpu_buffer->tail_page))) + return; + local_set(&cpu_buffer->commit_page->page->commit, + rb_page_write(cpu_buffer->commit_page)); rb_inc_page(cpu_buffer, &cpu_buffer->commit_page); cpu_buffer->write_stamp = cpu_buffer->commit_page->page->time_stamp; @@ -1051,8 +1470,12 @@ rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) } while (rb_commit_index(cpu_buffer) != rb_page_write(cpu_buffer->commit_page)) { - cpu_buffer->commit_page->page->commit = - cpu_buffer->commit_page->write; + + local_set(&cpu_buffer->commit_page->page->commit, + rb_page_write(cpu_buffer->commit_page)); + RB_WARN_ON(cpu_buffer, + local_read(&cpu_buffer->commit_page->page->commit) & + ~RB_WRITE_MASK); barrier(); } @@ -1085,7 +1508,7 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) * to the head page instead of next. */ if (iter->head_page == cpu_buffer->reader_page) - iter->head_page = cpu_buffer->head_page; + iter->head_page = rb_set_head_page(cpu_buffer); else rb_inc_page(cpu_buffer, &iter->head_page); @@ -1129,6 +1552,163 @@ rb_update_event(struct ring_buffer_event *event, } } +/* + * rb_handle_head_page - writer hit the head page + * + * Returns: +1 to retry page + * 0 to continue + * -1 on error + */ +static int +rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, + struct buffer_page *tail_page, + struct buffer_page *next_page) +{ + struct buffer_page *new_head; + int entries; + int type; + int ret; + + entries = rb_page_entries(next_page); + + /* + * The hard part is here. We need to move the head + * forward, and protect against both readers on + * other CPUs and writers coming in via interrupts. + */ + type = rb_head_page_set_update(cpu_buffer, next_page, tail_page, + RB_PAGE_HEAD); + + /* + * type can be one of four: + * NORMAL - an interrupt already moved it for us + * HEAD - we are the first to get here. + * UPDATE - we are the interrupt interrupting + * a current move. + * MOVED - a reader on another CPU moved the next + * pointer to its reader page. Give up + * and try again. + */ + + switch (type) { + case RB_PAGE_HEAD: + /* + * We changed the head to UPDATE, thus + * it is our responsibility to update + * the counters. + */ + local_add(entries, &cpu_buffer->overrun); + + /* + * The entries will be zeroed out when we move the + * tail page. + */ + + /* still more to do */ + break; + + case RB_PAGE_UPDATE: + /* + * This is an interrupt that interrupt the + * previous update. Still more to do. + */ + break; + case RB_PAGE_NORMAL: + /* + * An interrupt came in before the update + * and processed this for us. + * Nothing left to do. + */ + return 1; + case RB_PAGE_MOVED: + /* + * The reader is on another CPU and just did + * a swap with our next_page. + * Try again. + */ + return 1; + default: + RB_WARN_ON(cpu_buffer, 1); /* WTF??? */ + return -1; + } + + /* + * Now that we are here, the old head pointer is + * set to UPDATE. This will keep the reader from + * swapping the head page with the reader page. + * The reader (on another CPU) will spin till + * we are finished. + * + * We just need to protect against interrupts + * doing the job. We will set the next pointer + * to HEAD. After that, we set the old pointer + * to NORMAL, but only if it was HEAD before. 
+ * otherwise we are an interrupt, and only + * want the outer most commit to reset it. + */ + new_head = next_page; + rb_inc_page(cpu_buffer, &new_head); + + ret = rb_head_page_set_head(cpu_buffer, new_head, next_page, + RB_PAGE_NORMAL); + + /* + * Valid returns are: + * HEAD - an interrupt came in and already set it. + * NORMAL - One of two things: + * 1) We really set it. + * 2) A bunch of interrupts came in and moved + * the page forward again. + */ + switch (ret) { + case RB_PAGE_HEAD: + case RB_PAGE_NORMAL: + /* OK */ + break; + default: + RB_WARN_ON(cpu_buffer, 1); + return -1; + } + + /* + * It is possible that an interrupt came in, + * set the head up, then more interrupts came in + * and moved it again. When we get back here, + * the page would have been set to NORMAL but we + * just set it back to HEAD. + * + * How do you detect this? Well, if that happened + * the tail page would have moved. + */ + if (ret == RB_PAGE_NORMAL) { + /* + * If the tail had moved passed next, then we need + * to reset the pointer. + */ + if (cpu_buffer->tail_page != tail_page && + cpu_buffer->tail_page != next_page) + rb_head_page_set_normal(cpu_buffer, new_head, + next_page, + RB_PAGE_HEAD); + } + + /* + * If this was the outer most commit (the one that + * changed the original pointer from HEAD to UPDATE), + * then it is up to us to reset it to NORMAL. + */ + if (type == RB_PAGE_HEAD) { + ret = rb_head_page_set_normal(cpu_buffer, next_page, + tail_page, + RB_PAGE_UPDATE); + if (RB_WARN_ON(cpu_buffer, + ret != RB_PAGE_UPDATE)) + return -1; + } + + return 0; +} + static unsigned rb_calculate_event_length(unsigned length) { struct ring_buffer_event event; /* Used only for sizeof array */ @@ -1207,96 +1787,93 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, struct buffer_page *commit_page, struct buffer_page *tail_page, u64 *ts) { - struct buffer_page *next_page, *head_page, *reader_page; struct ring_buffer *buffer = cpu_buffer->buffer; - bool lock_taken = false; - unsigned long flags; + struct buffer_page *next_page; + int ret; next_page = tail_page; - local_irq_save(flags); - /* - * Since the write to the buffer is still not - * fully lockless, we must be careful with NMIs. - * The locks in the writers are taken when a write - * crosses to a new page. The locks protect against - * races with the readers (this will soon be fixed - * with a lockless solution). - * - * Because we can not protect against NMIs, and we - * want to keep traces reentrant, we need to manage - * what happens when we are in an NMI. - * - * NMIs can happen after we take the lock. - * If we are in an NMI, only take the lock - * if it is not already taken. Otherwise - * simply fail. - */ - if (unlikely(in_nmi())) { - if (!__raw_spin_trylock(&cpu_buffer->lock)) { - cpu_buffer->nmi_dropped++; - goto out_reset; - } - } else - __raw_spin_lock(&cpu_buffer->lock); - - lock_taken = true; - rb_inc_page(cpu_buffer, &next_page); - head_page = cpu_buffer->head_page; - reader_page = cpu_buffer->reader_page; - - /* we grabbed the lock before incrementing */ - if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) - goto out_reset; - /* * If for some reason, we had an interrupt storm that made * it all the way around the buffer, bail, and warn * about it. */ if (unlikely(next_page == commit_page)) { - cpu_buffer->commit_overrun++; + local_inc(&cpu_buffer->commit_overrun); goto out_reset; } - if (next_page == head_page) { - if (!(buffer->flags & RB_FL_OVERWRITE)) - goto out_reset; - - /* tail_page has not moved yet? 
*/ - if (tail_page == cpu_buffer->tail_page) { - /* count overflows */ - cpu_buffer->overrun += - local_read(&head_page->entries); + /* + * This is where the fun begins! + * + * We are fighting against races between a reader that + * could be on another CPU trying to swap its reader + * page with the buffer head. + * + * We are also fighting against interrupts coming in and + * moving the head or tail on us as well. + * + * If the next page is the head page then we have filled + * the buffer, unless the commit page is still on the + * reader page. + */ + if (rb_is_head_page(cpu_buffer, next_page, &tail_page->list)) { - rb_inc_page(cpu_buffer, &head_page); - cpu_buffer->head_page = head_page; - cpu_buffer->head_page->read = 0; + /* + * If the commit is not on the reader page, then + * move the header page. + */ + if (!rb_is_reader_page(cpu_buffer->commit_page)) { + /* + * If we are not in overwrite mode, + * this is easy, just stop here. + */ + if (!(buffer->flags & RB_FL_OVERWRITE)) + goto out_reset; + + ret = rb_handle_head_page(cpu_buffer, + tail_page, + next_page); + if (ret < 0) + goto out_reset; + if (ret) + goto out_again; + } else { + /* + * We need to be careful here too. The + * commit page could still be on the reader + * page. We could have a small buffer, and + * have filled up the buffer with events + * from interrupts and such, and wrapped. + * + * Note, if the tail page is also the on the + * reader_page, we let it move out. + */ + if (unlikely((cpu_buffer->commit_page != + cpu_buffer->tail_page) && + (cpu_buffer->commit_page == + cpu_buffer->reader_page))) { + local_inc(&cpu_buffer->commit_overrun); + goto out_reset; + } } } - /* - * If the tail page is still the same as what we think - * it is, then it is up to us to update the tail - * pointer. - */ - if (tail_page == cpu_buffer->tail_page) { - local_set(&next_page->write, 0); - local_set(&next_page->entries, 0); - local_set(&next_page->page->commit, 0); - cpu_buffer->tail_page = next_page; - - /* reread the time stamp */ + ret = rb_tail_page_update(cpu_buffer, tail_page, next_page); + if (ret) { + /* + * Nested commits always have zero deltas, so + * just reread the time stamp + */ *ts = rb_time_stamp(buffer, cpu_buffer->cpu); - cpu_buffer->tail_page->page->time_stamp = *ts; + next_page->page->time_stamp = *ts; } - rb_reset_tail(cpu_buffer, tail_page, tail, length); + out_again: - __raw_spin_unlock(&cpu_buffer->lock); - local_irq_restore(flags); + rb_reset_tail(cpu_buffer, tail_page, tail, length); /* fail and let the caller try again */ return ERR_PTR(-EAGAIN); @@ -1305,9 +1882,6 @@ rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer, /* reset write */ rb_reset_tail(cpu_buffer, tail_page, tail, length); - if (likely(lock_taken)) - __raw_spin_unlock(&cpu_buffer->lock); - local_irq_restore(flags); return NULL; } @@ -1324,6 +1898,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, barrier(); tail_page = cpu_buffer->tail_page; write = local_add_return(length, &tail_page->write); + + /* set write to only the index of the write */ + write &= RB_WRITE_MASK; tail = write - length; /* See if we shot pass the end of this buffer page */ @@ -1368,12 +1945,16 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, bpage = cpu_buffer->tail_page; if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { + unsigned long write_mask = + local_read(&bpage->write) & ~RB_WRITE_MASK; /* * This is on the tail page. It is possible that * a write could come in and move the tail page * and write to the next page. 
That is fine * because we just shorten what is on this page. */ + old_index += write_mask; + new_index += write_mask; index = local_cmpxchg(&bpage->write, old_index, new_index); if (index == old_index) return 1; @@ -1882,9 +2463,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_write); static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) { struct buffer_page *reader = cpu_buffer->reader_page; - struct buffer_page *head = cpu_buffer->head_page; + struct buffer_page *head = rb_set_head_page(cpu_buffer); struct buffer_page *commit = cpu_buffer->commit_page; + /* In case of error, head will be NULL */ + if (unlikely(!head)) + return 1; + return reader->read == rb_page_commit(reader) && (commit == reader || (commit == head && @@ -1975,7 +2560,7 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) return 0; cpu_buffer = buffer->buffers[cpu]; - ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun) + ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun)) - cpu_buffer->read; return ret; @@ -1996,32 +2581,12 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) return 0; cpu_buffer = buffer->buffers[cpu]; - ret = cpu_buffer->overrun; + ret = local_read(&cpu_buffer->overrun); return ret; } EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); -/** - * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped - * @buffer: The ring buffer - * @cpu: The per CPU buffer to get the number of overruns from - */ -unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu) -{ - struct ring_buffer_per_cpu *cpu_buffer; - unsigned long ret; - - if (!cpumask_test_cpu(cpu, buffer->cpumask)) - return 0; - - cpu_buffer = buffer->buffers[cpu]; - ret = cpu_buffer->nmi_dropped; - - return ret; -} -EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu); - /** * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits * @buffer: The ring buffer @@ -2037,7 +2602,7 @@ ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu) return 0; cpu_buffer = buffer->buffers[cpu]; - ret = cpu_buffer->commit_overrun; + ret = local_read(&cpu_buffer->commit_overrun); return ret; } @@ -2060,7 +2625,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; entries += (local_read(&cpu_buffer->entries) - - cpu_buffer->overrun) - cpu_buffer->read; + local_read(&cpu_buffer->overrun)) - cpu_buffer->read; } return entries; @@ -2083,7 +2648,7 @@ unsigned long ring_buffer_overruns(struct ring_buffer *buffer) /* if you care about this being correct, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; - overruns += cpu_buffer->overrun; + overruns += local_read(&cpu_buffer->overrun); } return overruns; @@ -2096,8 +2661,10 @@ static void rb_iter_reset(struct ring_buffer_iter *iter) /* Iterator usage is expected to have record disabled */ if (list_empty(&cpu_buffer->reader_page->list)) { - iter->head_page = cpu_buffer->head_page; - iter->head = cpu_buffer->head_page->read; + iter->head_page = rb_set_head_page(cpu_buffer); + if (unlikely(!iter->head_page)) + return; + iter->head = iter->head_page->read; } else { iter->head_page = cpu_buffer->reader_page; iter->head = cpu_buffer->reader_page->read; @@ -2214,6 +2781,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) struct buffer_page *reader = NULL; unsigned long flags; int nr_loops = 0; + int ret; local_irq_save(flags); 
__raw_spin_lock(&cpu_buffer->lock); @@ -2247,11 +2815,17 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) goto out; /* - * Splice the empty reader page into the list around the head. * Reset the reader page to size zero. */ + local_set(&cpu_buffer->reader_page->write, 0); + local_set(&cpu_buffer->reader_page->entries, 0); + local_set(&cpu_buffer->reader_page->page->commit, 0); - reader = cpu_buffer->head_page; + spin: + /* + * Splice the empty reader page into the list around the head. + */ + reader = rb_set_head_page(cpu_buffer); cpu_buffer->reader_page->list.next = reader->list.next; cpu_buffer->reader_page->list.prev = reader->list.prev; @@ -2262,22 +2836,35 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) */ cpu_buffer->pages = reader->list.prev; - local_set(&cpu_buffer->reader_page->write, 0); - local_set(&cpu_buffer->reader_page->entries, 0); - local_set(&cpu_buffer->reader_page->page->commit, 0); + /* The reader page will be pointing to the new head */ + rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list); - /* Make the reader page now replace the head */ - reader->list.prev->next = &cpu_buffer->reader_page->list; - reader->list.next->prev = &cpu_buffer->reader_page->list; + /* + * Here's the tricky part. + * + * We need to move the pointer past the header page. + * But we can only do that if a writer is not currently + * moving it. The page before the header page has the + * flag bit '1' set if it is pointing to the page we want. + * but if the writer is in the process of moving it + * than it will be '2' or already moved '0'. + */ + + ret = rb_head_page_replace(reader, cpu_buffer->reader_page); /* - * If the tail is on the reader, then we must set the head - * to the inserted page, otherwise we set it one before. + * If we did not convert it, then we must try again. */ - cpu_buffer->head_page = cpu_buffer->reader_page; + if (!ret) + goto spin; - if (cpu_buffer->commit_page != reader) - rb_inc_page(cpu_buffer, &cpu_buffer->head_page); + /* + * Yeah! We succeeded in replacing the page. + * + * Now make the new head point back to the reader page. 
+ */ + reader->list.next->prev = &cpu_buffer->reader_page->list; + rb_inc_page(cpu_buffer, &cpu_buffer->head_page); /* Finally update the reader page to the new head */ cpu_buffer->reader_page = reader; @@ -2733,6 +3320,8 @@ EXPORT_SYMBOL_GPL(ring_buffer_size); static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) { + rb_head_page_deactivate(cpu_buffer); + cpu_buffer->head_page = list_entry(cpu_buffer->pages, struct buffer_page, list); local_set(&cpu_buffer->head_page->write, 0); @@ -2750,16 +3339,17 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) local_set(&cpu_buffer->reader_page->page->commit, 0); cpu_buffer->reader_page->read = 0; - cpu_buffer->nmi_dropped = 0; - cpu_buffer->commit_overrun = 0; - cpu_buffer->overrun = 0; - cpu_buffer->read = 0; + local_set(&cpu_buffer->commit_overrun, 0); + local_set(&cpu_buffer->overrun, 0); local_set(&cpu_buffer->entries, 0); local_set(&cpu_buffer->committing, 0); local_set(&cpu_buffer->commits, 0); + cpu_buffer->read = 0; cpu_buffer->write_stamp = 0; cpu_buffer->read_stamp = 0; + + rb_head_page_activate(cpu_buffer); } /** @@ -3107,7 +3697,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, read = 0; } else { /* update the entry counter */ - cpu_buffer->read += local_read(&reader->entries); + cpu_buffer->read += rb_page_entries(reader); /* swap the pages */ rb_init_page(bpage); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bdb3afc8b306..b591f7a1bd7b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3630,9 +3630,6 @@ tracing_stats_read(struct file *filp, char __user *ubuf, cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); trace_seq_printf(s, "commit overrun: %ld\n", cnt); - cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu); - trace_seq_printf(s, "nmi dropped: %ld\n", cnt); - count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); kfree(s); -- cgit v1.2.3 From 2e4fddd8e420e8f531a34e7a97f9cdb851a6ad13 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 7 Jul 2009 15:17:12 +0800 Subject: crypto: shash - Add shash_instance This patch adds shash_instance and the associated alloc/free functions. This is meant to be an instance that with a shash algorithm under it. Note that the instance itself doesn't have to be shash. 
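As an informal usage sketch only (hypothetical helper and names; the
underlying algorithm lookup and error unwinding are elided), a template
built on top of an shash algorithm is expected to wrap it roughly like
this:

	/* 'alg' is the underlying algorithm, already looked up and
	 * refcounted by the template's own attribute parsing */
	static struct crypto_instance *example_wrap(struct crypto_alg *alg)
	{
		struct shash_instance *inst;

		inst = shash_alloc_instance("example", alg);
		if (IS_ERR(inst))
			return ERR_CAST(inst);

		/* fill in inst->alg (digestsize, init/update/final, ...) */

		return shash_crypto_instance(inst);
	}

shash_free_instance() can serve as the template's ->free() callback for
instances whose context holds the spawn it drops.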
Signed-off-by: Herbert Xu --- crypto/shash.c | 7 +++++++ include/crypto/internal/hash.h | 26 ++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'include') diff --git a/crypto/shash.c b/crypto/shash.c index 2ccc8b0076ce..8f9d8831e293 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -502,5 +502,12 @@ int crypto_unregister_shash(struct shash_alg *alg) } EXPORT_SYMBOL_GPL(crypto_unregister_shash); +void shash_free_instance(struct crypto_instance *inst) +{ + crypto_drop_spawn(crypto_instance_ctx(inst)); + kfree(shash_instance(inst)); +} +EXPORT_SYMBOL_GPL(shash_free_instance); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Synchronous cryptographic hash type"); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 82b70564bcab..44d3bcdce6e2 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -34,6 +34,10 @@ struct crypto_hash_walk { unsigned int flags; }; +struct shash_instance { + struct shash_alg alg; +}; + extern const struct crypto_type crypto_ahash_type; int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err); @@ -46,6 +50,8 @@ int crypto_hash_walk_first_compat(struct hash_desc *hdesc, int crypto_register_shash(struct shash_alg *alg); int crypto_unregister_shash(struct shash_alg *alg); +void shash_free_instance(struct crypto_instance *inst); + static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) { return crypto_tfm_ctx(&tfm->base); @@ -80,5 +86,25 @@ static inline void *crypto_shash_ctx(struct crypto_shash *tfm) return crypto_tfm_ctx(&tfm->base); } +static inline struct crypto_instance *shash_crypto_instance( + struct shash_instance *inst) +{ + return container_of(&inst->alg.base, struct crypto_instance, alg); +} + +static inline struct shash_instance *shash_instance( + struct crypto_instance *inst) +{ + return container_of(__crypto_shash_alg(&inst->alg), + struct shash_instance, alg); +} + +static inline struct shash_instance *shash_alloc_instance( + const char *name, struct crypto_alg *alg) +{ + return crypto_alloc_instance2(name, alg, + sizeof(struct shash_alg) - sizeof(*alg)); +} + #endif /* _CRYPTO_INTERNAL_HASH_H */ -- cgit v1.2.3 From 97eedce1a64a57648ac5e39f03825528c47ba72e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 8 Jul 2009 15:55:52 +0800 Subject: crypto: api - Add new style spawn support This patch modifies the spawn infrastructure to support new style algorithms like shash. In particular, this means storing the frontend type in the spawn and using crypto_create_tfm to allocate the tfm. 
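Informally, the expected calling pattern looks like the sketch below
('ctx', 'alg', 'inst' and 'frontend' are placeholders; error unwinding
is trimmed). The frontend is recorded when the spawn is initialised and
picked up again when the transform is allocated:

	void *tfm;
	int err;

	/* while constructing the instance */
	err = crypto_init_spawn2(&ctx->spawn, alg, inst, frontend);
	if (err)
		return err;

	/* later, from the instance's tfm initialisation path */
	tfm = crypto_spawn_tfm2(&ctx->spawn);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

Here ctx->spawn is an ordinary struct crypto_spawn embedded in the
instance context and frontend is the crypto_type of the new style
algorithm being spawned.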
Signed-off-by: Herbert Xu --- crypto/algapi.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++--- include/crypto/algapi.h | 6 ++++++ 2 files changed, 58 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/crypto/algapi.c b/crypto/algapi.c index 429f1a003b06..17a5fff8f777 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -488,6 +488,23 @@ int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg, } EXPORT_SYMBOL_GPL(crypto_init_spawn); +int crypto_init_spawn2(struct crypto_spawn *spawn, struct crypto_alg *alg, + struct crypto_instance *inst, + const struct crypto_type *frontend) +{ + int err = -EINVAL; + + if (frontend && (alg->cra_flags ^ frontend->type) & frontend->maskset) + goto out; + + spawn->frontend = frontend; + err = crypto_init_spawn(spawn, alg, inst, frontend->maskset); + +out: + return err; +} +EXPORT_SYMBOL_GPL(crypto_init_spawn2); + void crypto_drop_spawn(struct crypto_spawn *spawn) { down_write(&crypto_alg_sem); @@ -496,12 +513,10 @@ void crypto_drop_spawn(struct crypto_spawn *spawn) } EXPORT_SYMBOL_GPL(crypto_drop_spawn); -struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type, - u32 mask) +static struct crypto_alg *crypto_spawn_alg(struct crypto_spawn *spawn) { struct crypto_alg *alg; struct crypto_alg *alg2; - struct crypto_tfm *tfm; down_read(&crypto_alg_sem); alg = spawn->alg; @@ -516,6 +531,19 @@ struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type, return ERR_PTR(-EAGAIN); } + return alg; +} + +struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type, + u32 mask) +{ + struct crypto_alg *alg; + struct crypto_tfm *tfm; + + alg = crypto_spawn_alg(spawn); + if (IS_ERR(alg)) + return ERR_CAST(alg); + tfm = ERR_PTR(-EINVAL); if (unlikely((alg->cra_flags ^ type) & mask)) goto out_put_alg; @@ -532,6 +560,27 @@ out_put_alg: } EXPORT_SYMBOL_GPL(crypto_spawn_tfm); +void *crypto_spawn_tfm2(struct crypto_spawn *spawn) +{ + struct crypto_alg *alg; + struct crypto_tfm *tfm; + + alg = crypto_spawn_alg(spawn); + if (IS_ERR(alg)) + return ERR_CAST(alg); + + tfm = crypto_create_tfm(alg, spawn->frontend); + if (IS_ERR(tfm)) + goto out_put_alg; + + return tfm; + +out_put_alg: + crypto_mod_put(alg); + return tfm; +} +EXPORT_SYMBOL_GPL(crypto_spawn_tfm2); + int crypto_register_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&crypto_chain, nb); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 99bb29723130..c9bff92a9e0e 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -61,6 +61,7 @@ struct crypto_spawn { struct list_head list; struct crypto_alg *alg; struct crypto_instance *inst; + const struct crypto_type *frontend; u32 mask; }; @@ -117,9 +118,14 @@ struct crypto_template *crypto_lookup_template(const char *name); int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg, struct crypto_instance *inst, u32 mask); +int crypto_init_spawn2(struct crypto_spawn *spawn, struct crypto_alg *alg, + struct crypto_instance *inst, + const struct crypto_type *frontend); + void crypto_drop_spawn(struct crypto_spawn *spawn); struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type, u32 mask); +void *crypto_spawn_tfm2(struct crypto_spawn *spawn); static inline void crypto_set_spawn(struct crypto_spawn *spawn, struct crypto_instance *inst) -- cgit v1.2.3 From 942969992d86330c9700e2cd9afe8a6bea42df78 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 8 Jul 2009 17:21:37 +0800 Subject: crypto: shash - Add spawn support 
This patch adds the functions needed to create and use shash spawns, i.e., to use shash algorithms in a template. Signed-off-by: Herbert Xu --- crypto/shash.c | 9 +++++++++ include/crypto/internal/hash.h | 14 ++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/crypto/shash.c b/crypto/shash.c index 8f9d8831e293..97f6c8ba103a 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -509,5 +509,14 @@ void shash_free_instance(struct crypto_instance *inst) } EXPORT_SYMBOL_GPL(shash_free_instance); +int crypto_init_shash_spawn(struct crypto_shash_spawn *spawn, + struct shash_alg *alg, + struct crypto_instance *inst) +{ + return crypto_init_spawn2(&spawn->base, &alg->base, inst, + &crypto_shash_type); +} +EXPORT_SYMBOL_GPL(crypto_init_shash_spawn); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Synchronous cryptographic hash type"); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 44d3bcdce6e2..6cc824b79331 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -38,6 +38,10 @@ struct shash_instance { struct shash_alg alg; }; +struct crypto_shash_spawn { + struct crypto_spawn base; +}; + extern const struct crypto_type crypto_ahash_type; int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err); @@ -52,6 +56,10 @@ int crypto_unregister_shash(struct shash_alg *alg); void shash_free_instance(struct crypto_instance *inst); +int crypto_init_shash_spawn(struct crypto_shash_spawn *spawn, + struct shash_alg *alg, + struct crypto_instance *inst); + static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) { return crypto_tfm_ctx(&tfm->base); @@ -106,5 +114,11 @@ static inline struct shash_instance *shash_alloc_instance( sizeof(struct shash_alg) - sizeof(*alg)); } +static inline struct crypto_shash *crypto_spawn_shash( + struct crypto_shash_spawn *spawn) +{ + return crypto_spawn_tfm2(&spawn->base); +} + #endif /* _CRYPTO_INTERNAL_HASH_H */ -- cgit v1.2.3 From d06854f0243d91badabaab14503f7f3bb770061d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 8 Jul 2009 17:53:16 +0800 Subject: crypto: api - Add crypto_attr_alg2 helper This patch adds the helper crypto_attr_alg2 which is similar to crypto_attr_alg but takes an extra frontend argument. This is intended to be used by new style algorithm types such as shash. 
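As an illustration only ('tb', 'frontend', 'type' and 'mask' are
placeholders supplied by the caller), a new style template would look
up its underlying algorithm along these lines:

	struct crypto_alg *alg;

	/* like crypto_attr_alg(), but routes the lookup through the
	 * frontend's crypto_type via crypto_find_alg() */
	alg = crypto_attr_alg2(tb[1], frontend, type, mask);
	if (IS_ERR(alg))
		return ERR_CAST(alg);

The old crypto_attr_alg() keeps its behaviour as the frontend == NULL
case.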
Signed-off-by: Herbert Xu --- crypto/algapi.c | 8 +++++--- crypto/api.c | 31 ++++++++++++++++++++++--------- crypto/internal.h | 3 +++ include/crypto/algapi.h | 11 ++++++++++- 4 files changed, 40 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/crypto/algapi.c b/crypto/algapi.c index 17a5fff8f777..2154815201a7 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -644,7 +644,9 @@ const char *crypto_attr_alg_name(struct rtattr *rta) } EXPORT_SYMBOL_GPL(crypto_attr_alg_name); -struct crypto_alg *crypto_attr_alg(struct rtattr *rta, u32 type, u32 mask) +struct crypto_alg *crypto_attr_alg2(struct rtattr *rta, + const struct crypto_type *frontend, + u32 type, u32 mask) { const char *name; int err; @@ -654,9 +656,9 @@ struct crypto_alg *crypto_attr_alg(struct rtattr *rta, u32 type, u32 mask) if (IS_ERR(name)) return ERR_PTR(err); - return crypto_alg_mod_lookup(name, type, mask); + return crypto_find_alg(name, frontend, type, mask); } -EXPORT_SYMBOL_GPL(crypto_attr_alg); +EXPORT_SYMBOL_GPL(crypto_attr_alg2); int crypto_attr_u32(struct rtattr *rta, u32 *num) { diff --git a/crypto/api.c b/crypto/api.c index d5944f92b416..ba81221b3bef 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -503,6 +503,27 @@ out: } EXPORT_SYMBOL_GPL(crypto_create_tfm); +struct crypto_alg *crypto_find_alg(const char *alg_name, + const struct crypto_type *frontend, + u32 type, u32 mask) +{ + struct crypto_alg *(*lookup)(const char *name, u32 type, u32 mask) = + crypto_alg_mod_lookup; + + if (frontend) { + type &= frontend->maskclear; + mask &= frontend->maskclear; + type |= frontend->type; + mask |= frontend->maskset; + + if (frontend->lookup) + lookup = frontend->lookup; + } + + return lookup(alg_name, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_find_alg); + /* * crypto_alloc_tfm - Locate algorithm and allocate transform * @alg_name: Name of algorithm @@ -526,21 +547,13 @@ EXPORT_SYMBOL_GPL(crypto_create_tfm); void *crypto_alloc_tfm(const char *alg_name, const struct crypto_type *frontend, u32 type, u32 mask) { - struct crypto_alg *(*lookup)(const char *name, u32 type, u32 mask); void *tfm; int err; - type &= frontend->maskclear; - mask &= frontend->maskclear; - type |= frontend->type; - mask |= frontend->maskset; - - lookup = frontend->lookup ?: crypto_alg_mod_lookup; - for (;;) { struct crypto_alg *alg; - alg = lookup(alg_name, type, mask); + alg = crypto_find_alg(alg_name, frontend, type, mask); if (IS_ERR(alg)) { err = PTR_ERR(alg); goto err; diff --git a/crypto/internal.h b/crypto/internal.h index 95baaea21fbc..7efa4d0533ff 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -106,6 +106,9 @@ struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, u32 mask); void *crypto_create_tfm(struct crypto_alg *alg, const struct crypto_type *frontend); +struct crypto_alg *crypto_find_alg(const char *alg_name, + const struct crypto_type *frontend, + u32 type, u32 mask); void *crypto_alloc_tfm(const char *alg_name, const struct crypto_type *frontend, u32 type, u32 mask); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index c9bff92a9e0e..1d15a926041e 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -136,7 +136,16 @@ static inline void crypto_set_spawn(struct crypto_spawn *spawn, struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb); int crypto_check_attr_type(struct rtattr **tb, u32 type); const char *crypto_attr_alg_name(struct rtattr *rta); -struct crypto_alg *crypto_attr_alg(struct rtattr *rta, u32 type, u32 mask); +struct crypto_alg 
*crypto_attr_alg2(struct rtattr *rta, + const struct crypto_type *frontend, + u32 type, u32 mask); + +static inline struct crypto_alg *crypto_attr_alg(struct rtattr *rta, + u32 type, u32 mask) +{ + return crypto_attr_alg2(rta, NULL, type, mask); +} + int crypto_attr_u32(struct rtattr *rta, u32 *num); void *crypto_alloc_instance2(const char *name, struct crypto_alg *alg, unsigned int head); -- cgit v1.2.3 From 7d6f56400a695af497a8b7c23ea0ff9c3d9d99f4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 8 Jul 2009 17:56:28 +0800 Subject: crypto: shash - Add shash_attr_alg2 helper This patch adds the helper shash_attr_alg2 which locates a shash algorithm based on the information in the given attribute. Signed-off-by: Herbert Xu --- crypto/shash.c | 10 ++++++++++ include/crypto/internal/hash.h | 2 ++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/crypto/shash.c b/crypto/shash.c index 97f6c8ba103a..21bcff6be5b0 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -518,5 +518,15 @@ int crypto_init_shash_spawn(struct crypto_shash_spawn *spawn, } EXPORT_SYMBOL_GPL(crypto_init_shash_spawn); +struct shash_alg *shash_attr_alg(struct rtattr *rta, u32 type, u32 mask) +{ + struct crypto_alg *alg; + + alg = crypto_attr_alg2(rta, &crypto_shash_type, type, mask); + return IS_ERR(alg) ? ERR_CAST(alg) : + container_of(alg, struct shash_alg, base); +} +EXPORT_SYMBOL_GPL(shash_attr_alg); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Synchronous cryptographic hash type"); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 6cc824b79331..e2ab35a74c62 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -60,6 +60,8 @@ int crypto_init_shash_spawn(struct crypto_shash_spawn *spawn, struct shash_alg *alg, struct crypto_instance *inst); +struct shash_alg *shash_attr_alg(struct rtattr *rta, u32 type, u32 mask); + static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) { return crypto_tfm_ctx(&tfm->base); -- cgit v1.2.3 From 619a6ebd2547f3a8ec2fbc5245daaa1f2056eb32 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 8 Jul 2009 18:46:23 +0800 Subject: crypto: shash - Add shash_register_instance This patch adds shash_register_instance so that shash instances can be registered without bypassing the shash checks applied to normal algorithms. 
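As a sketch of the intended call site (hypothetical template code; the
spawn setup and inst->alg initialisation happen before this point):

	err = shash_register_instance(tmpl, inst);
	if (err)
		shash_free_instance(shash_crypto_instance(inst));
	return err;

where tmpl is the template being instantiated and inst is the filled in
struct shash_instance.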
Signed-off-by: Herbert Xu --- crypto/shash.c | 26 +++++++++++++++++++++++++- include/crypto/internal/hash.h | 3 ++- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/crypto/shash.c b/crypto/shash.c index 21bcff6be5b0..3d242425d692 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -480,7 +480,7 @@ struct crypto_shash *crypto_alloc_shash(const char *alg_name, u32 type, } EXPORT_SYMBOL_GPL(crypto_alloc_shash); -int crypto_register_shash(struct shash_alg *alg) +static int shash_prepare_alg(struct shash_alg *alg) { struct crypto_alg *base = &alg->base; @@ -491,6 +491,17 @@ int crypto_register_shash(struct shash_alg *alg) base->cra_type = &crypto_shash_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_SHASH; + return 0; +} + +int crypto_register_shash(struct shash_alg *alg) +{ + struct crypto_alg *base = &alg->base; + int err; + + err = shash_prepare_alg(alg); + if (err) + return err; return crypto_register_alg(base); } @@ -502,6 +513,19 @@ int crypto_unregister_shash(struct shash_alg *alg) } EXPORT_SYMBOL_GPL(crypto_unregister_shash); +int shash_register_instance(struct crypto_template *tmpl, + struct shash_instance *inst) +{ + int err; + + err = shash_prepare_alg(&inst->alg); + if (err) + return err; + + return crypto_register_instance(tmpl, shash_crypto_instance(inst)); +} +EXPORT_SYMBOL_GPL(shash_register_instance); + void shash_free_instance(struct crypto_instance *inst) { crypto_drop_spawn(crypto_instance_ctx(inst)); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index e2ab35a74c62..fa5c9fb7ce5a 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -53,7 +53,8 @@ int crypto_hash_walk_first_compat(struct hash_desc *hdesc, int crypto_register_shash(struct shash_alg *alg); int crypto_unregister_shash(struct shash_alg *alg); - +int shash_register_instance(struct crypto_template *tmpl, + struct shash_instance *inst); void shash_free_instance(struct crypto_instance *inst); int crypto_init_shash_spawn(struct crypto_shash_spawn *spawn, -- cgit v1.2.3 From cc6a8acdeee932f6911d8b236d2c7d6bcc4616f6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 17 Jun 2008 16:39:06 +0200 Subject: ALSA: Fix SG-buffer DMA with non-coherent architectures Using SG-buffers with dma_alloc_coherent() is often very inefficient on non-coherent architectures because a tracking record could be allocated in addition for each dma_alloc_coherent() call. Instead, simply disable SG-buffers but just allocate normal continuous buffers on non-supported (currently all but x86) architectures. 
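For reference, nothing needs to change on the driver side. A typical
preallocation call (illustrative sizes) keeps asking for SG buffers and
transparently gets continuous pages where CONFIG_SND_DMA_SGBUF is not
set:

	/* SNDRV_DMA_TYPE_DEV_SG degrades to SNDRV_DMA_TYPE_DEV on
	 * architectures without SG-buffer support */
	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV_SG,
					      snd_dma_pci_data(pci),
					      64 * 1024, 128 * 1024);

Likewise snd_pcm_sgbuf_get_addr() and snd_pcm_sgbuf_get_ptr() fall back
to simple offsets into the continuous DMA area.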
Signed-off-by: Takashi Iwai --- include/sound/memalloc.h | 6 ++++++ include/sound/pcm.h | 23 +++++++++++++++++++++++ sound/core/Kconfig | 4 ++++ sound/core/Makefile | 2 +- sound/core/memalloc.c | 4 ++++ sound/core/pcm_memory.c | 2 ++ 6 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/memalloc.h b/include/sound/memalloc.h index 7ccce94a5255..c42506212649 100644 --- a/include/sound/memalloc.h +++ b/include/sound/memalloc.h @@ -47,7 +47,11 @@ struct snd_dma_device { #define SNDRV_DMA_TYPE_UNKNOWN 0 /* not defined */ #define SNDRV_DMA_TYPE_CONTINUOUS 1 /* continuous no-DMA memory */ #define SNDRV_DMA_TYPE_DEV 2 /* generic device continuous */ +#ifdef CONFIG_SND_DMA_SGBUF #define SNDRV_DMA_TYPE_DEV_SG 3 /* generic device SG-buffer */ +#else +#define SNDRV_DMA_TYPE_DEV_SG SNDRV_DMA_TYPE_DEV /* no SG-buf support */ +#endif /* * info for buffer allocation @@ -60,6 +64,7 @@ struct snd_dma_buffer { void *private_data; /* private for allocator; don't touch */ }; +#ifdef CONFIG_SND_DMA_SGBUF /* * Scatter-Gather generic device pages */ @@ -107,6 +112,7 @@ static inline void *snd_sgbuf_get_ptr(struct snd_sg_buf *sgbuf, size_t offset) { return sgbuf->table[offset >> PAGE_SHIFT].buf + offset % PAGE_SIZE; } +#endif /* CONFIG_SND_DMA_SGBUF */ /* allocate/release a buffer */ int snd_dma_alloc_pages(int type, struct device *dev, size_t size, diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 23893523dc8c..1691c7fe35af 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -902,6 +902,7 @@ int snd_pcm_lib_preallocate_pages_for_all(struct snd_pcm *pcm, int snd_pcm_lib_malloc_pages(struct snd_pcm_substream *substream, size_t size); int snd_pcm_lib_free_pages(struct snd_pcm_substream *substream); +#ifdef CONFIG_SND_DMA_SGBUF /* * SG-buffer handling */ @@ -927,6 +928,28 @@ struct page *snd_pcm_sgbuf_ops_page(struct snd_pcm_substream *substream, unsigned int snd_pcm_sgbuf_get_chunk_size(struct snd_pcm_substream *substream, unsigned int ofs, unsigned int size); +#else /* !SND_DMA_SGBUF */ +/* + * fake using a continuous buffer + */ +static inline dma_addr_t +snd_pcm_sgbuf_get_addr(struct snd_pcm_substream *substream, unsigned int ofs) +{ + return substream->runtime->dma_addr + ofs; +} + +static inline void * +snd_pcm_sgbuf_get_ptr(struct snd_pcm_substream *substream, unsigned int ofs) +{ + return substream->runtime->dma_area + ofs; +} + +#define snd_pcm_sgbuf_ops_page NULL + +#define snd_pcm_sgbuf_get_chunk_size(subs, ofs, size) (size) + +#endif /* SND_DMA_SGBUF */ + /* handle mmap counter - PCM mmap callback should handle this counter properly */ static inline void snd_pcm_mmap_data_open(struct vm_area_struct *area) { diff --git a/sound/core/Kconfig b/sound/core/Kconfig index 6061fb5f4e1c..c15682a2f9db 100644 --- a/sound/core/Kconfig +++ b/sound/core/Kconfig @@ -206,4 +206,8 @@ config SND_PCM_XRUN_DEBUG config SND_VMASTER bool +config SND_DMA_SGBUF + def_bool y + depends on X86 + source "sound/core/seq/Kconfig" diff --git a/sound/core/Makefile b/sound/core/Makefile index 4229052e7b91..350a08d277f4 100644 --- a/sound/core/Makefile +++ b/sound/core/Makefile @@ -13,7 +13,7 @@ snd-pcm-objs := pcm.o pcm_native.o pcm_lib.o pcm_timer.o pcm_misc.o \ pcm_memory.o snd-page-alloc-y := memalloc.o -snd-page-alloc-$(CONFIG_HAS_DMA) += sgbuf.o +snd-page-alloc-$(CONFIG_SND_DMA_SGBUF) += sgbuf.o snd-rawmidi-objs := rawmidi.o snd-timer-objs := timer.o diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 1b3534d67686..9e92441f9b78 100644 --- 
a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -199,6 +199,8 @@ int snd_dma_alloc_pages(int type, struct device *device, size_t size, case SNDRV_DMA_TYPE_DEV: dmab->area = snd_malloc_dev_pages(device, size, &dmab->addr); break; +#endif +#ifdef CONFIG_SND_DMA_SGBUF case SNDRV_DMA_TYPE_DEV_SG: snd_malloc_sgbuf_pages(device, size, dmab, NULL); break; @@ -269,6 +271,8 @@ void snd_dma_free_pages(struct snd_dma_buffer *dmab) case SNDRV_DMA_TYPE_DEV: snd_free_dev_pages(dmab->dev.dev, dmab->bytes, dmab->area, dmab->addr); break; +#endif +#ifdef CONFIG_SND_DMA_SGBUF case SNDRV_DMA_TYPE_DEV_SG: snd_free_sgbuf_pages(dmab); break; diff --git a/sound/core/pcm_memory.c b/sound/core/pcm_memory.c index a6d42808828c..caa7796bc2f5 100644 --- a/sound/core/pcm_memory.c +++ b/sound/core/pcm_memory.c @@ -304,6 +304,7 @@ int snd_pcm_lib_preallocate_pages_for_all(struct snd_pcm *pcm, EXPORT_SYMBOL(snd_pcm_lib_preallocate_pages_for_all); +#ifdef CONFIG_SND_DMA_SGBUF /** * snd_pcm_sgbuf_ops_page - get the page struct at the given offset * @substream: the pcm substream instance @@ -349,6 +350,7 @@ unsigned int snd_pcm_sgbuf_get_chunk_size(struct snd_pcm_substream *substream, return size; } EXPORT_SYMBOL(snd_pcm_sgbuf_get_chunk_size); +#endif /* CONFIG_SND_DMA_SGBUF */ /** * snd_pcm_lib_malloc_pages - allocate the DMA buffer -- cgit v1.2.3 From e4f7c0b44a8ac8935f223195af9ea637d0c08091 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 7 Jul 2009 10:32:59 +0100 Subject: kmemleak: Trace the kmalloc_large* functions in slub The kmalloc_large() and kmalloc_large_node() functions were missed when adding the kmemleak hooks to the slub allocator. However, they should be traced to avoid false positives. Signed-off-by: Catalin Marinas Cc: Christoph Lameter Acked-by: Pekka Enberg --- include/linux/slub_def.h | 2 ++ mm/slub.c | 10 ++++++---- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 4dcbc2c71491..c1c862b1d01a 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -11,6 +11,7 @@ #include #include #include +#include enum stat_item { ALLOC_FASTPATH, /* Allocation from cpu slab */ @@ -233,6 +234,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) unsigned int order = get_order(size); void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order); + kmemleak_alloc(ret, size, 1, flags); trace_kmalloc(_THIS_IP_, ret, size, PAGE_SIZE << order, flags); return ret; diff --git a/mm/slub.c b/mm/slub.c index a9201d83178b..b9f1491a58a1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -2835,13 +2834,15 @@ EXPORT_SYMBOL(__kmalloc); static void *kmalloc_large_node(size_t size, gfp_t flags, int node) { struct page *page; + void *ptr = NULL; flags |= __GFP_COMP | __GFP_NOTRACK; page = alloc_pages_node(node, flags, get_order(size)); if (page) - return page_address(page); - else - return NULL; + ptr = page_address(page); + + kmemleak_alloc(ptr, size, 1, flags); + return ptr; } #ifdef CONFIG_NUMA @@ -2926,6 +2927,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { BUG_ON(!PageCompound(page)); + kmemleak_free(x); put_page(page); return; } -- cgit v1.2.3 From 53238a60dd4a679f6fe5613a7ed46899587205cf Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 7 Jul 2009 10:33:00 +0100 Subject: kmemleak: Allow partial freeing of memory blocks Functions like free_bootmem() are allowed to 
free only part of a memory block. This patch adds support for this via the kmemleak_free_part() callback which removes the original object and creates one or two additional objects as a result of the memory block split. Signed-off-by: Catalin Marinas Cc: Ingo Molnar Acked-by: Pekka Enberg --- include/linux/kmemleak.h | 4 ++ mm/kmemleak.c | 95 +++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 85 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 7796aed6cdd5..6a63807f714e 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -27,6 +27,7 @@ extern void kmemleak_init(void); extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp); extern void kmemleak_free(const void *ptr); +extern void kmemleak_free_part(const void *ptr, size_t size); extern void kmemleak_padding(const void *ptr, unsigned long offset, size_t size); extern void kmemleak_not_leak(const void *ptr); @@ -71,6 +72,9 @@ static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, static inline void kmemleak_free(const void *ptr) { } +static inline void kmemleak_free_part(const void *ptr, size_t size) +{ +} static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) { } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 466d39007264..5aabd41ffb8f 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -210,6 +210,7 @@ static DEFINE_MUTEX(scan_mutex); enum { KMEMLEAK_ALLOC, KMEMLEAK_FREE, + KMEMLEAK_FREE_PART, KMEMLEAK_NOT_LEAK, KMEMLEAK_IGNORE, KMEMLEAK_SCAN_AREA, @@ -523,27 +524,17 @@ out: * Remove the metadata (struct kmemleak_object) for a memory block from the * object_list and object_tree_root and decrement its use_count. */ -static void delete_object(unsigned long ptr) +static void __delete_object(struct kmemleak_object *object) { unsigned long flags; - struct kmemleak_object *object; write_lock_irqsave(&kmemleak_lock, flags); - object = lookup_object(ptr, 0); - if (!object) { -#ifdef DEBUG - kmemleak_warn("Freeing unknown object at 0x%08lx\n", - ptr); -#endif - write_unlock_irqrestore(&kmemleak_lock, flags); - return; - } prio_tree_remove(&object_tree_root, &object->tree_node); list_del_rcu(&object->object_list); write_unlock_irqrestore(&kmemleak_lock, flags); WARN_ON(!(object->flags & OBJECT_ALLOCATED)); - WARN_ON(atomic_read(&object->use_count) < 1); + WARN_ON(atomic_read(&object->use_count) < 2); /* * Locking here also ensures that the corresponding memory block @@ -555,6 +546,64 @@ static void delete_object(unsigned long ptr) put_object(object); } +/* + * Look up the metadata (struct kmemleak_object) corresponding to ptr and + * delete it. + */ +static void delete_object_full(unsigned long ptr) +{ + struct kmemleak_object *object; + + object = find_and_get_object(ptr, 0); + if (!object) { +#ifdef DEBUG + kmemleak_warn("Freeing unknown object at 0x%08lx\n", + ptr); +#endif + return; + } + __delete_object(object); + put_object(object); +} + +/* + * Look up the metadata (struct kmemleak_object) corresponding to ptr and + * delete it. If the memory block is partially freed, the function may create + * additional metadata for the remaining parts of the block. 
+ */ +static void delete_object_part(unsigned long ptr, size_t size) +{ + struct kmemleak_object *object; + unsigned long start, end; + + object = find_and_get_object(ptr, 1); + if (!object) { +#ifdef DEBUG + kmemleak_warn("Partially freeing unknown object at 0x%08lx " + "(size %zu)\n", ptr, size); +#endif + return; + } + __delete_object(object); + + /* + * Create one or two objects that may result from the memory block + * split. Note that partial freeing is only done by free_bootmem() and + * this happens before kmemleak_init() is called. The path below is + * only executed during early log recording in kmemleak_init(), so + * GFP_KERNEL is enough. + */ + start = object->pointer; + end = object->pointer + object->size; + if (ptr > start) + create_object(start, ptr - start, object->min_count, + GFP_KERNEL); + if (ptr + size < end) + create_object(ptr + size, end - ptr - size, object->min_count, + GFP_KERNEL); + + put_object(object); +} /* * Make a object permanently as gray-colored so that it can no longer be * reported as a leak. This is used in general to mark a false positive. @@ -719,12 +768,27 @@ void kmemleak_free(const void *ptr) pr_debug("%s(0x%p)\n", __func__, ptr); if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) - delete_object((unsigned long)ptr); + delete_object_full((unsigned long)ptr); else if (atomic_read(&kmemleak_early_log)) log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0); } EXPORT_SYMBOL_GPL(kmemleak_free); +/* + * Partial memory freeing function callback. This function is usually called + * from bootmem allocator when (part of) a memory block is freed. + */ +void kmemleak_free_part(const void *ptr, size_t size) +{ + pr_debug("%s(0x%p)\n", __func__, ptr); + + if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) + delete_object_part((unsigned long)ptr, size); + else if (atomic_read(&kmemleak_early_log)) + log_early(KMEMLEAK_FREE_PART, ptr, size, 0, 0, 0); +} +EXPORT_SYMBOL_GPL(kmemleak_free_part); + /* * Mark an already allocated memory block as a false positive. This will cause * the block to no longer be reported as leak and always be scanned. @@ -1318,7 +1382,7 @@ static int kmemleak_cleanup_thread(void *arg) rcu_read_lock(); list_for_each_entry_rcu(object, &object_list, object_list) - delete_object(object->pointer); + delete_object_full(object->pointer); rcu_read_unlock(); mutex_unlock(&scan_mutex); @@ -1413,6 +1477,9 @@ void __init kmemleak_init(void) case KMEMLEAK_FREE: kmemleak_free(log->ptr); break; + case KMEMLEAK_FREE_PART: + kmemleak_free_part(log->ptr, log->size); + break; case KMEMLEAK_NOT_LEAK: kmemleak_not_leak(log->ptr); break; -- cgit v1.2.3 From cde6263fa954dfc03ebe169aa3f7f71176d7901b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 8 Jul 2009 22:32:07 +0800 Subject: crypto: shash - Add crypto_shash_ctx_aligned This patch adds crypto_shash_ctx_aligned which will be needed by hmac after its conversion to shash. 
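For illustration only (hypothetical context type; 'tfm' is the wrapping
shash transform):

	struct example_ctx *ctx = crypto_shash_ctx_aligned(tfm);

	/* ctx honours the algorithm's cra_alignmask, unlike
	 * crypto_shash_ctx(tfm), which returns the raw context pointer */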
Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index fa5c9fb7ce5a..f1041140d3d9 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -123,5 +123,10 @@ static inline struct crypto_shash *crypto_spawn_shash( return crypto_spawn_tfm2(&spawn->base); } +static inline void *crypto_shash_ctx_aligned(struct crypto_shash *tfm) +{ + return crypto_tfm_ctx_aligned(&tfm->base); +} + #endif /* _CRYPTO_INTERNAL_HASH_H */ -- cgit v1.2.3 From 0390e6aecf571ddac934e6c0644bb4038167b698 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 8 Jul 2009 22:36:36 +0800 Subject: crypto: shash - Add __crypto_shash_cast This patch adds __crypto_shash_cast which turns a crypto_tfm into crypto_shash. It's analogous to the other __crypto_*_cast functions. It hasn't been needed until now since no existing shash algorithms have had an init function. Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index f1041140d3d9..3af34ca8e8bc 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -128,5 +128,10 @@ static inline void *crypto_shash_ctx_aligned(struct crypto_shash *tfm) return crypto_tfm_ctx_aligned(&tfm->base); } +static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm) +{ + return (struct crypto_shash *)tfm; +} + #endif /* _CRYPTO_INTERNAL_HASH_H */ -- cgit v1.2.3 From 37d217f029a56a6d385f99773fb27dfcb51f9a46 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 8 Jul 2009 18:17:58 +0200 Subject: fuse: document protocol version negotiation Clarify how the protocol version should be negotiated between kernel and userspace. Notably libfuse didn't correctly handle the case when the supported major versions didn't match. Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index b3700f0ac268..3e2925a34bf0 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -41,6 +41,26 @@ #include +/* + * Version negotiation: + * + * Both the kernel and userspace send the version they support in the + * INIT request and reply respectively. + * + * If the major versions match then both shall use the smallest + * of the two minor versions for communication. + * + * If the kernel supports a larger major version, then userspace shall + * reply with the major version it supports, ignore the rest of the + * INIT message and expect a new INIT message from the kernel with a + * matching major version. + * + * If the library supports a larger major version, then it shall fall + * back to the major protocol version sent by the kernel for + * communication and reply with that major version (and an arbitrary + * supported minor version). 
+ */ + /** Version number of this interface */ #define FUSE_KERNEL_VERSION 7 -- cgit v1.2.3 From e9bf0cc7cbfbf3952cdf8028aa0d348d09ecdba1 Mon Sep 17 00:00:00 2001 From: Parag Warudkar Date: Wed, 8 Jul 2009 11:46:02 -0400 Subject: elfcore.h : Fix UML build breakage Commit a65e7bfcd74e4c0939f235d2bf9f48ddb3a57991 broke the UML build with the following error - In file included from fs/proc/kcore.c:17: include/linux/elfcore.h: In function 'elf_core_copy_task_regs': include/linux/elfcore.h:129: error: implicit declaration of function 'task_pt_regs' Fix this by restoring the previous behavior of returning 0 for all arches like UML that don't define task_pt_regs. Signed-off-by: Parag Warudkar Acked-by: Amerigo Wang Signed-off-by: Linus Torvalds --- include/linux/elfcore.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 03ec16779802..00d6a68d0421 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -122,10 +122,9 @@ static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_r static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) { -#ifdef ELF_CORE_COPY_TASK_REGS - +#if defined (ELF_CORE_COPY_TASK_REGS) return ELF_CORE_COPY_TASK_REGS(t, elfregs); -#else +#elif defined (task_pt_regs) elf_core_copy_regs(elfregs, task_pt_regs(t)); #endif return 0; -- cgit v1.2.3 From b43f3cbd21ffbd719fd4fa6642bfe6af255ded34 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 8 Jul 2009 01:54:37 +0400 Subject: headers: mnt_namespace.h redux Fix various silly problems wrt mnt_namespace.h: - exit_mnt_ns() isn't used, remove it - done that, sched.h and nsproxy.h inclusions aren't needed - mount.h inclusion was need for vfsmount_lock, but no longer - remove mnt_namespace.h inclusion from files which don't use anything from mnt_namespace.h Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- fs/afs/mntpt.c | 1 - fs/namespace.c | 1 + fs/nfs/getroot.c | 1 - fs/reiserfs/super.c | 1 - include/linux/mnt_namespace.h | 13 ++----------- kernel/exit.c | 1 - kernel/fork.c | 1 - kernel/kmod.c | 1 - 8 files changed, 3 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index c52be53f6946..5ffb570cd3a8 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -17,7 +17,6 @@ #include #include #include -#include #include "internal.h" diff --git a/fs/namespace.c b/fs/namespace.c index 3dc283fd4716..277c28a63ead 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 46177cb87064..b35d2a616066 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index d3aeb061612b..7adea74d6a8a 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 3beb2592b03f..d74785c2393a 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -2,10 +2,9 @@ #define _NAMESPACE_H_ #ifdef __KERNEL__ -#include -#include -#include +#include #include +#include struct mnt_namespace { atomic_t count; @@ -28,14 +27,6 @@ extern struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt); extern struct 
mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void put_mnt_ns(struct mnt_namespace *ns); - -static inline void exit_mnt_ns(struct task_struct *p) -{ - struct mnt_namespace *ns = p->nsproxy->mnt_ns; - if (ns) - put_mnt_ns(ns); -} - static inline void get_mnt_ns(struct mnt_namespace *ns) { atomic_inc(&ns->count); diff --git a/kernel/exit.c b/kernel/exit.c index 628d41f0dd54..869dc221733e 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 467746b3f0aa..bd2959228871 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/kmod.c b/kernel/kmod.c index 7e95bedb2bfc..385c31a1bdbf 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 1ce822fa04fd6878f079461a4b8affe4bb5ec27b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 8 Jul 2009 21:25:54 +0530 Subject: includecheck fix: include/linux, rfkill.h fix the following 'make includecheck' warning: include/linux/rfkill.h: linux/types.h is included more than once. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: John W. Linville --- include/linux/rfkill.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index e73e2429a1b1..2ce29831feb6 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -99,7 +99,6 @@ enum rfkill_user_states { #undef RFKILL_STATE_UNBLOCKED #undef RFKILL_STATE_HARD_BLOCKED -#include #include #include #include -- cgit v1.2.3 From 023bf6f1b8bf58dc4da7f0dc1cf4787b0d5297c1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2009 11:27:40 +0900 Subject: linker script: unify usage of discard definition Discarded sections in different archs share some commonality but have considerable differences. This led to linker script for each arch implementing its own /DISCARD/ definition, which makes maintaining tedious and adding new entries error-prone. This patch makes all linker scripts to move discard definitions to the end of the linker script and use the common DISCARDS macro. As ld uses the first matching section definition, archs can include default discarded sections by including them earlier in the linker script. ia64 is notable because it first throws away some ia64 specific subsections and then include the rest of the sections into the final image, so those sections must be discarded before the inclusion. defconfig compile tested for x86, x86-64, powerpc, powerpc64, ia64, alpha, sparc, sparc64 and s390. Michal Simek tested microblaze. 
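Schematically, an architecture's vmlinux.lds.S now ends with the common macro, with any arch-specific /DISCARD/ rules kept alongside it (a hedged sketch, not any particular architecture's script; the section name below is hypothetical):

    SECTIONS
    {
            /* ... text, data, init and bss output sections ... */

            STABS_DEBUG
            DWARF_DEBUG

            /* common defaults: EXIT_TEXT, EXIT_DATA, EXIT_CALL, *(.discard) */
            DISCARDS
            /DISCARD/ : {
                    *(.arch.specific.junk)  /* hypothetical extra discard */
            }
    }

Because ld uses the first matching section rule, an arch that must keep something the defaults would throw away simply places its own rule for that section earlier in the script, before DISCARDS.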
Signed-off-by: Tejun Heo Acked-by: Paul Mundt Acked-by: Mike Frysinger Tested-by: Michal Simek Cc: linux-arch@vger.kernel.org Cc: Michal Simek Cc: microblaze-uclinux@itee.uq.edu.au Cc: Sam Ravnborg Cc: Tony Luck --- arch/alpha/kernel/vmlinux.lds.S | 10 ++-------- arch/avr32/kernel/vmlinux.lds.S | 10 +++------- arch/blackfin/kernel/vmlinux.lds.S | 6 +----- arch/cris/kernel/vmlinux.lds.S | 10 ++-------- arch/frv/kernel/vmlinux.lds.S | 2 +- arch/h8300/kernel/vmlinux.lds.S | 6 ++---- arch/ia64/kernel/vmlinux.lds.S | 17 ++++++++--------- arch/m32r/kernel/vmlinux.lds.S | 11 +++-------- arch/m68k/kernel/vmlinux-std.lds | 11 +++-------- arch/m68k/kernel/vmlinux-sun3.lds | 10 ++-------- arch/m68knommu/kernel/vmlinux.lds.S | 8 +------- arch/microblaze/kernel/vmlinux.lds.S | 2 +- arch/mips/kernel/vmlinux.lds.S | 22 ++++++++++------------ arch/mn10300/kernel/vmlinux.lds.S | 9 +++------ arch/parisc/kernel/vmlinux.lds.S | 9 ++++----- arch/powerpc/kernel/vmlinux.lds.S | 10 +++------- arch/s390/kernel/vmlinux.lds.S | 10 +++------- arch/sh/kernel/vmlinux.lds.S | 11 ++++------- arch/sparc/kernel/vmlinux.lds.S | 9 ++------- arch/um/include/asm/common.lds.S | 5 ----- arch/um/kernel/dyn.lds.S | 2 +- arch/um/kernel/uml.lds.S | 2 +- arch/x86/kernel/vmlinux.lds.S | 11 ++++------- arch/xtensa/kernel/vmlinux.lds.S | 14 ++++---------- include/asm-generic/vmlinux.lds.h | 18 ++++++++++++------ 25 files changed, 80 insertions(+), 155 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 75fe1d6877e9..6dc03c35caa0 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -134,14 +134,6 @@ SECTIONS __bss_stop = .; _end = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .mdebug 0 : { *(.mdebug) } @@ -151,4 +143,6 @@ SECTIONS STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S index b8324608ec0c..c4b56654349a 100644 --- a/arch/avr32/kernel/vmlinux.lds.S +++ b/arch/avr32/kernel/vmlinux.lds.S @@ -124,15 +124,11 @@ SECTIONS _end = .; } + DWARF_DEBUG + /* When something in the kernel is NOT compiled as a module, the module * cleanup code and data are put into these segments. Both can then be * thrown away, as cleanup code is never called unless it's a module. 
*/ - /DISCARD/ : { - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - - DWARF_DEBUG + DISCARDS } diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 6e8eabd8f0a6..d7ffe299b979 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -277,9 +277,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : - { - *(.exitcall.exit) - *(.discard) - } + DISCARDS } diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index a3175ebb38cc..6c81836b9229 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -140,13 +140,7 @@ SECTIONS _end = .; __end = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - dram_end = dram_start + (CONFIG_ETRAX_DRAM_SIZE - __CONFIG_ETRAX_VMEM_SIZE)*1024*1024; + + DISCARDS } diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 64b5a5e4d35e..7dbf41f68b52 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -178,7 +178,7 @@ SECTIONS .comment 0 : { *(.comment) } - /DISCARD/ : { *(.discard) } + DISCARDS } __kernel_image_size_no_bss = __bss_start - __kernel_image_start; diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 03d6c0df33db..662b02ecb86e 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -152,10 +152,6 @@ SECTIONS __end = . ; __ramstart = .; } - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - } .romfs : { *(.romfs*) @@ -166,4 +162,6 @@ SECTIONS COMMAND_START = . - 0x200 ; __ramend = . ; } + + DISCARDS } diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index 13d958975874..eb4214d1c5af 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -24,15 +24,14 @@ PHDRS { } SECTIONS { - /* Sections to be discarded */ + /* unwind exit sections must be discarded before the rest of the + sections get included. */ /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) *(.IA_64.unwind.exit.text) *(.IA_64.unwind_info.exit.text) - } + *(.comment) + *(.note) + } v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ phys_start = _start - LOAD_OFFSET; @@ -317,7 +316,7 @@ SECTIONS .debug_funcnames 0 : { *(.debug_funcnames) } .debug_typenames 0 : { *(.debug_typenames) } .debug_varnames 0 : { *(.debug_varnames) } - /* These must appear regardless of . */ - /DISCARD/ : { *(.comment) } - /DISCARD/ : { *(.note) } + + /* Default discards */ + DISCARDS } diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 480a49944cfd..de5e21cca6a5 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -120,14 +120,6 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } @@ -136,4 +128,7 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds index 905a797ada93..47eac19e8f61 100644 --- a/arch/m68k/kernel/vmlinux-std.lds +++ b/arch/m68k/kernel/vmlinux-std.lds @@ -82,14 +82,6 @@ SECTIONS _end = . 
; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } @@ -98,4 +90,7 @@ SECTIONS .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds index 47d04be322aa..03efaf04d7d7 100644 --- a/arch/m68k/kernel/vmlinux-sun3.lds +++ b/arch/m68k/kernel/vmlinux-sun3.lds @@ -77,14 +77,6 @@ __init_begin = .; _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .crap : { /* Stabs debugging sections. */ *(.stab) @@ -97,4 +89,6 @@ __init_begin = .; *(.note) } + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index 68111a61a77f..2736a5e309c0 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -184,13 +184,6 @@ SECTIONS { __init_end = .; } > INIT - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .bss : { . = ALIGN(4); _sbss = . ; @@ -201,5 +194,6 @@ SECTIONS { _end = . ; } > BSS + DISCARDS } diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S index 81bebdcb18fe..ec5fa91a48d8 100644 --- a/arch/microblaze/kernel/vmlinux.lds.S +++ b/arch/microblaze/kernel/vmlinux.lds.S @@ -163,5 +163,5 @@ SECTIONS { . = ALIGN(4096); _end = .; - /DISCARD/ : { *(.discard) } + DISCARDS } diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 45901609b741..1474c18fb777 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -176,18 +176,6 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - - /* ABI crap starts here */ - *(.MIPS.options) - *(.options) - *(.pdr) - *(.reginfo) - } - /* These mark the ABI of the kernel for debuggers. */ .mdebug.abi32 : { KEEP(*(.mdebug.abi32)) @@ -213,4 +201,14 @@ SECTIONS *(.gptab.bss) *(.gptab.sbss) } + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { + /* ABI crap starts here */ + *(.MIPS.options) + *(.options) + *(.pdr) + *(.reginfo) + } } diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index 5609d4962a55..8fcd0f1e21de 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -115,13 +115,10 @@ SECTIONS . = ALIGN(PAGE_SIZE); pg0 = .; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_CALL - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index ccf58341845a..aea1784edbd1 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -237,10 +237,12 @@ SECTIONS /* freed after init ends here */ _end = . 
; + STABS_DEBUG + .note 0 : { *(.note) } + /* Sections to be discarded */ + DISCARDS /DISCARD/ : { - *(.exitcall.exit) - *(.discard) #ifdef CONFIG_64BIT /* temporary hack until binutils is fixed to not emit these * for static binaries @@ -253,7 +255,4 @@ SECTIONS *(.gnu.hash) #endif } - - STABS_DEBUG - .note 0 : { *(.note) } } diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7fca9355fd3d..244e3658983c 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -37,13 +37,6 @@ jiffies = jiffies_64 + 4; #endif SECTIONS { - /* Sections to be discarded. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - EXIT_DATA - } - . = KERNELBASE; /* @@ -299,4 +292,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); _end = . ; PROVIDE32 (end = .); + + /* Sections to be discarded. */ + DISCARDS } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 98867dfea469..82415c75b996 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -157,14 +157,10 @@ SECTIONS _end = . ; - /* Sections to be discarded */ - /DISCARD/ : { - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - /* Debugging sections. */ STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS } diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 766976d27b21..0ce254bca92f 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -163,17 +163,14 @@ SECTIONS _end = . ; } + STABS_DEBUG + DWARF_DEBUG + /* * When something in the kernel is NOT compiled as a module, the * module cleanup code and data are put into these segments. Both * can then be thrown away, as cleanup code is never called unless * it's a module. */ - /DISCARD/ : { - *(.exitcall.exit) - *(.discard) - } - - STABS_DEBUG - DWARF_DEBUG + DISCARDS } diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index d63cf914667d..866390feb683 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -171,13 +171,8 @@ SECTIONS } _end = . 
; - /DISCARD/ : { - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + DISCARDS } diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index cb0248616d49..37ecc5577a9a 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -123,8 +123,3 @@ __initramfs_end = .; } - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - } - diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S index 2916d6eadffd..715a188c0472 100644 --- a/arch/um/kernel/dyn.lds.S +++ b/arch/um/kernel/dyn.lds.S @@ -157,5 +157,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : { *(.discard) } + DISCARDS } diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S index 1f8a622cabe1..2ebd39765db8 100644 --- a/arch/um/kernel/uml.lds.S +++ b/arch/um/kernel/uml.lds.S @@ -101,5 +101,5 @@ SECTIONS DWARF_DEBUG - /DISCARD/ : { *(.discard) } + DISCARDS } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 367e87882041..b600c843710b 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -387,15 +387,12 @@ SECTIONS _end = .; } - /* Sections to be discarded */ - /DISCARD/ : { - *(.exitcall.exit) - *(.eh_frame) - *(.discard) - } - STABS_DEBUG DWARF_DEBUG + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { *(.eh_frame) } } diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index b1e24638acd7..921b6ff3b645 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -280,16 +280,6 @@ SECTIONS *(.ResetVector.text) } - /* Sections to be discarded */ - /DISCARD/ : - { - *(.exit.literal) - EXIT_TEXT - EXIT_DATA - *(.exitcall.exit) - *(.discard) - } - .xt.lit : { *(.xt.lit) } .xt.prop : { *(.xt.prop) } @@ -322,4 +312,8 @@ SECTIONS *(.xt.lit) *(.gnu.linkonce.p*) } + + /* Sections to be discarded */ + DISCARDS + /DISCARD/ : { *(.exit.literal) } } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index c5c18ac878ab..ab8ea9b7741e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -35,13 +35,10 @@ * __bss_stop = .; * _end = .; * - * /DISCARD/ : { - * EXIT_TEXT - * EXIT_DATA - * EXIT_CALL - * } * STABS_DEBUG * DWARF_DEBUG + * + * DISCARDS // must be the last * } * * [__init_begin, __init_end] is the init section that may be freed after init @@ -629,11 +626,20 @@ #define INIT_RAM_FS #endif +/* + * Default discarded sections. + * + * Some archs want to discard exit text/data at runtime rather than + * link time due to cross-section references such as alt instructions, + * bug table, eh_frame, etc. DISCARDS must be the last of output + * section definitions so that such archs put those in earlier section + * definitions. + */ #define DISCARDS \ /DISCARD/ : { \ EXIT_TEXT \ EXIT_DATA \ - *(.exitcall.exit) \ + EXIT_CALL \ *(.discard) \ } -- cgit v1.2.3 From ef5d590c9b9a00b017683006fe4cf959b9532336 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 9 Jul 2009 11:32:55 +0800 Subject: crypto: shash - Add shash_instance_ctx This patch adds the helper shash_instance_ctx which is the shash analogue of crypto_instance_ctx. 
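As a hedged sketch of how a template might use it (the struct and function names are assumptions; only shash_instance_ctx() comes from this patch), the helper hands back the template-private area attached to the shash instance, just as crypto_instance_ctx() does for a plain crypto_instance:

    #include <crypto/internal/hash.h>
    #include <linux/string.h>

    struct example_instance_ctx {           /* hypothetical template state */
            unsigned int digest_words;
    };

    static int example_instance_setup(struct shash_instance *inst)
    {
            struct example_instance_ctx *ictx = shash_instance_ctx(inst);

            /* ictx points at the per-instance data carried by inst */
            memset(ictx, 0, sizeof(*ictx));
            return 0;
    }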
Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 3af34ca8e8bc..069b93e1a8ec 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -110,6 +110,11 @@ static inline struct shash_instance *shash_instance( struct shash_instance, alg); } +static inline void *shash_instance_ctx(struct shash_instance *inst) +{ + return crypto_instance_ctx(shash_crypto_instance(inst)); +} + static inline struct shash_instance *shash_alloc_instance( const char *name, struct crypto_alg *alg) { -- cgit v1.2.3 From ed5215a21460f63d6bdc118cb55a9e6d1b433f35 Mon Sep 17 00:00:00 2001 From: Thomas Liu Date: Thu, 9 Jul 2009 10:00:29 -0400 Subject: Move variable function in lsm_audit.h into SMACK private space Moved variable function in include/linux/lsm_audit.h into the smack_audit_data struct since it is never used outside of it. Also removed setting of function in the COMMON_AUDIT_DATA_INIT macro because that variable is now private to SMACK. Signed-off-by: Thomas Liu Acked-by: Eric Paris I-dont-see-any-problems-with-it: Casey Schaufler Signed-off-by: James Morris --- include/linux/lsm_audit.h | 4 ++-- security/smack/smack.h | 2 +- security/smack/smack_access.c | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index e461b2c3d711..68f7bce572b0 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -66,11 +66,11 @@ struct common_audit_data { } key_struct; #endif } u; - const char *function; /* this union contains LSM specific data */ union { /* SMACK data */ struct smack_audit_data { + const char *function; char *subject; char *object; char *request; @@ -104,7 +104,7 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb, /* Initialize an LSM audit data structure. */ #define COMMON_AUDIT_DATA_INIT(_d, _t) \ { memset((_d), 0, sizeof(struct common_audit_data)); \ - (_d)->type = LSM_AUDIT_DATA_##_t; (_d)->function = __func__; } + (_d)->type = LSM_AUDIT_DATA_##_t; } void common_lsm_audit(struct common_audit_data *a); diff --git a/security/smack/smack.h b/security/smack/smack.h index 243bec175be0..ff180ede3e47 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -275,7 +275,7 @@ static inline void smk_ad_init(struct smk_audit_info *a, const char *func, { memset(a, 0, sizeof(*a)); a->a.type = type; - a->a.function = func; + a->a.lsm_priv.smack_audit_data.function = func; } static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index 513dc1aa16dd..dd84877dff30 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -241,7 +241,8 @@ static void smack_log_callback(struct audit_buffer *ab, void *a) { struct common_audit_data *ad = a; struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data; - audit_log_format(ab, "lsm=SMACK fn=%s action=%s", ad->function, + audit_log_format(ab, "lsm=SMACK fn=%s action=%s", + ad->lsm_priv.smack_audit_data.function, sad->result ? 
"denied" : "granted"); audit_log_format(ab, " subject="); audit_log_untrustedstring(ab, sad->subject); @@ -274,8 +275,8 @@ void smack_log(char *subject_label, char *object_label, int request, if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0) return; - if (a->function == NULL) - a->function = "unknown"; + if (a->lsm_priv.smack_audit_data.function == NULL) + a->lsm_priv.smack_audit_data.function = "unknown"; /* end preparing the audit data */ sad = &a->lsm_priv.smack_audit_data; -- cgit v1.2.3 From d4131ded4d4c1a5c1363ddd93ca104ed97dd0458 Mon Sep 17 00:00:00 2001 From: Thomas Liu Date: Thu, 9 Jul 2009 10:00:30 -0400 Subject: security: Make lsm_priv union in lsm_audit.h anonymous Made the lsm_priv union in include/linux/lsm_audit.h anonymous. Signed-off-by: Thomas Liu Acked-by: Eric Paris Signed-off-by: James Morris --- include/linux/lsm_audit.h | 2 +- security/smack/smack.h | 2 +- security/smack/smack_access.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 68f7bce572b0..40d1b84f2a3c 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -86,7 +86,7 @@ struct common_audit_data { struct av_decision *avd; int result; } selinux_audit_data; - } lsm_priv; + }; /* these callback will be implemented by a specific LSM */ void (*lsm_pre_audit)(struct audit_buffer *, void *); void (*lsm_post_audit)(struct audit_buffer *, void *); diff --git a/security/smack/smack.h b/security/smack/smack.h index ff180ede3e47..c6e9acae72e4 100644 --- a/security/smack/smack.h +++ b/security/smack/smack.h @@ -275,7 +275,7 @@ static inline void smk_ad_init(struct smk_audit_info *a, const char *func, { memset(a, 0, sizeof(*a)); a->a.type = type; - a->a.lsm_priv.smack_audit_data.function = func; + a->a.smack_audit_data.function = func; } static inline void smk_ad_setfield_u_tsk(struct smk_audit_info *a, diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index dd84877dff30..0f9ac8146900 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -240,9 +240,9 @@ static inline void smack_str_from_perm(char *string, int access) static void smack_log_callback(struct audit_buffer *ab, void *a) { struct common_audit_data *ad = a; - struct smack_audit_data *sad = &ad->lsm_priv.smack_audit_data; + struct smack_audit_data *sad = &ad->smack_audit_data; audit_log_format(ab, "lsm=SMACK fn=%s action=%s", - ad->lsm_priv.smack_audit_data.function, + ad->smack_audit_data.function, sad->result ? 
"denied" : "granted"); audit_log_format(ab, " subject="); audit_log_untrustedstring(ab, sad->subject); @@ -275,11 +275,11 @@ void smack_log(char *subject_label, char *object_label, int request, if (result == 0 && (log_policy & SMACK_AUDIT_ACCEPT) == 0) return; - if (a->lsm_priv.smack_audit_data.function == NULL) - a->lsm_priv.smack_audit_data.function = "unknown"; + if (a->smack_audit_data.function == NULL) + a->smack_audit_data.function = "unknown"; /* end preparing the audit data */ - sad = &a->lsm_priv.smack_audit_data; + sad = &a->smack_audit_data; smack_str_from_perm(request_buffer, request); sad->subject = subject_label; sad->object = object_label; -- cgit v1.2.3 From 65c3f0a2d0f72d210c879e4974c2d222b7951321 Mon Sep 17 00:00:00 2001 From: Thomas Liu Date: Thu, 9 Jul 2009 10:00:31 -0400 Subject: security: Wrap SMACK and SELINUX audit data structs in ifdefs Wrapped the smack_audit_data and selinux_audit_data structs in include/linux/lsm_audit.h in ifdefs so that the union will always be the correct size. Signed-off-by: Thomas Liu Acked-by: Eric Paris Signed-off-by: James Morris --- include/linux/lsm_audit.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 40d1b84f2a3c..a5514a3a4f17 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -68,6 +68,7 @@ struct common_audit_data { } u; /* this union contains LSM specific data */ union { +#ifdef CONFIG_SECURITY_SMACK /* SMACK data */ struct smack_audit_data { const char *function; @@ -76,6 +77,8 @@ struct common_audit_data { char *request; int result; } smack_audit_data; +#endif +#ifdef CONFIG_SECURITY_SELINUX /* SELinux data */ struct { u32 ssid; @@ -86,6 +89,7 @@ struct common_audit_data { struct av_decision *avd; int result; } selinux_audit_data; +#endif }; /* these callback will be implemented by a specific LSM */ void (*lsm_pre_audit)(struct audit_buffer *, void *); -- cgit v1.2.3 From a57de0b4336e48db2811a2030bb68dba8dd09d88 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Jul 2009 12:09:13 +0000 Subject: net: adding memory barrier to the poll and receive callbacks Adding memory barrier after the poll_wait function, paired with receive callbacks. Adding fuctions sock_poll_wait and sk_has_sleeper to wrap the memory barrier. Without the memory barrier, following race can happen. The race fires, when following code paths meet, and the tp->rcv_nxt and __add_wait_queue updates stay in CPU caches. CPU1 CPU2 sys_select receive packet ... ... __add_wait_queue update tp->rcv_nxt ... ... tp->rcv_nxt check sock_def_readable ... { schedule ... if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep) ... } If there was no cache the code would work ok, since the wait_queue and rcv_nxt are opposit to each other. Meaning that once tp->rcv_nxt is updated by CPU2, the CPU1 either already passed the tp->rcv_nxt check and sleeps, or will get the new value for tp->rcv_nxt and will return with new data mask. In both cases the process (CPU1) is being added to the wait queue, so the waitqueue_active (CPU2) call cannot miss and will wake up CPU1. The bad case is when the __add_wait_queue changes done by CPU1 stay in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 will then endup calling schedule and sleep forever if there are no more data on the socket. 
Calls to poll_wait in following modules were ommited: net/bluetooth/af_bluetooth.c net/irda/af_irda.c net/irda/irnet/irnet_ppp.c net/mac80211/rc80211_pid_debugfs.c net/phonet/socket.c net/rds/af_rds.c net/rfkill/core.c net/sunrpc/cache.c net/sunrpc/rpc_pipe.c net/tipc/socket.c Signed-off-by: Jiri Olsa Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++ net/atm/common.c | 6 ++--- net/core/datagram.c | 2 +- net/core/sock.c | 8 +++---- net/dccp/output.c | 2 +- net/dccp/proto.c | 2 +- net/ipv4/tcp.c | 2 +- net/iucv/af_iucv.c | 4 ++-- net/rxrpc/af_rxrpc.c | 4 ++-- net/unix/af_unix.c | 8 +++---- 10 files changed, 85 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 352f06bbd7a9..4eb8409249f6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -54,6 +54,7 @@ #include #include +#include #include #include @@ -1241,6 +1242,71 @@ static inline int sk_has_allocations(const struct sock *sk) return sk_wmem_alloc_get(sk) || sk_rmem_alloc_get(sk); } +/** + * sk_has_sleeper - check if there are any waiting processes + * @sk: socket + * + * Returns true if socket has waiting processes + * + * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory + * barrier call. They were added due to the race found within the tcp code. + * + * Consider following tcp code paths: + * + * CPU1 CPU2 + * + * sys_select receive packet + * ... ... + * __add_wait_queue update tp->rcv_nxt + * ... ... + * tp->rcv_nxt check sock_def_readable + * ... { + * schedule ... + * if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + * wake_up_interruptible(sk->sk_sleep) + * ... + * } + * + * The race for tcp fires when the __add_wait_queue changes done by CPU1 stay + * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 + * could then endup calling schedule and sleep forever if there are no more + * data on the socket. + */ +static inline int sk_has_sleeper(struct sock *sk) +{ + /* + * We need to be sure we are in sync with the + * add_wait_queue modifications to the wait queue. + * + * This memory barrier is paired in the sock_poll_wait. + */ + smp_mb(); + return sk->sk_sleep && waitqueue_active(sk->sk_sleep); +} + +/** + * sock_poll_wait - place memory barrier behind the poll_wait call. + * @filp: file + * @wait_address: socket wait queue + * @p: poll_table + * + * See the comments in the sk_has_sleeper function. + */ +static inline void sock_poll_wait(struct file *filp, + wait_queue_head_t *wait_address, poll_table *p) +{ + if (p && wait_address) { + poll_wait(filp, wait_address, p); + /* + * We need to be sure we are in sync with the + * socket flags modification. + * + * This memory barrier is paired in the sk_has_sleeper. + */ + smp_mb(); + } +} + /* * Queue a received datagram if it will fit. 
Stream and sequenced * protocols can't normally use this as they need to fit buffers in diff --git a/net/atm/common.c b/net/atm/common.c index c1c97936192c..8c4d843eb17f 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -92,7 +92,7 @@ static void vcc_sock_destruct(struct sock *sk) static void vcc_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up(sk->sk_sleep); read_unlock(&sk->sk_callback_lock); } @@ -110,7 +110,7 @@ static void vcc_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); if (vcc_writable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); @@ -594,7 +594,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) struct atm_vcc *vcc; unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; vcc = ATM_SD(sock); diff --git a/net/core/datagram.c b/net/core/datagram.c index 58abee1f1df1..b0fe69211eef 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -712,7 +712,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* exceptional events? */ diff --git a/net/core/sock.c b/net/core/sock.c index b0ba569bc973..6354863b1c68 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1715,7 +1715,7 @@ EXPORT_SYMBOL(sock_no_sendpage); static void sock_def_wakeup(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_all(sk->sk_sleep); read_unlock(&sk->sk_callback_lock); } @@ -1723,7 +1723,7 @@ static void sock_def_wakeup(struct sock *sk) static void sock_def_error_report(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_poll(sk->sk_sleep, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); @@ -1732,7 +1732,7 @@ static void sock_def_error_report(struct sock *sk) static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); @@ -1747,7 +1747,7 @@ static void sock_def_write_space(struct sock *sk) * progress. 
--DaveM */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND); diff --git a/net/dccp/output.c b/net/dccp/output.c index c0e88c16d088..c96119fda688 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -196,7 +196,7 @@ void dccp_write_space(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible(sk->sk_sleep); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 314a1b5c033c..94ca8eaace7d 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -311,7 +311,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7870a535dac6..91145244ea63 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -339,7 +339,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sock *sk = sock->sk; struct tcp_sock *tp = tcp_sk(sk); - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == TCP_LISTEN) return inet_csk_listen_poll(sk); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6be5f92d1094..49c15b48408e 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -306,7 +306,7 @@ static inline int iucv_below_msglim(struct sock *sk) static void iucv_sock_wake_msglim(struct sock *sk) { read_lock(&sk->sk_callback_lock); - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_all(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); read_unlock(&sk->sk_callback_lock); @@ -1256,7 +1256,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk; unsigned int mask = 0; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); if (sk->sk_state == IUCV_LISTEN) return iucv_accept_poll(sk); diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index eac5e7bb7365..bfe493ebf27c 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -63,7 +63,7 @@ static void rxrpc_write_space(struct sock *sk) _enter("%p", sk); read_lock(&sk->sk_callback_lock); if (rxrpc_writable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } @@ -588,7 +588,7 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 36d4e44d6233..fc3ebb906911 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -315,7 +315,7 @@ static void unix_write_space(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (unix_writable(sk)) { - if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + if (sk_has_sleeper(sk)) wake_up_interruptible_sync(sk->sk_sleep); sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } @@ -1985,7 +1985,7 @@ static unsigned 
int unix_poll(struct file *file, struct socket *sock, poll_table struct sock *sk = sock->sk; unsigned int mask; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* exceptional events? */ @@ -2022,7 +2022,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, struct sock *sk = sock->sk, *other; unsigned int mask, writable; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk->sk_sleep, wait); mask = 0; /* exceptional events? */ @@ -2053,7 +2053,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, other = unix_peer_get(sk); if (other) { if (unix_peer(other) != sk) { - poll_wait(file, &unix_sk(other)->peer_wait, + sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); if (unix_recvq_full(other)) writable = 0; -- cgit v1.2.3 From ad46276952f1af34cd91d46d49ba13d347d56367 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Jul 2009 12:10:31 +0000 Subject: memory barrier: adding smp_mb__after_lock Adding smp_mb__after_lock define to be used as a smp_mb call after a lock. Making it nop for x86, since {read|write|spin}_lock() on x86 are full memory barriers. Signed-off-by: Jiri Olsa Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- arch/x86/include/asm/spinlock.h | 4 ++++ include/linux/spinlock.h | 5 +++++ include/net/sock.h | 5 ++++- 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index b7e5db876399..4e77853321db 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -302,4 +302,8 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) #define _raw_read_relax(lock) cpu_relax() #define _raw_write_relax(lock) cpu_relax() +/* The {read|write|spin}_lock() on x86 are full memory barriers. */ +static inline void smp_mb__after_lock(void) { } +#define ARCH_HAS_SMP_MB_AFTER_LOCK + #endif /* _ASM_X86_SPINLOCK_H */ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 252b245cfcf4..4be57ab03478 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -132,6 +132,11 @@ do { \ #endif /*__raw_spin_is_contended*/ #endif +/* The lock does not imply full memory barrier. */ +#ifndef ARCH_HAS_SMP_MB_AFTER_LOCK +static inline void smp_mb__after_lock(void) { smp_mb(); } +#endif + /** * spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. diff --git a/include/net/sock.h b/include/net/sock.h index 4eb8409249f6..2c0da9239b95 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1271,6 +1271,9 @@ static inline int sk_has_allocations(const struct sock *sk) * in its cache, and so does the tp->rcv_nxt update on CPU2 side. The CPU1 * could then endup calling schedule and sleep forever if there are no more * data on the socket. + * + * The sk_has_sleeper is always called right after a call to read_lock, so we + * can use smp_mb__after_lock barrier. */ static inline int sk_has_sleeper(struct sock *sk) { @@ -1280,7 +1283,7 @@ static inline int sk_has_sleeper(struct sock *sk) * * This memory barrier is paired in the sock_poll_wait. 
*/ - smp_mb(); + smp_mb__after_lock(); return sk->sk_sleep && waitqueue_active(sk->sk_sleep); } -- cgit v1.2.3 From 6ff7041dbfeb3bd7dfe9aa67275c21199ef760d6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Jul 2009 14:57:05 +0200 Subject: hrtimer: Fix migration expiry check The timer migration expiry check should prevent the migration of a timer to another CPU when the timer expires before the next event is scheduled on the other CPU. Migrating the timer might delay it because we can not reprogram the clock event device on the other CPU. But the code implementing that check has two flaws: - for !HIGHRES the check compares the expiry value with the clock events device expiry value which is wrong for CLOCK_REALTIME based timers. - the check is racy. It holds the hrtimer base lock of the target CPU, but the clock event device expiry value can be modified nevertheless, e.g. by an timer interrupt firing. The !HIGHRES case is easy to fix as we can enqueue the timer on the cpu which was selected by the load balancer. It runs the idle balancing code once per jiffy anyway. So the maximum delay for the timer is the same as when we keep the tick on the current cpu going. In the HIGHRES case we can get the next expiry value from the hrtimer cpu_base of the target CPU and serialize the update with the cpu_base lock. This moves the lock section in hrtimer_interrupt() so we can set next_event to KTIME_MAX while we are handling the expired timers and set it to the next expiry value after we handled the timers under the base lock. While the expired timers are processed timer migration is blocked because the expiry time of the timer is always <= KTIME_MAX. Also remove the now useless clockevents_get_next_event() function. Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 9 ---- kernel/hrtimer.c | 121 ++++++++++++++++++++++++--------------------- kernel/time/clockevents.c | 11 ----- 3 files changed, 64 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 20a100fe2b4f..3a1dbba4d3ae 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -143,12 +143,3 @@ extern void clockevents_notify(unsigned long reason, void *arg); #endif #endif - -#ifdef CONFIG_GENERIC_CLOCKEVENTS -extern ktime_t clockevents_get_next_event(int cpu); -#else -static inline ktime_t clockevents_get_next_event(int cpu) -{ - return (ktime_t) { .tv64 = KTIME_MAX }; -} -#endif diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 126b9808f287..49da79ab8486 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -191,6 +191,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } } + +/* + * Get the preferred target CPU for NOHZ + */ +static int hrtimer_get_target(int this_cpu, int pinned) +{ +#ifdef CONFIG_NO_HZ + if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) { + int preferred_cpu = get_nohz_load_balancer(); + + if (preferred_cpu >= 0) + return preferred_cpu; + } +#endif + return this_cpu; +} + +/* + * With HIGHRES=y we do not migrate the timer when it is expiring + * before the next event on the target cpu because we cannot reprogram + * the target cpu hardware and we would cause it to fire late. + * + * Called with cpu_base->lock of target cpu held. 
+ */ +static int +hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) +{ +#ifdef CONFIG_HIGH_RES_TIMERS + ktime_t expires; + + if (!new_base->cpu_base->hres_active) + return 0; + + expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); + return expires.tv64 <= new_base->cpu_base->expires_next.tv64; +#else + return 0; +#endif +} + /* * Switch the timer base to the current CPU when possible. */ @@ -200,27 +240,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, { struct hrtimer_clock_base *new_base; struct hrtimer_cpu_base *new_cpu_base; - int cpu, preferred_cpu = -1; - - cpu = smp_processor_id(); -#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) - if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) { - preferred_cpu = get_nohz_load_balancer(); - if (preferred_cpu >= 0) { - /* - * We must not check the expiry value when - * preferred_cpu is the current cpu. If base - * != new_base we would loop forever when the - * timer expires before the current programmed - * next timer event. - */ - if (preferred_cpu != cpu) - cpu = preferred_cpu; - else - preferred_cpu = -1; - } - } -#endif + int this_cpu = smp_processor_id(); + int cpu = hrtimer_get_target(this_cpu, pinned); again: new_cpu_base = &per_cpu(hrtimer_bases, cpu); @@ -228,7 +249,7 @@ again: if (base != new_base) { /* - * We are trying to schedule the timer on the local CPU. + * We are trying to move timer to new_base. * However we can't change timer's base while it is running, * so we keep it on the same CPU. No hassle vs. reprogramming * the event source in the high resolution case. The softirq @@ -244,38 +265,12 @@ again: spin_unlock(&base->cpu_base->lock); spin_lock(&new_base->cpu_base->lock); - /* Optimized away for NOHZ=n SMP=n */ - if (cpu == preferred_cpu) { - /* Calculate clock monotonic expiry time */ -#ifdef CONFIG_HIGH_RES_TIMERS - ktime_t expires = ktime_sub(hrtimer_get_expires(timer), - new_base->offset); -#else - ktime_t expires = hrtimer_get_expires(timer); -#endif - - /* - * Get the next event on target cpu from the - * clock events layer. - * This covers the highres=off nohz=on case as well. - */ - ktime_t next = clockevents_get_next_event(cpu); - - ktime_t delta = ktime_sub(expires, next); - - /* - * We do not migrate the timer when it is expiring - * before the next event on the target cpu because - * we cannot reprogram the target cpu hardware and - * we would cause it to fire late. - */ - if (delta.tv64 < 0) { - cpu = smp_processor_id(); - spin_unlock(&new_base->cpu_base->lock); - spin_lock(&base->cpu_base->lock); - timer->base = base; - goto again; - } + if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { + cpu = this_cpu; + spin_unlock(&new_base->cpu_base->lock); + spin_lock(&base->cpu_base->lock); + timer->base = base; + goto again; } timer->base = new_base; } @@ -1287,14 +1282,22 @@ void hrtimer_interrupt(struct clock_event_device *dev) expires_next.tv64 = KTIME_MAX; + spin_lock(&cpu_base->lock); + /* + * We set expires_next to KTIME_MAX here with cpu_base->lock + * held to prevent that a timer is enqueued in our queue via + * the migration code. This does not affect enqueueing of + * timers which run their callback and need to be requeued on + * this CPU. 
+ */ + cpu_base->expires_next.tv64 = KTIME_MAX; + base = cpu_base->clock_base; for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { ktime_t basenow; struct rb_node *node; - spin_lock(&cpu_base->lock); - basenow = ktime_add(now, base->offset); while ((node = base->first)) { @@ -1327,11 +1330,15 @@ void hrtimer_interrupt(struct clock_event_device *dev) __run_hrtimer(timer); } - spin_unlock(&cpu_base->lock); base++; } + /* + * Store the new expiry value so the migration code can verify + * against it. + */ cpu_base->expires_next = expires_next; + spin_unlock(&cpu_base->lock); /* Reprogramming necessary ? */ if (expires_next.tv64 != KTIME_MAX) { diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 1ad6dd461119..a6dcd67b041d 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -254,15 +254,4 @@ void clockevents_notify(unsigned long reason, void *arg) spin_unlock(&clockevents_lock); } EXPORT_SYMBOL_GPL(clockevents_notify); - -ktime_t clockevents_get_next_event(int cpu) -{ - struct tick_device *td; - struct clock_event_device *dev; - - td = &per_cpu(tick_cpu_device, cpu); - dev = td->evtdev; - - return dev->next_event; -} #endif -- cgit v1.2.3 From f9f868dbcca961ed62f1df1d114abd0c38c47dce Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 9 Jul 2009 11:24:26 +0200 Subject: timer stats: fix quick check optimization git commit 507e1231 "timer stats: Optimize by adding quick check to avoid function calls" added one wrong check so that one unnecessary function call isn't elimated. time_stats_account_hrtimer() checks if timer->start_pid isn't initialized in order to find out if timer_stats_update_stats() should be called. However start_pid is initialized with -1 instead of 0, so that the function call always happens. Check timer->start_site like in timer_stats_account_timer() to fix this. Signed-off-by: Heiko Carstens Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 54648e625efd..4759917adc71 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -448,7 +448,7 @@ extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, static inline void timer_stats_account_hrtimer(struct hrtimer *timer) { - if (likely(!timer->start_pid)) + if (likely(!timer->start_site)) return; timer_stats_update_stats(timer, timer->start_pid, timer->start_site, timer->function, timer->start_comm, 0); -- cgit v1.2.3 From 8aa7e847d834ed937a9ad37a0f2ad5b8584c1ab0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 9 Jul 2009 14:52:32 +0200 Subject: Fix congestion_wait() sync/async vs read/write confusion Commit 1faa16d22877f4839bd433547d770c676d1d964c accidentally broke the bdi congestion wait queue logic, causing us to wait on congestion for WRITE (== 1) when we really wanted BLK_RW_ASYNC (== 0) instead. 
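A hedged sketch of the calling convention after this change (the helper below is hypothetical and only mirrors the retry loops touched by the patch): callers that throttle asynchronous writeback name the async queue explicitly instead of passing WRITE, whose value (1) selects the sync queue.

    #include <linux/blkdev.h>
    #include <linux/slab.h>

    static void *example_alloc_retry(size_t size, gfp_t gfp)
    {
            void *p;

            while (!(p = kmalloc(size, gfp)))
                    /* back off until async writeback makes progress;
                     * BLK_RW_ASYNC == 0, BLK_RW_SYNC == 1 (the old WRITE) */
                    congestion_wait(BLK_RW_ASYNC, HZ/50);

            return p;
    }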
Signed-off-by: Jens Axboe --- arch/x86/lib/usercopy_32.c | 2 +- drivers/block/pktcdvd.c | 10 ++++++---- drivers/md/dm-crypt.c | 2 +- fs/fat/file.c | 2 +- fs/fuse/dev.c | 8 ++++---- fs/nfs/write.c | 8 +++++--- fs/reiserfs/journal.c | 2 +- fs/xfs/linux-2.6/kmem.c | 4 ++-- fs/xfs/linux-2.6/xfs_buf.c | 2 +- include/linux/backing-dev.h | 6 +++--- include/linux/blkdev.h | 8 ++++---- mm/backing-dev.c | 7 +++---- mm/memcontrol.c | 2 +- mm/page-writeback.c | 8 ++++---- mm/page_alloc.c | 4 ++-- mm/vmscan.c | 8 ++++---- 16 files changed, 43 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 7c8ca91bb9ec..1f118d462acc 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -751,7 +751,7 @@ survive: if (retval == -ENOMEM && is_global_init(current)) { up_read(¤t->mm->mmap_sem); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); goto survive; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 83650e00632d..99a506f619b7 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1372,8 +1372,10 @@ try_next_bio: wakeup = (pd->write_congestion_on > 0 && pd->bio_queue_size <= pd->write_congestion_off); spin_unlock(&pd->lock); - if (wakeup) - clear_bdi_congested(&pd->disk->queue->backing_dev_info, WRITE); + if (wakeup) { + clear_bdi_congested(&pd->disk->queue->backing_dev_info, + BLK_RW_ASYNC); + } pkt->sleep_time = max(PACKET_WAIT_TIME, 1); pkt_set_state(pkt, PACKET_WAITING_STATE); @@ -2592,10 +2594,10 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio) spin_lock(&pd->lock); if (pd->write_congestion_on > 0 && pd->bio_queue_size >= pd->write_congestion_on) { - set_bdi_congested(&q->backing_dev_info, WRITE); + set_bdi_congested(&q->backing_dev_info, BLK_RW_ASYNC); do { spin_unlock(&pd->lock); - congestion_wait(WRITE, HZ); + congestion_wait(BLK_RW_ASYNC, HZ); spin_lock(&pd->lock); } while(pd->bio_queue_size > pd->write_congestion_off); } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9933eb861c71..529e2ba505c3 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -776,7 +776,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) * But don't wait if split was due to the io size restriction */ if (unlikely(out_of_pages)) - congestion_wait(WRITE, HZ/100); + congestion_wait(BLK_RW_ASYNC, HZ/100); /* * With async crypto it is unsafe to share the crypto context diff --git a/fs/fat/file.c b/fs/fat/file.c index b28ea646ff60..f042b965c95c 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -134,7 +134,7 @@ static int fat_file_release(struct inode *inode, struct file *filp) if ((filp->f_mode & FMODE_WRITE) && MSDOS_SB(inode->i_sb)->options.flush) { fat_flush_inodes(inode->i_sb, inode, NULL); - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); } return 0; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f58ecbc416c8..6484eb75acd6 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -286,8 +286,8 @@ __releases(&fc->lock) } if (fc->num_background == FUSE_CONGESTION_THRESHOLD && fc->connected && fc->bdi_initialized) { - clear_bdi_congested(&fc->bdi, READ); - clear_bdi_congested(&fc->bdi, WRITE); + clear_bdi_congested(&fc->bdi, BLK_RW_SYNC); + clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC); } fc->num_background--; fc->active_background--; @@ -414,8 +414,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc, fc->blocked = 1; if (fc->num_background == FUSE_CONGESTION_THRESHOLD && 
fc->bdi_initialized) { - set_bdi_congested(&fc->bdi, READ); - set_bdi_congested(&fc->bdi, WRITE); + set_bdi_congested(&fc->bdi, BLK_RW_SYNC); + set_bdi_congested(&fc->bdi, BLK_RW_ASYNC); } list_add_tail(&req->list, &fc->bg_queue); flush_bg_queue(fc); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index ce728829f79a..0a0a2ff767c3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -202,8 +202,10 @@ static int nfs_set_page_writeback(struct page *page) struct nfs_server *nfss = NFS_SERVER(inode); if (atomic_long_inc_return(&nfss->writeback) > - NFS_CONGESTION_ON_THRESH) - set_bdi_congested(&nfss->backing_dev_info, WRITE); + NFS_CONGESTION_ON_THRESH) { + set_bdi_congested(&nfss->backing_dev_info, + BLK_RW_ASYNC); + } } return ret; } @@ -215,7 +217,7 @@ static void nfs_end_page_writeback(struct page *page) end_page_writeback(page); if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) - clear_bdi_congested(&nfss->backing_dev_info, WRITE); + clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC); } /* diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 77f5bb746bf0..90622200b39c 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -997,7 +997,7 @@ static int reiserfs_async_progress_wait(struct super_block *s) DEFINE_WAIT(wait); struct reiserfs_journal *j = SB_JOURNAL(s); if (atomic_read(&j->j_async_throttle)) - congestion_wait(WRITE, HZ / 10); + congestion_wait(BLK_RW_ASYNC, HZ / 10); return 0; } diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 1cd3b55ee3d2..2d3f90afe5f1 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -53,7 +53,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) printk(KERN_ERR "XFS: possible memory allocation " "deadlock in %s (mode:0x%x)\n", __func__, lflags); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } @@ -130,7 +130,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) printk(KERN_ERR "XFS: possible memory allocation " "deadlock in %s (mode:0x%x)\n", __func__, lflags); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); } while (1); } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 1418b916fc27..0c93c7ef3d18 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -412,7 +412,7 @@ _xfs_buf_lookup_pages( XFS_STATS_INC(xb_page_retries); xfsbufd_wakeup(0, gfp_mask); - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry; } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0ec2c594868e..3a52a63c1351 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -229,9 +229,9 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << BDI_async_congested)); } -void clear_bdi_congested(struct backing_dev_info *bdi, int rw); -void set_bdi_congested(struct backing_dev_info *bdi, int rw); -long congestion_wait(int rw, long timeout); +void clear_bdi_congested(struct backing_dev_info *bdi, int sync); +void set_bdi_congested(struct backing_dev_info *bdi, int sync); +long congestion_wait(int sync, long timeout); static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 49ae07951d55..bb3d39978701 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -779,18 +779,18 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, * congested queues, and wake up anyone who was 
waiting for requests to be * put back. */ -static inline void blk_clear_queue_congested(struct request_queue *q, int rw) +static inline void blk_clear_queue_congested(struct request_queue *q, int sync) { - clear_bdi_congested(&q->backing_dev_info, rw); + clear_bdi_congested(&q->backing_dev_info, sync); } /* * A queue has just entered congestion. Flag that in the queue's VM-visible * state flags and increment the global gounter of congested queues. */ -static inline void blk_set_queue_congested(struct request_queue *q, int rw) +static inline void blk_set_queue_congested(struct request_queue *q, int sync) { - set_bdi_congested(&q->backing_dev_info, rw); + set_bdi_congested(&q->backing_dev_info, sync); } extern void blk_start_queue(struct request_queue *q); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 493b468a5035..c86edd244294 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -283,7 +283,6 @@ static wait_queue_head_t congestion_wqh[2] = { __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) }; - void clear_bdi_congested(struct backing_dev_info *bdi, int sync) { enum bdi_state bit; @@ -308,18 +307,18 @@ EXPORT_SYMBOL(set_bdi_congested); /** * congestion_wait - wait for a backing_dev to become uncongested - * @rw: READ or WRITE + * @sync: SYNC or ASYNC IO * @timeout: timeout in jiffies * * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit * write congestion. If no backing_devs are congested then just wait for the * next write to be completed. */ -long congestion_wait(int rw, long timeout) +long congestion_wait(int sync, long timeout) { long ret; DEFINE_WAIT(wait); - wait_queue_head_t *wqh = &congestion_wqh[rw]; + wait_queue_head_t *wqh = &congestion_wqh[sync]; prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); ret = io_schedule_timeout(timeout); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e2fa20dadf40..e717964cb5a0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1973,7 +1973,7 @@ try_to_free: if (!progress) { nr_retries--; /* maybe some writeback is necessary */ - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); } } diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7687879253b9..81627ebcd313 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -575,7 +575,7 @@ static void balance_dirty_pages(struct address_space *mapping) if (pages_written >= write_chunk) break; /* We've done our duty */ - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); } if (bdi_nr_reclaimable + bdi_nr_writeback < bdi_thresh && @@ -669,7 +669,7 @@ void throttle_vm_writeout(gfp_t gfp_mask) if (global_page_state(NR_UNSTABLE_NFS) + global_page_state(NR_WRITEBACK) <= dirty_thresh) break; - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); /* * The caller might hold locks which can prevent IO completion @@ -715,7 +715,7 @@ static void background_writeout(unsigned long _min_pages) if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { /* Wrote less than expected */ if (wbc.encountered_congestion || wbc.more_io) - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); else break; } @@ -787,7 +787,7 @@ static void wb_kupdate(unsigned long arg) writeback_inodes(&wbc); if (wbc.nr_to_write > 0) { if (wbc.encountered_congestion || wbc.more_io) - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); else break; /* All the old data is written */ } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ad7cd1c56b07..a35eeab2724c 100644 --- a/mm/page_alloc.c +++ 
b/mm/page_alloc.c @@ -1666,7 +1666,7 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order, preferred_zone, migratetype); if (!page && gfp_mask & __GFP_NOFAIL) - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); } while (!page && (gfp_mask & __GFP_NOFAIL)); return page; @@ -1831,7 +1831,7 @@ rebalance: pages_reclaimed += did_some_progress; if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { /* Wait for some write requests to complete then retry */ - congestion_wait(WRITE, HZ/50); + congestion_wait(BLK_RW_ASYNC, HZ/50); goto rebalance; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 54155268dfca..dea7abd31098 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1104,7 +1104,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, */ if (nr_freed < nr_taken && !current_is_kswapd() && lumpy_reclaim) { - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); /* * The attempt at page out may have made some @@ -1721,7 +1721,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, /* Take a nap, wait for some writeback to complete */ if (sc->nr_scanned && priority < DEF_PRIORITY - 2) - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); } /* top priority shrink_zones still had more to do? don't OOM, then */ if (!sc->all_unreclaimable && scanning_global_lru(sc)) @@ -1960,7 +1960,7 @@ loop_again: * another pass across the zones. */ if (total_scanned && priority < DEF_PRIORITY - 2) - congestion_wait(WRITE, HZ/10); + congestion_wait(BLK_RW_ASYNC, HZ/10); /* * We do this so kswapd doesn't build up large priorities for @@ -2233,7 +2233,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) goto out; if (sc.nr_scanned && prio < DEF_PRIORITY - 2) - congestion_wait(WRITE, HZ / 10); + congestion_wait(BLK_RW_ASYNC, HZ / 10); } } -- cgit v1.2.3 From ecb554a846f8e9d2a58f6d6c118168a63ac065aa Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 9 Jul 2009 14:46:53 +0200 Subject: block: fix sg SG_DXFER_TO_FROM_DEV regression I overlooked SG_DXFER_TO_FROM_DEV support when I converted sg to use the block layer mapping API (2.6.28). Douglas Gilbert explained SG_DXFER_TO_FROM_DEV: http://www.spinics.net/lists/linux-scsi/msg37135.html = The semantics of SG_DXFER_TO_FROM_DEV were: - copy user space buffer to kernel (LLD) buffer - do SCSI command which is assumed to be of the DATA_IN (data from device) variety. This would overwrite some or all of the kernel buffer - copy kernel (LLD) buffer back to the user space. The idea was to detect short reads by filling the original user space buffer with some marker bytes ("0xec" it would seem in this report). The "resid" value is a better way of detecting short reads but that was only added this century and requires co-operation from the LLD. = This patch changes the block layer mapping API to support these semantics. This simply adds another field to struct rq_map_data and enables __bio_copy_iov() to copy data from user space even with READ requests. It would be better to add a flags field and kill the null_mapped and new from_user fields in struct rq_map_data, but that approach makes it difficult to send this patch to stable trees because st and osst drivers use struct rq_map_data (they were converted to use the block layer in 2.6.29 and 2.6.30). Well, I should clean up the block layer mapping API.
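A condensed sketch of the resulting copy-direction decisions may help (illustrative only: rq_map_data_sketch, need_copy_in() and need_copy_out() are invented names; the real logic lives in bio_copy_user_iov() and bio_uncopy_user() in the diff that follows):

    /* rq_map_data grows a from_user flag for SG_DXFER_TO_FROM_DEV */
    struct rq_map_data_sketch {
            int null_mapped;   /* no initial copy-in at all */
            int from_user;     /* pre-fill the bounce buffer from user space */
    };

    /* mapping time: copy user data into the bounce buffer? */
    int need_copy_in(int write_to_vm, const struct rq_map_data_sketch *md)
    {
            /* data heading to the device is copied in as before; with
             * from_user set, even a device-to-memory (READ) transfer is
             * pre-filled from user space first */
            return (!write_to_vm && !(md && md->null_mapped)) ||
                   (md && md->from_user);
    }

    /* completion time: copy the (possibly overwritten) bounce buffer back? */
    int need_copy_out(int bio_dir_is_read)
    {
            return bio_dir_is_read;
    }

In the patch itself, sg_start_req() sets md->from_user = 1 when dxfer_dir == SG_DXFER_TO_FROM_DEV, and __bio_copy_iov() takes separate to_user/from_user arguments instead of deriving the direction from the bio alone.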
zhou sf reported this regiression and tested this patch: http://www.spinics.net/lists/linux-scsi/msg37128.html http://www.spinics.net/lists/linux-scsi/msg37168.html Reported-by: zhou sf Tested-by: zhou sf Cc: stable@kernel.org Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- drivers/scsi/sg.c | 4 ++++ fs/bio.c | 22 ++++++++++++---------- include/linux/blkdev.h | 1 + 3 files changed, 17 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 4d6f2fe1cfe9..9230402c45af 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1656,6 +1656,10 @@ static int sg_start_req(Sg_request *srp, unsigned char *cmd) md->nr_entries = req_schp->k_use_sg; md->offset = 0; md->null_mapped = hp->dxferp ? 0 : 1; + if (dxfer_dir == SG_DXFER_TO_FROM_DEV) + md->from_user = 1; + else + md->from_user = 0; } if (iov_count) { diff --git a/fs/bio.c b/fs/bio.c index 1486b19fc431..76738005c8e8 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -705,14 +705,13 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, } static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, - struct sg_iovec *iov, int iov_count, int uncopy, - int do_free_page) + struct sg_iovec *iov, int iov_count, + int to_user, int from_user, int do_free_page) { int ret = 0, i; struct bio_vec *bvec; int iov_idx = 0; unsigned int iov_off = 0; - int read = bio_data_dir(bio) == READ; __bio_for_each_segment(bvec, bio, i, 0) { char *bv_addr = page_address(bvec->bv_page); @@ -727,13 +726,14 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, iov_addr = iov[iov_idx].iov_base + iov_off; if (!ret) { - if (!read && !uncopy) - ret = copy_from_user(bv_addr, iov_addr, - bytes); - if (read && uncopy) + if (to_user) ret = copy_to_user(iov_addr, bv_addr, bytes); + if (from_user) + ret = copy_from_user(bv_addr, iov_addr, + bytes); + if (ret) ret = -EFAULT; } @@ -770,7 +770,8 @@ int bio_uncopy_user(struct bio *bio) if (!bio_flagged(bio, BIO_NULL_MAPPED)) ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, - bmd->nr_sgvecs, 1, bmd->is_our_pages); + bmd->nr_sgvecs, bio_data_dir(bio) == READ, + 0, bmd->is_our_pages); bio_free_map_data(bmd); bio_put(bio); return ret; @@ -875,8 +876,9 @@ struct bio *bio_copy_user_iov(struct request_queue *q, /* * success */ - if (!write_to_vm && (!map_data || !map_data->null_mapped)) { - ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); + if ((!write_to_vm && (!map_data || !map_data->null_mapped)) || + (map_data && map_data->from_user)) { + ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 1, 0); if (ret) goto cleanup; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bb3d39978701..0146e0fecf1a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -723,6 +723,7 @@ struct rq_map_data { int nr_entries; unsigned long offset; int null_mapped; + int from_user; }; struct req_iterator { -- cgit v1.2.3 From e36d56b64808aec54b68b4e9976180c1da0933b2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Jun 2009 21:04:43 +0200 Subject: cfg80211: pass netdev to change_virtual_intf If there was a reason I'm passing the ifidx I cannot remember it any more and don't see one now, so let's just pass the pointer itself. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwmc3200wifi/cfg80211.c | 9 ++------- drivers/net/wireless/rndis_wlan.c | 15 +++++---------- include/net/cfg80211.h | 3 ++- net/mac80211/cfg.c | 9 ++------- net/wireless/nl80211.c | 12 +++++------- net/wireless/wext-compat.c | 2 +- 6 files changed, 17 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwmc3200wifi/cfg80211.c b/drivers/net/wireless/iwmc3200wifi/cfg80211.c index 96f714e6e12b..bc89b1ef0cad 100644 --- a/drivers/net/wireless/iwmc3200wifi/cfg80211.c +++ b/drivers/net/wireless/iwmc3200wifi/cfg80211.c @@ -167,20 +167,15 @@ int iwm_cfg80211_inform_bss(struct iwm_priv *iwm) return 0; } -static int iwm_cfg80211_change_iface(struct wiphy *wiphy, int ifindex, +static int iwm_cfg80211_change_iface(struct wiphy *wiphy, + struct net_device *ndev, enum nl80211_iftype type, u32 *flags, struct vif_params *params) { - struct net_device *ndev; struct wireless_dev *wdev; struct iwm_priv *iwm; u32 old_mode; - /* we're under RTNL */ - ndev = __dev_get_by_index(&init_net, ifindex); - if (!ndev) - return -ENODEV; - wdev = ndev->ieee80211_ptr; iwm = ndev_to_iwm(ndev); old_mode = iwm->conf.mode; diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 3bec3dbd3450..e8b0793b030a 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -420,7 +420,8 @@ struct rndis_wlan_private { /* * cfg80211 ops */ -static int rndis_change_virtual_intf(struct wiphy *wiphy, int ifindex, +static int rndis_change_virtual_intf(struct wiphy *wiphy, + struct net_device *dev, enum nl80211_iftype type, u32 *flags, struct vif_params *params); @@ -1222,20 +1223,14 @@ static void set_multicast_list(struct usbnet *usbdev) /* * cfg80211 ops */ -static int rndis_change_virtual_intf(struct wiphy *wiphy, int ifindex, +static int rndis_change_virtual_intf(struct wiphy *wiphy, + struct net_device *dev, enum nl80211_iftype type, u32 *flags, struct vif_params *params) { - struct net_device *dev; - struct usbnet *usbdev; + struct usbnet *usbdev = netdev_priv(dev); int mode; - /* we're under RTNL */ - dev = __dev_get_by_index(&init_net, ifindex); - if (!dev) - return -ENODEV; - usbdev = netdev_priv(dev); - switch (type) { case NL80211_IFTYPE_ADHOC: mode = NDIS_80211_INFRA_ADHOC; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1a21895b732b..90f9bfa3bfc2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -866,7 +866,8 @@ struct cfg80211_ops { enum nl80211_iftype type, u32 *flags, struct vif_params *params); int (*del_virtual_intf)(struct wiphy *wiphy, int ifindex); - int (*change_virtual_intf)(struct wiphy *wiphy, int ifindex, + int (*change_virtual_intf)(struct wiphy *wiphy, + struct net_device *dev, enum nl80211_iftype type, u32 *flags, struct vif_params *params); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3f47276caeb8..eb93eb6a9cc7 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -74,19 +74,14 @@ static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex) return 0; } -static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, +static int ieee80211_change_iface(struct wiphy *wiphy, + struct net_device *dev, enum nl80211_iftype type, u32 *flags, struct vif_params *params) { - struct net_device *dev; struct ieee80211_sub_if_data *sdata; int ret; - /* we're under RTNL */ - dev = __dev_get_by_index(&init_net, ifindex); - if (!dev) - return -ENODEV; - if (!nl80211_type_check(type)) return -EINVAL; diff --git a/net/wireless/nl80211.c 
b/net/wireless/nl80211.c index 43bdb1372cae..f91e5d472c60 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -767,7 +767,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; struct vif_params params; - int err, ifindex; + int err; enum nl80211_iftype otype, ntype; struct net_device *dev; u32 _flags, *flags = NULL; @@ -781,9 +781,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock_rtnl; - ifindex = dev->ifindex; otype = ntype = dev->ieee80211_ptr->iftype; - dev_put(dev); if (info->attrs[NL80211_ATTR_IFTYPE]) { ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); @@ -826,20 +824,20 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) } if (change) - err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, + err = drv->ops->change_virtual_intf(&drv->wiphy, dev, ntype, flags, ¶ms); else err = 0; - dev = __dev_get_by_index(&init_net, ifindex); - WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != ntype)); + WARN_ON(!err && dev->ieee80211_ptr->iftype != ntype); - if (dev && !err && (ntype != otype)) { + if (!err && (ntype != otype)) { if (otype == NL80211_IFTYPE_ADHOC) cfg80211_clear_ibss(dev, false); } unlock: + dev_put(dev); cfg80211_put_dev(drv); unlock_rtnl: rtnl_unlock(); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index d030c5315672..9e56f3569e3e 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -103,7 +103,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, memset(&vifparams, 0, sizeof(vifparams)); - ret = rdev->ops->change_virtual_intf(wdev->wiphy, dev->ifindex, type, + ret = rdev->ops->change_virtual_intf(wdev->wiphy, dev, type, NULL, &vifparams); WARN_ON(!ret && wdev->iftype != type); -- cgit v1.2.3 From a33e9e7f35ef6dcab528e0327f29188475f60691 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 16 Jun 2009 17:17:27 +0300 Subject: usbnet: Add stop function pointer to 'struct rndis_data'. Allow minidriver to know that netdev has stopped. This is to let wireless turn off radio when usbnet dev is stopped. Signed-off-by: Jussi Kivilinna Acked-by: David Brownell Acked-by: David S. Miller Signed-off-by: John W. 
Linville --- drivers/net/usb/usbnet.c | 14 ++++++++++++++ include/linux/usb/usbnet.h | 3 +++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index edfd9e10ceba..25e435c49040 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -575,7 +575,9 @@ EXPORT_SYMBOL_GPL(usbnet_unlink_rx_urbs); int usbnet_stop (struct net_device *net) { struct usbnet *dev = netdev_priv(net); + struct driver_info *info = dev->driver_info; int temp; + int retval; DECLARE_WAIT_QUEUE_HEAD_ONSTACK (unlink_wakeup); DECLARE_WAITQUEUE (wait, current); @@ -587,6 +589,18 @@ int usbnet_stop (struct net_device *net) net->stats.rx_errors, net->stats.tx_errors ); + /* allow minidriver to stop correctly (wireless devices to turn off + * radio etc) */ + if (info->stop) { + retval = info->stop(dev); + if (retval < 0 && netif_msg_ifdown(dev)) + devinfo(dev, + "stop fail (%d) usbnet usb-%s-%s, %s", + retval, + dev->udev->bus->bus_name, dev->udev->devpath, + info->description); + } + // ensure there are no more active urbs add_wait_queue (&unlink_wakeup, &wait); dev->wait = &unlink_wakeup; diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 310e18a880ff..7c17b2efba86 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -97,6 +97,9 @@ struct driver_info { /* reset device ... can sleep */ int (*reset)(struct usbnet *); + /* stop device ... can sleep */ + int (*stop)(struct usbnet *); + /* see if peer is connected ... can sleep */ int (*check_connect)(struct usbnet *); -- cgit v1.2.3 From f1d58c2521eb160178b2151d6326d8dc5d7c8560 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 17 Jun 2009 13:13:00 +0200 Subject: mac80211: push rx status into skb->cb Within mac80211, we often need to copy the rx status into skb->cb. This is wasteful, as drivers could be building it in there to start with. This patch changes the API so that drivers are expected to pass the RX status in skb->cb, now accessible as IEEE80211_SKB_RXCB(skb). It also updates all drivers to pass the rx status in there, but only by making them memcpy() it into place before the call to the receive function (ieee80211_rx(_irqsafe)). Each driver can now be optimised on its own schedule. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/adm8211.c | 3 +- drivers/net/wireless/at76c50x-usb.c | 3 +- drivers/net/wireless/ath/ar9170/main.c | 6 ++- drivers/net/wireless/ath/ath5k/base.c | 3 +- drivers/net/wireless/ath/ath9k/recv.c | 13 ++++-- drivers/net/wireless/b43/xmit.c | 3 +- drivers/net/wireless/b43legacy/xmit.c | 3 +- drivers/net/wireless/iwlwifi/iwl-3945.c | 3 +- drivers/net/wireless/iwlwifi/iwl-rx.c | 3 +- drivers/net/wireless/libertas_tf/main.c | 3 +- drivers/net/wireless/mac80211_hwsim.c | 3 +- drivers/net/wireless/mwl8k.c | 3 +- drivers/net/wireless/p54/p54common.c | 3 +- drivers/net/wireless/rt2x00/rt2x00dev.c | 3 +- drivers/net/wireless/rtl818x/rtl8180_dev.c | 3 +- drivers/net/wireless/rtl818x/rtl8187_dev.c | 3 +- drivers/net/wireless/wl12xx/wl1251_rx.c | 3 +- drivers/net/wireless/zd1211rw/zd_mac.c | 3 +- drivers/staging/agnx/xmit.c | 3 +- drivers/staging/stlc45xx/stlc45xx.c | 3 +- drivers/staging/winbond/wb35rx.c | 3 +- include/net/mac80211.h | 29 ++++++------ net/mac80211/ibss.c | 6 +-- net/mac80211/ieee80211_i.h | 10 ++--- net/mac80211/main.c | 5 +-- net/mac80211/mesh.c | 6 +-- net/mac80211/mesh.h | 3 +- net/mac80211/mlme.c | 4 +- net/mac80211/rx.c | 71 +++++++++++++----------------- net/mac80211/scan.c | 4 +- 30 files changed, 108 insertions(+), 106 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index 2b9e379994a1..ecc93834533f 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -452,7 +452,8 @@ static void adm8211_interrupt_rci(struct ieee80211_hw *dev) rx_status.freq = adm8211_channels[priv->channel - 1].center_freq; rx_status.band = IEEE80211_BAND_2GHZ; - ieee80211_rx_irqsafe(dev, skb, &rx_status); + memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status)); + ieee80211_rx_irqsafe(dev, skb); } entry = (++priv->cur_rx) % priv->rx_ring_size; diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index 4efbdbe6d6bf..13303fa34734 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -1568,7 +1568,8 @@ static void at76_rx_tasklet(unsigned long param) at76_dbg(DBG_MAC80211, "calling ieee80211_rx_irqsafe(): %d/%d", priv->rx_skb->len, priv->rx_skb->data_len); - ieee80211_rx_irqsafe(priv->hw, priv->rx_skb, &rx_status); + memcpy(IEEE80211_SKB_RXCB(priv->rx_skb), &rx_status, sizeof(rx_status)); + ieee80211_rx_irqsafe(priv->hw, priv->rx_skb); /* Use a new skb for the next receive */ priv->rx_skb = NULL; diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index 9d38cf60a0db..51753ed1b8ba 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -917,8 +917,10 @@ static void ar9170_handle_mpdu(struct ar9170 *ar, u8 *buf, int len) ar9170_rx_phy_status(ar, phy, &status); skb = ar9170_rx_copy_data(buf, mpdu_len); - if (likely(skb)) - ieee80211_rx_irqsafe(ar->hw, skb, &status); + if (likely(skb)) { + memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status)); + ieee80211_rx_irqsafe(ar->hw, skb); + } } void ar9170_rx(struct ar9170 *ar, struct sk_buff *skb) diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index f26a68960622..c6e70919435c 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -1905,7 +1905,8 @@ accept: if (sc->opmode == NL80211_IFTYPE_ADHOC) ath5k_check_ibss_tsf(sc, skb, &rxs); - __ieee80211_rx(sc->hw, skb, &rxs); + 
memcpy(IEEE80211_SKB_RXCB(skb), &rxs, sizeof(rxs)); + ieee80211_rx(sc->hw, skb); bf->skb = next_skb; bf->skbaddr = next_skb_addr; diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index cece1c4c6bda..c00b9051bb53 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -619,13 +619,18 @@ static void ath_rx_send_to_mac80211(struct ath_softc *sc, struct sk_buff *skb, if (aphy == NULL) continue; nskb = skb_copy(skb, GFP_ATOMIC); - if (nskb) - __ieee80211_rx(aphy->hw, nskb, rx_status); + if (nskb) { + memcpy(IEEE80211_SKB_RXCB(nskb), rx_status, + sizeof(*rx_status)); + ieee80211_rx(aphy->hw, nskb); + } } - __ieee80211_rx(sc->hw, skb, rx_status); + memcpy(IEEE80211_SKB_RXCB(skb), rx_status, sizeof(*rx_status)); + ieee80211_rx(sc->hw, skb); } else { /* Deliver unicast frames based on receiver address */ - __ieee80211_rx(ath_get_virt_hw(sc, hdr), skb, rx_status); + memcpy(IEEE80211_SKB_RXCB(skb), rx_status, sizeof(*rx_status)); + ieee80211_rx(ath_get_virt_hw(sc, hdr), skb); } } diff --git a/drivers/net/wireless/b43/xmit.c b/drivers/net/wireless/b43/xmit.c index 55f36a7254d9..5b85e7d73592 100644 --- a/drivers/net/wireless/b43/xmit.c +++ b/drivers/net/wireless/b43/xmit.c @@ -670,7 +670,8 @@ void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr) goto drop; } - ieee80211_rx_irqsafe(dev->wl->hw, skb, &status); + memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status)); + ieee80211_rx_irqsafe(dev->wl->hw, skb); return; drop: diff --git a/drivers/net/wireless/b43legacy/xmit.c b/drivers/net/wireless/b43legacy/xmit.c index b8e39dd06e99..f79cee82601b 100644 --- a/drivers/net/wireless/b43legacy/xmit.c +++ b/drivers/net/wireless/b43legacy/xmit.c @@ -591,7 +591,8 @@ void b43legacy_rx(struct b43legacy_wldev *dev, } dev->stats.last_rx = jiffies; - ieee80211_rx_irqsafe(dev->wl->hw, skb, &status); + memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status)); + ieee80211_rx_irqsafe(dev->wl->hw, skb); return; drop: diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index 46288e724889..777c09534cec 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -577,7 +577,8 @@ static void iwl3945_pass_packet_to_mac80211(struct iwl_priv *priv, if (ieee80211_is_data(hdr->frame_control)) priv->rxtxpackets += len; #endif - ieee80211_rx_irqsafe(priv->hw, rxb->skb, stats); + memcpy(IEEE80211_SKB_RXCB(rxb->skb), stats, sizeof(*stats)); + ieee80211_rx_irqsafe(priv->hw, rxb->skb); rxb->skb = NULL; } diff --git a/drivers/net/wireless/iwlwifi/iwl-rx.c b/drivers/net/wireless/iwlwifi/iwl-rx.c index 2b8d40b37a1c..2160795ed015 100644 --- a/drivers/net/wireless/iwlwifi/iwl-rx.c +++ b/drivers/net/wireless/iwlwifi/iwl-rx.c @@ -932,7 +932,8 @@ static void iwl_pass_packet_to_mac80211(struct iwl_priv *priv, return; iwl_update_rx_stats(priv, le16_to_cpu(hdr->frame_control), len); - ieee80211_rx_irqsafe(priv->hw, rxb->skb, stats); + memcpy(IEEE80211_SKB_RXCB(rxb->skb), stats, sizeof(*stats)); + ieee80211_rx_irqsafe(priv->hw, rxb->skb); priv->alloc_rxb_skb--; rxb->skb = NULL; } diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index 10a99e26d392..4872345a2f61 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -503,7 +503,8 @@ int lbtf_rx(struct lbtf_private *priv, struct sk_buff *skb) skb_reserve(skb, 2); } - ieee80211_rx_irqsafe(priv->hw, skb, &stats); + 
memcpy(IEEE80211_SKB_RXCB(skb), &stats, sizeof(stats)); + ieee80211_rx_irqsafe(priv->hw, skb); return 0; } EXPORT_SYMBOL_GPL(lbtf_rx); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index c47ef48f31c5..b1e4baec29f4 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -430,7 +430,8 @@ static bool mac80211_hwsim_tx_frame(struct ieee80211_hw *hw, if (memcmp(hdr->addr1, data2->hw->wiphy->perm_addr, ETH_ALEN) == 0) ack = true; - ieee80211_rx_irqsafe(data2->hw, nskb, &rx_status); + memcpy(IEEE80211_SKB_RXCB(nskb), &rx_status, sizeof(rx_status)); + ieee80211_rx_irqsafe(data2->hw, nskb); } spin_unlock(&hwsim_radio_lock); diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index a263d5c84c08..b9eded88c322 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -1047,7 +1047,8 @@ static int rxq_process(struct ieee80211_hw *hw, int index, int limit) status.flag = 0; status.band = IEEE80211_BAND_2GHZ; status.freq = ieee80211_channel_to_frequency(rx_desc->channel); - ieee80211_rx_irqsafe(hw, skb, &status); + memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status)); + ieee80211_rx_irqsafe(hw, skb); processed++; } diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index 22ca122bd798..1b15f9e0b861 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -794,7 +794,8 @@ static int p54_rx_data(struct ieee80211_hw *dev, struct sk_buff *skb) skb_pull(skb, header_len); skb_trim(skb, le16_to_cpu(hdr->len)); - ieee80211_rx_irqsafe(dev, skb, &rx_status); + memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status)); + ieee80211_rx_irqsafe(dev, skb); queue_delayed_work(dev->workqueue, &priv->work, msecs_to_jiffies(P54_STATISTICS_UPDATE)); diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 57813e72c808..41e33798adb5 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -449,7 +449,8 @@ void rt2x00lib_rxdone(struct rt2x00_dev *rt2x00dev, * mac80211 will clean up the skb structure. */ rt2x00debug_dump_frame(rt2x00dev, DUMP_FRAME_RXDONE, entry->skb); - ieee80211_rx_irqsafe(rt2x00dev->hw, entry->skb, rx_status); + memcpy(IEEE80211_SKB_RXCB(entry->skb), rx_status, sizeof(*rx_status)); + ieee80211_rx_irqsafe(rt2x00dev->hw, entry->skb); /* * Replace the skb with the freshly allocated one. 
diff --git a/drivers/net/wireless/rtl818x/rtl8180_dev.c b/drivers/net/wireless/rtl818x/rtl8180_dev.c index 7e65d7c31802..47521c5b6f91 100644 --- a/drivers/net/wireless/rtl818x/rtl8180_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8180_dev.c @@ -143,7 +143,8 @@ static void rtl8180_handle_rx(struct ieee80211_hw *dev) if (flags & RTL818X_RX_DESC_FLAG_CRC32_ERR) rx_status.flag |= RX_FLAG_FAILED_FCS_CRC; - ieee80211_rx_irqsafe(dev, skb, &rx_status); + memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status)); + ieee80211_rx_irqsafe(dev, skb); skb = new_skb; priv->rx_buf[priv->rx_idx] = skb; diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c index 294250e294dd..c9b9dbe584c6 100644 --- a/drivers/net/wireless/rtl818x/rtl8187_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c @@ -380,7 +380,8 @@ static void rtl8187_rx_cb(struct urb *urb) rx_status.flag |= RX_FLAG_TSFT; if (flags & RTL818X_RX_DESC_FLAG_CRC32_ERR) rx_status.flag |= RX_FLAG_FAILED_FCS_CRC; - ieee80211_rx_irqsafe(dev, skb, &rx_status); + memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status)); + ieee80211_rx_irqsafe(dev, skb); skb = dev_alloc_skb(RTL8187_MAX_RX); if (unlikely(!skb)) { diff --git a/drivers/net/wireless/wl12xx/wl1251_rx.c b/drivers/net/wireless/wl12xx/wl1251_rx.c index 48fa39ea17ed..0dbb483a0973 100644 --- a/drivers/net/wireless/wl12xx/wl1251_rx.c +++ b/drivers/net/wireless/wl12xx/wl1251_rx.c @@ -151,7 +151,8 @@ static void wl1251_rx_body(struct wl1251 *wl, wl1251_debug(DEBUG_RX, "rx skb 0x%p: %d B %s", skb, skb->len, beacon ? "beacon" : ""); - ieee80211_rx(wl->hw, skb, &status); + memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status)); + ieee80211_rx(wl->hw, skb); } static void wl1251_rx_ack(struct wl1251 *wl) diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 40b07b988224..9600b72495da 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -711,7 +711,8 @@ int zd_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, unsigned int length) memcpy(skb_put(skb, length), buffer, length); - ieee80211_rx_irqsafe(hw, skb, &stats); + memcpy(IEEE80211_SKB_RXCB(skb), &stats, sizeof(stats)); + ieee80211_rx_irqsafe(hw, skb); return 0; } diff --git a/drivers/staging/agnx/xmit.c b/drivers/staging/agnx/xmit.c index 0e034081f3a5..42db41070cf0 100644 --- a/drivers/staging/agnx/xmit.c +++ b/drivers/staging/agnx/xmit.c @@ -384,7 +384,8 @@ void handle_rx_irq(struct agnx_priv *priv) /* dump_ieee80211_hdr((struct ieee80211_hdr *)skb->data, "RX G"); */ } else agnx_bug("Unknown packets type"); - ieee80211_rx_irqsafe(priv->hw, skb, &status); + memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status)); + ieee80211_rx_irqsafe(priv->hw, skb); rx_desc_reinit(priv, i); } while (priv->rx.idx++); diff --git a/drivers/staging/stlc45xx/stlc45xx.c b/drivers/staging/stlc45xx/stlc45xx.c index cfdaac9b747e..52744faedbff 100644 --- a/drivers/staging/stlc45xx/stlc45xx.c +++ b/drivers/staging/stlc45xx/stlc45xx.c @@ -1429,7 +1429,8 @@ static int stlc45xx_rx_data(struct stlc45xx *stlc, struct sk_buff *skb) stlc45xx_debug(DEBUG_RX, "rx data 0x%p %d B", skb->data, skb->len); stlc45xx_dump(DEBUG_RX_CONTENT, skb->data, skb->len); - ieee80211_rx(stlc->hw, skb, &status); + memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status)); + ieee80211_rx(stlc->hw, skb); return 0; } diff --git a/drivers/staging/winbond/wb35rx.c b/drivers/staging/winbond/wb35rx.c index 3e8cf08b87e6..b905e7b43a19 100644 --- 
a/drivers/staging/winbond/wb35rx.c +++ b/drivers/staging/winbond/wb35rx.c @@ -40,7 +40,8 @@ static void packet_came(struct ieee80211_hw *hw, char *pRxBufferAddress, int Pac rx_status.phymode = MODE_IEEE80211B; */ - ieee80211_rx_irqsafe(hw, skb, &rx_status); + memcpy(IEEE80211_SKB_RXCB(skb), &rx_status, sizeof(rx_status)); + ieee80211_rx_irqsafe(hw, skb); } static void Wb35Rx_adjust(PDESCRIPTOR pRxDes) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c06104476973..fe80771d95f1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -397,6 +397,11 @@ static inline struct ieee80211_tx_info *IEEE80211_SKB_CB(struct sk_buff *skb) return (struct ieee80211_tx_info *)skb->cb; } +static inline struct ieee80211_rx_status *IEEE80211_SKB_RXCB(struct sk_buff *skb) +{ + return (struct ieee80211_rx_status *)skb->cb; +} + /** * ieee80211_tx_info_clear_status - clear TX status * @@ -478,7 +483,7 @@ enum mac80211_rx_flags { * * The low-level driver should provide this information (the subset * supported by hardware) to the 802.11 code with each received - * frame. + * frame, in the skb's control buffer (cb). * * @mactime: value in microseconds of the 64-bit Time Synchronization Function * (TSF) timer when the first data symbol (MPDU) arrived at the hardware. @@ -1606,9 +1611,11 @@ void ieee80211_free_hw(struct ieee80211_hw *hw); */ void ieee80211_restart_hw(struct ieee80211_hw *hw); -/* trick to avoid symbol clashes with the ieee80211 subsystem */ -void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_rx_status *status); +/* + * trick to avoid symbol clashes with the ieee80211 subsystem, + * use the inline below instead + */ +void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb); /** * ieee80211_rx - receive frame @@ -1624,13 +1631,10 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, * * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call - * @status: status of this frame; the status pointer need not be valid - * after this function returns */ -static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_rx_status *status) +static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) { - __ieee80211_rx(hw, skb, status); + __ieee80211_rx(hw, skb); } /** @@ -1644,13 +1648,8 @@ static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, * * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call - * @status: status of this frame; the status pointer need not be valid - * after this function returns and is not freed by mac80211, - * it is recommended that it points to a stack area */ -void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, - struct sk_buff *skb, - struct ieee80211_rx_status *status); +void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb); /** * ieee80211_tx_status - transmit status callback diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 0b30277eb366..15d5a53b59a8 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -705,7 +705,7 @@ static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; u16 fc; - rx_status = (struct ieee80211_rx_status *) skb->cb; + rx_status = IEEE80211_SKB_RXCB(skb); mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); @@ -836,8 +836,7 @@ void ieee80211_ibss_notify_scan_completed(struct 
ieee80211_local *local) } ieee80211_rx_result -ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - struct ieee80211_rx_status *rx_status) +ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; @@ -852,7 +851,6 @@ ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: - memcpy(skb->cb, rx_status, sizeof(*rx_status)); case IEEE80211_STYPE_PROBE_REQ: case IEEE80211_STYPE_AUTH: skb_queue_tail(&sdata->u.ibss.skb_queue, skb); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 68eb5052179a..c65c65a9e697 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -943,8 +943,7 @@ extern const struct iw_handler_def ieee80211_iw_handler_def; /* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, - struct ieee80211_rx_status *rx_status); + struct sk_buff *skb); int ieee80211_sta_commit(struct ieee80211_sub_if_data *sdata); int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len); int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len); @@ -967,8 +966,7 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata); void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata); ieee80211_rx_result -ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - struct ieee80211_rx_status *rx_status); +ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, u8 *bssid, u8 *addr, u32 supp_rates); int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, @@ -988,9 +986,7 @@ int ieee80211_scan_results(struct ieee80211_local *local, char *buf, size_t len); void ieee80211_scan_cancel(struct ieee80211_local *local); ieee80211_rx_result -ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, - struct ieee80211_rx_status *rx_status); +ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, const char *ie, size_t len); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 092a017b237e..5b69f5f07299 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -330,19 +330,16 @@ static void ieee80211_tasklet_handler(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *) data; struct sk_buff *skb; - struct ieee80211_rx_status rx_status; struct ieee80211_ra_tid *ra_tid; while ((skb = skb_dequeue(&local->skb_queue)) || (skb = skb_dequeue(&local->skb_queue_unreliable))) { switch (skb->pkt_type) { case IEEE80211_RX_MSG: - /* status is in skb->cb */ - memcpy(&rx_status, skb->cb, sizeof(rx_status)); /* Clear skb->pkt_type in order to not confuse kernel * netstack. 
*/ skb->pkt_type = 0; - __ieee80211_rx(local_to_hw(local), skb, &rx_status); + ieee80211_rx(local_to_hw(local), skb); break; case IEEE80211_TX_STATUS_MSG: skb->pkt_type = 0; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 11cf45bce38a..542ea025494e 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -568,7 +568,7 @@ static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, ifmsh = &sdata->u.mesh; - rx_status = (struct ieee80211_rx_status *) skb->cb; + rx_status = IEEE80211_SKB_RXCB(skb); mgmt = (struct ieee80211_mgmt *) skb->data; stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; @@ -671,8 +671,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) } ieee80211_rx_result -ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - struct ieee80211_rx_status *rx_status) +ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; @@ -689,7 +688,6 @@ ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: case IEEE80211_STYPE_ACTION: - memcpy(skb->cb, rx_status, sizeof(*rx_status)); skb_queue_tail(&ifmsh->skb_queue, skb); queue_work(local->hw.workqueue, &ifmsh->work); return RX_QUEUED; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index c7d72819cdd2..2a2ed182cb7e 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -208,8 +208,7 @@ void ieee80211s_init(void); void ieee80211s_stop(void); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); ieee80211_rx_result -ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - struct ieee80211_rx_status *rx_status); +ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index aca22b00b6a3..5e25d320deae 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2063,8 +2063,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, } ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, - struct sk_buff *skb, - struct ieee80211_rx_status *rx_status) + struct sk_buff *skb) { struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; @@ -2080,7 +2079,6 @@ ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, case IEEE80211_STYPE_PROBE_REQ: case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: - memcpy(skb->cb, rx_status, sizeof(*rx_status)); case IEEE80211_STYPE_AUTH: case IEEE80211_STYPE_ASSOC_RESP: case IEEE80211_STYPE_REASSOC_RESP: diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index de5bba7f910a..0563b6969a21 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -30,7 +30,6 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff *skb, - struct ieee80211_rx_status *status, u16 mpdu_seq_num, int bar_req); /* @@ -59,11 +58,11 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local, return skb; } -static inline int should_drop_frame(struct ieee80211_rx_status *status, - struct sk_buff *skb, +static inline int should_drop_frame(struct sk_buff *skb, int present_fcs_len, int radiotap_len) { + struct 
ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) @@ -111,10 +110,10 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, static void ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, struct sk_buff *skb, - struct ieee80211_rx_status *status, struct ieee80211_rate *rate, int rtap_len) { + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_radiotap_header *rthdr; unsigned char *pos; @@ -220,9 +219,9 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, */ static struct sk_buff * ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, - struct ieee80211_rx_status *status, struct ieee80211_rate *rate) { + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(origskb); struct ieee80211_sub_if_data *sdata; int needed_headroom = 0; struct sk_buff *skb, *skb2; @@ -248,8 +247,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, present_fcs_len = FCS_LEN; if (!local->monitors) { - if (should_drop_frame(status, origskb, present_fcs_len, - rtap_len)) { + if (should_drop_frame(origskb, present_fcs_len, rtap_len)) { dev_kfree_skb(origskb); return NULL; } @@ -257,7 +255,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, return remove_monitor_info(local, origskb, rtap_len); } - if (should_drop_frame(status, origskb, present_fcs_len, rtap_len)) { + if (should_drop_frame(origskb, present_fcs_len, rtap_len)) { /* only need to expand headroom if necessary */ skb = origskb; origskb = NULL; @@ -289,7 +287,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, /* if necessary, prepend radiotap information */ if (!(status->flag & RX_FLAG_RADIOTAP)) - ieee80211_add_rx_radiotap_header(local, skb, status, rate, + ieee80211_add_rx_radiotap_header(local, skb, rate, needed_headroom); skb_reset_mac_header(skb); @@ -421,12 +419,11 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) struct sk_buff *skb = rx->skb; if (unlikely(local->hw_scanning)) - return ieee80211_scan_rx(rx->sdata, skb, rx->status); + return ieee80211_scan_rx(rx->sdata, skb); if (unlikely(local->sw_scanning)) { /* drop all the other packets during a software scan anyway */ - if (ieee80211_scan_rx(rx->sdata, skb, rx->status) - != RX_QUEUED) + if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED) dev_kfree_skb(skb); return RX_QUEUED; } @@ -1620,7 +1617,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) /* manage reordering buffer according to requested */ /* sequence number */ rcu_read_lock(); - ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, NULL, + ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, NULL, start_seq_num, 1); rcu_read_unlock(); return RX_DROP_UNUSABLE; @@ -1817,13 +1814,13 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; if (ieee80211_vif_is_mesh(&sdata->vif)) - return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status); + return ieee80211_mesh_rx_mgmt(sdata, rx->skb); if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - return ieee80211_ibss_rx_mgmt(sdata, rx->skb, rx->status); + return ieee80211_ibss_rx_mgmt(sdata, rx->skb); if (sdata->vif.type == NL80211_IFTYPE_STATION) - return ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status); + return ieee80211_sta_rx_mgmt(sdata, rx->skb); return RX_DROP_MONITOR; } @@ -2114,9 +2111,9 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, */ 
static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_rx_status *status, struct ieee80211_rate *rate) { + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; struct ieee80211_hdr *hdr; @@ -2227,20 +2224,21 @@ static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw, { struct ieee80211_supported_band *sband; struct ieee80211_rate *rate; - struct ieee80211_rx_status status; + struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; + struct ieee80211_rx_status *status; - if (!tid_agg_rx->reorder_buf[index]) + if (!skb) goto no_frame; + status = IEEE80211_SKB_RXCB(skb); + /* release the reordered frames to stack */ - memcpy(&status, tid_agg_rx->reorder_buf[index]->cb, sizeof(status)); - sband = hw->wiphy->bands[status.band]; - if (status.flag & RX_FLAG_HT) + sband = hw->wiphy->bands[status->band]; + if (status->flag & RX_FLAG_HT) rate = sband->bitrates; /* TODO: HT rates */ else - rate = &sband->bitrates[status.rate_idx]; - __ieee80211_rx_handle_packet(hw, tid_agg_rx->reorder_buf[index], - &status, rate); + rate = &sband->bitrates[status->rate_idx]; + __ieee80211_rx_handle_packet(hw, skb, rate); tid_agg_rx->stored_mpdu_num--; tid_agg_rx->reorder_buf[index] = NULL; @@ -2265,7 +2263,6 @@ no_frame: static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, struct tid_ampdu_rx *tid_agg_rx, struct sk_buff *skb, - struct ieee80211_rx_status *rxstatus, u16 mpdu_seq_num, int bar_req) { @@ -2324,8 +2321,6 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, /* put the frame in the reordering buffer */ tid_agg_rx->reorder_buf[index] = skb; tid_agg_rx->reorder_time[index] = jiffies; - memcpy(tid_agg_rx->reorder_buf[index]->cb, rxstatus, - sizeof(*rxstatus)); tid_agg_rx->stored_mpdu_num++; /* release the buffer until next missing frame */ index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) @@ -2374,8 +2369,7 @@ static u8 ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw, } static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, - struct sk_buff *skb, - struct ieee80211_rx_status *status) + struct sk_buff *skb) { struct ieee80211_hw *hw = &local->hw; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -2424,7 +2418,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, /* according to mpdu sequence number deal with reordering buffer */ mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; - ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, status, + ret = ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, mpdu_seq_num, 0); end_reorder: return ret; @@ -2434,12 +2428,12 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, * This is the receive path handler. It is called by a low level driver when an * 802.11 MPDU is received from the hardware. */ -void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_rx_status *status) +void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate = NULL; struct ieee80211_supported_band *sband; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); if (status->band < 0 || status->band >= IEEE80211_NUM_BANDS) { @@ -2482,7 +2476,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, * if it was previously present. * Also, frames with less than 16 bytes are dropped. 
*/ - skb = ieee80211_rx_monitor(local, skb, status, rate); + skb = ieee80211_rx_monitor(local, skb, rate); if (!skb) { rcu_read_unlock(); return; @@ -2500,8 +2494,8 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb, * frames from other than operational channel), but that should not * happen in normal networks. */ - if (!ieee80211_rx_reorder_ampdu(local, skb, status)) - __ieee80211_rx_handle_packet(hw, skb, status, rate); + if (!ieee80211_rx_reorder_ampdu(local, skb)) + __ieee80211_rx_handle_packet(hw, skb, rate); rcu_read_unlock(); } @@ -2509,16 +2503,13 @@ EXPORT_SYMBOL(__ieee80211_rx); /* This is a version of the rx handler that can be called from hard irq * context. Post the skb on the queue and schedule the tasklet */ -void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb, - struct ieee80211_rx_status *status) +void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb) { struct ieee80211_local *local = hw_to_local(hw); BUILD_BUG_ON(sizeof(struct ieee80211_rx_status) > sizeof(skb->cb)); skb->dev = local->mdev; - /* copy status into skb->cb for use by tasklet */ - memcpy(skb->cb, status, sizeof(*status)); skb->pkt_type = IEEE80211_RX_MSG; skb_queue_tail(&local->skb_queue, skb); tasklet_schedule(&local->tasklet); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 2a8d09ad17ff..8b2416c77a6e 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -135,9 +135,9 @@ void ieee80211_rx_bss_remove(struct ieee80211_sub_if_data *sdata, u8 *bssid, } ieee80211_rx_result -ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - struct ieee80211_rx_status *rx_status) +ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { + struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); struct ieee80211_mgmt *mgmt; struct ieee80211_bss *bss; u8 *elements; -- cgit v1.2.3 From f1f74825fe01ac77204ca34e3240dec50a8207c2 Mon Sep 17 00:00:00 2001 From: David Kilroy Date: Thu, 18 Jun 2009 23:21:13 +0100 Subject: cfg80211: add wrapper function to get wiphy from priv pointer Signed-off-by: David Kilroy Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 90f9bfa3bfc2..dba7874d1963 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1058,6 +1058,17 @@ static inline void *wiphy_priv(struct wiphy *wiphy) return &wiphy->priv; } +/** + * priv_to_wiphy - return the wiphy containing the priv + * + * @priv: a pointer previously returned by wiphy_priv + */ +static inline struct wiphy *priv_to_wiphy(void *priv) +{ + BUG_ON(!priv); + return container_of(priv, struct wiphy, priv); +} + /** * set_wiphy_dev - set device pointer for wiphy * -- cgit v1.2.3 From e6d6e3420d511cd7552a95d1f04bd4c80a9ddb34 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:26:47 +0200 Subject: cfg80211: use proper allocation flags Instead of hardcoding GFP_ATOMIC everywhere, add a new function parameter that gets the flags from the caller. Obviously then I need to update all callers (all of them in mac80211), and it turns out that now it's ok to use GFP_KERNEL in almost all places. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 21 +++++++++++++------- net/mac80211/event.c | 5 +++-- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/mlme.c | 30 ++++++++++++++++++----------- net/mac80211/rx.c | 3 ++- net/mac80211/wpa.c | 3 ++- net/wireless/mlme.c | 30 ++++++++++++++--------------- net/wireless/nl80211.c | 48 ++++++++++++++++++++++++++-------------------- net/wireless/nl80211.h | 14 +++++++------- 9 files changed, 91 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index dba7874d1963..1696ff647a08 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1572,64 +1572,70 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *bss); * @dev: network device * @buf: authentication frame (header + body) * @len: length of the frame data + * @gfp: allocation flags * * This function is called whenever an authentication has been processed in * station mode. The driver is required to call either this function or * cfg80211_send_auth_timeout() to indicate the result of cfg80211_ops::auth() * call. */ -void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len); +void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp); /** * cfg80211_send_auth_timeout - notification of timed out authentication * @dev: network device * @addr: The MAC address of the device with which the authentication timed out + * @gfp: allocation flags */ -void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr); +void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr, gfp_t gfp); /** * cfg80211_send_rx_assoc - notification of processed association * @dev: network device * @buf: (re)association response frame (header + body) * @len: length of the frame data + * @gfp: allocation flags * * This function is called whenever a (re)association response has been * processed in station mode. The driver is required to call either this * function or cfg80211_send_assoc_timeout() to indicate the result of * cfg80211_ops::assoc() call. */ -void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len); +void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp); /** * cfg80211_send_assoc_timeout - notification of timed out association * @dev: network device * @addr: The MAC address of the device with which the association timed out + * @gfp: allocation flags */ -void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr); +void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr, gfp_t gfp); /** * cfg80211_send_deauth - notification of processed deauthentication * @dev: network device * @buf: deauthentication frame (header + body) * @len: length of the frame data + * @gfp: allocation flags * * This function is called whenever deauthentication has been processed in * station mode. This includes both received deauthentication frames and * locally generated ones. */ -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len); +void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp); /** * cfg80211_send_disassoc - notification of processed disassociation * @dev: network device * @buf: disassociation response frame (header + body) * @len: length of the frame data + * @gfp: allocation flags * * This function is called whenever disassociation has been processed in * station mode. 
This includes both received disassociation frames and locally * generated ones. */ -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len); +void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp); /** * cfg80211_hold_bss - exclude bss from expiration @@ -1655,6 +1661,7 @@ void cfg80211_unhold_bss(struct cfg80211_bss *bss); * @key_type: The key type that the received frame used * @key_id: Key identifier (0..3) * @tsc: The TSC value of the frame that generated the MIC failure (6 octets) + * @gfp: allocation flags * * This function is called whenever the local MAC detects a MIC failure in a * received frame. This matches with MLME-MICHAELMICFAILURE.indication() @@ -1662,7 +1669,7 @@ void cfg80211_unhold_bss(struct cfg80211_bss *bss); */ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, enum nl80211_key_type key_type, int key_id, - const u8 *tsc); + const u8 *tsc, gfp_t gfp); /** * cfg80211_ibss_joined - notify cfg80211 that device joined an IBSS diff --git a/net/mac80211/event.c b/net/mac80211/event.c index 3ac636285fb1..01ae759518f6 100644 --- a/net/mac80211/event.c +++ b/net/mac80211/event.c @@ -16,11 +16,12 @@ * driver or is still in the frame), it should provide that information. */ void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, - struct ieee80211_hdr *hdr, const u8 *tsc) + struct ieee80211_hdr *hdr, const u8 *tsc, + gfp_t gfp) { cfg80211_michael_mic_failure(sdata->dev, hdr->addr2, (hdr->addr1[0] & 0x01) ? NL80211_KEYTYPE_GROUP : NL80211_KEYTYPE_PAIRWISE, - keyidx, tsc); + keyidx, tsc, gfp); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c65c65a9e697..e0323e540a03 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1088,7 +1088,8 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, int rate, int erp, int short_preamble); void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, - struct ieee80211_hdr *hdr, const u8 *tsc); + struct ieee80211_hdr *hdr, const u8 *tsc, + gfp_t gfp); void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata); void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int encrypt); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5e25d320deae..383392b04282 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -419,9 +419,11 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, mgmt->u.deauth.reason_code = cpu_to_le16(reason); if (stype == IEEE80211_STYPE_DEAUTH) - cfg80211_send_deauth(sdata->dev, (u8 *) mgmt, skb->len); + cfg80211_send_deauth(sdata->dev, (u8 *) mgmt, skb->len, + GFP_KERNEL); else - cfg80211_send_disassoc(sdata->dev, (u8 *) mgmt, skb->len); + cfg80211_send_disassoc(sdata->dev, (u8 *) mgmt, skb->len, + GFP_KERNEL); ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED); } @@ -1006,7 +1008,8 @@ static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata) sdata->dev->name, ifmgd->bssid); ifmgd->state = IEEE80211_STA_MLME_DISABLED; ieee80211_recalc_idle(local); - cfg80211_send_auth_timeout(sdata->dev, ifmgd->bssid); + cfg80211_send_auth_timeout(sdata->dev, ifmgd->bssid, + GFP_KERNEL); /* * Most likely AP is not in the range so remove the @@ -1055,7 +1058,8 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata) sdata->dev->name, 
ifmgd->bssid); ifmgd->state = IEEE80211_STA_MLME_DISABLED; ieee80211_recalc_idle(local); - cfg80211_send_auth_timeout(sdata->dev, ifmgd->bssid); + cfg80211_send_auth_timeout(sdata->dev, ifmgd->bssid, + GFP_KERNEL); ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, ifmgd->ssid, ifmgd->ssid_len); @@ -1243,7 +1247,8 @@ static void ieee80211_associate(struct ieee80211_sub_if_data *sdata) sdata->dev->name, ifmgd->bssid); ifmgd->state = IEEE80211_STA_MLME_DISABLED; ieee80211_recalc_idle(local); - cfg80211_send_assoc_timeout(sdata->dev, ifmgd->bssid); + cfg80211_send_assoc_timeout(sdata->dev, ifmgd->bssid, + GFP_KERNEL); ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, ifmgd->ssid, ifmgd->ssid_len); @@ -1517,12 +1522,14 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, case WLAN_AUTH_LEAP: case WLAN_AUTH_FT: ieee80211_auth_completed(sdata); - cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, len); + cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, len, + GFP_KERNEL); break; case WLAN_AUTH_SHARED_KEY: if (ifmgd->auth_transaction == 4) { ieee80211_auth_completed(sdata); - cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, len); + cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, len, + GFP_KERNEL); } else ieee80211_auth_challenge(sdata, mgmt, len); break; @@ -1560,7 +1567,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, true, false, 0); ifmgd->flags &= ~IEEE80211_STA_AUTHENTICATED; - cfg80211_send_deauth(sdata->dev, (u8 *) mgmt, len); + cfg80211_send_deauth(sdata->dev, (u8 *) mgmt, len, GFP_KERNEL); } @@ -1591,7 +1598,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, } ieee80211_set_disassoc(sdata, false, false, reason_code); - cfg80211_send_disassoc(sdata->dev, (u8 *) mgmt, len); + cfg80211_send_disassoc(sdata->dev, (u8 *) mgmt, len, GFP_KERNEL); } @@ -1660,7 +1667,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, * association next time. This works around some broken APs * which do not correctly reject reassociation requests. 
*/ ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET; - cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, len); + cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, len, + GFP_KERNEL); if (ifmgd->flags & IEEE80211_STA_EXT_SME) { /* Wait for SME to decide what to do next */ ifmgd->state = IEEE80211_STA_MLME_DISABLED; @@ -1823,7 +1831,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ifmgd->last_beacon = jiffies; ieee80211_associated(sdata); - cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, len); + cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, len, GFP_KERNEL); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0563b6969a21..ec5acc6dc021 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1863,7 +1863,8 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev, !ieee80211_is_auth(hdr->frame_control)) goto ignore; - mac80211_ev_michael_mic_failure(rx->sdata, keyidx, hdr, NULL); + mac80211_ev_michael_mic_failure(rx->sdata, keyidx, hdr, NULL, + GFP_ATOMIC); ignore: dev_kfree_skb(rx->skb); rx->skb = NULL; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index dcfae8884b86..70778694877b 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -122,7 +122,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx, - (void *) skb->data, NULL); + (void *) skb->data, NULL, + GFP_ATOMIC); return RX_DROP_UNUSABLE; } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index e56bbea10fc8..c4e6d4b84a46 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -12,35 +12,35 @@ #include "core.h" #include "nl80211.h" -void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) +void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_rx_auth(rdev, dev, buf, len); + nl80211_send_rx_auth(rdev, dev, buf, len, gfp); } EXPORT_SYMBOL(cfg80211_send_rx_auth); -void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) +void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_rx_assoc(rdev, dev, buf, len); + nl80211_send_rx_assoc(rdev, dev, buf, len, gfp); } EXPORT_SYMBOL(cfg80211_send_rx_assoc); -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) +void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_deauth(rdev, dev, buf, len); + nl80211_send_deauth(rdev, dev, buf, len, gfp); } EXPORT_SYMBOL(cfg80211_send_deauth); -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) +void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_disassoc(rdev, dev, buf, len); + nl80211_send_disassoc(rdev, dev, buf, len, gfp); } EXPORT_SYMBOL(cfg80211_send_disassoc); @@ -53,33 +53,33 @@ static void cfg80211_wext_disconnected(struct net_device *dev) #endif } -void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) +void 
cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_auth_timeout(rdev, dev, addr); + nl80211_send_auth_timeout(rdev, dev, addr, gfp); cfg80211_wext_disconnected(dev); } EXPORT_SYMBOL(cfg80211_send_auth_timeout); -void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) +void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - nl80211_send_assoc_timeout(rdev, dev, addr); + nl80211_send_assoc_timeout(rdev, dev, addr, gfp); cfg80211_wext_disconnected(dev); } EXPORT_SYMBOL(cfg80211_send_assoc_timeout); void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, enum nl80211_key_type key_type, int key_id, - const u8 *tsc) + const u8 *tsc, gfp_t gfp) { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); #ifdef CONFIG_WIRELESS_EXT union iwreq_data wrqu; - char *buf = kmalloc(128, GFP_ATOMIC); + char *buf = kmalloc(128, gfp); if (buf) { sprintf(buf, "MLME-MICHAELMICFAILURE.indication(" @@ -93,6 +93,6 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, } #endif - nl80211_michael_mic_failure(rdev, dev, addr, key_type, key_id, tsc); + nl80211_michael_mic_failure(rdev, dev, addr, key_type, key_id, tsc, gfp); } EXPORT_SYMBOL(cfg80211_michael_mic_failure); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7946b82c5716..01523ba81baf 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3832,12 +3832,12 @@ nla_put_failure: static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, - enum nl80211_commands cmd) + enum nl80211_commands cmd, gfp_t gfp) { struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -3856,7 +3856,7 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_ATOMIC); + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -3865,42 +3865,45 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, } void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *buf, size_t len) + struct net_device *netdev, const u8 *buf, + size_t len, gfp_t gfp) { nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_AUTHENTICATE); + NL80211_CMD_AUTHENTICATE, gfp); } void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, - size_t len) + size_t len, gfp_t gfp) { - nl80211_send_mlme_event(rdev, netdev, buf, len, NL80211_CMD_ASSOCIATE); + nl80211_send_mlme_event(rdev, netdev, buf, len, + NL80211_CMD_ASSOCIATE, gfp); } void nl80211_send_deauth(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *buf, size_t len) + struct net_device *netdev, const u8 *buf, + size_t len, gfp_t gfp) { nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_DEAUTHENTICATE); + NL80211_CMD_DEAUTHENTICATE, gfp); } void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, - size_t len) + size_t len, gfp_t gfp) { 
nl80211_send_mlme_event(rdev, netdev, buf, len, - NL80211_CMD_DISASSOCIATE); + NL80211_CMD_DISASSOCIATE, gfp); } static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, int cmd, - const u8 *addr) + const u8 *addr, gfp_t gfp) { struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -3920,7 +3923,7 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_ATOMIC); + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -3929,16 +3932,19 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, } void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *addr) + struct net_device *netdev, const u8 *addr, + gfp_t gfp) { nl80211_send_mlme_timeout(rdev, netdev, NL80211_CMD_AUTHENTICATE, - addr); + addr, gfp); } void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev, - struct net_device *netdev, const u8 *addr) + struct net_device *netdev, const u8 *addr, + gfp_t gfp) { - nl80211_send_mlme_timeout(rdev, netdev, NL80211_CMD_ASSOCIATE, addr); + nl80211_send_mlme_timeout(rdev, netdev, NL80211_CMD_ASSOCIATE, + addr, gfp); } void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, @@ -3978,12 +3984,12 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, enum nl80211_key_type key_type, int key_id, - const u8 *tsc) + const u8 *tsc, gfp_t gfp) { struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; @@ -4007,7 +4013,7 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_ATOMIC); + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index cf0d271f7e1f..662c216e8d4f 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -15,27 +15,27 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, void nl80211_send_reg_change_event(struct regulatory_request *request); void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, struct net_device *netdev, - const u8 *buf, size_t len); + const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, - const u8 *buf, size_t len); + const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_deauth(struct cfg80211_registered_device *rdev, struct net_device *netdev, - const u8 *buf, size_t len); + const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, - const u8 *buf, size_t len); + const u8 *buf, size_t len, gfp_t gfp); void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, - const u8 *addr); + const u8 *addr, gfp_t gfp); void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, - const u8 *addr); + const u8 *addr, gfp_t gfp); void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, enum 
nl80211_key_type key_type, - int key_id, const u8 *tsc); + int key_id, const u8 *tsc, gfp_t gfp); void nl80211_send_beacon_hint_event(struct wiphy *wiphy, -- cgit v1.2.3 From 7ebbe6bd51a259e16608b3fd7b578f5dd1292a45 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:26:48 +0200 Subject: cfg80211: remove wireless_dev->bssid This variable isn't necessary -- the wext code keeps track of the BSSID itself, and otherwise we have current_bss. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 1 - net/wireless/ibss.c | 13 +++---------- 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1696ff647a08..10eb53e2bc9f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1170,7 +1170,6 @@ struct wireless_dev { /* currently used for IBSS - might be rearranged in the future */ struct cfg80211_bss *current_bss; - u8 bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index a4a1c3498ff2..34b11eae30c8 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -24,9 +24,6 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp) if (WARN_ON(!wdev->ssid_len)) return; - if (memcmp(bssid, wdev->bssid, ETH_ALEN) == 0) - return; - bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, wdev->ssid, wdev->ssid_len, WLAN_CAPABILITY_IBSS, WLAN_CAPABILITY_IBSS); @@ -41,7 +38,6 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp) cfg80211_hold_bss(bss); wdev->current_bss = bss; - memcpy(wdev->bssid, bssid, ETH_ALEN); nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, gfp); #ifdef CONFIG_WIRELESS_EXT @@ -87,7 +83,6 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext) wdev->current_bss = NULL; wdev->ssid_len = 0; - memset(wdev->bssid, 0, ETH_ALEN); #ifdef CONFIG_WIRELESS_EXT if (!nowext) wdev->wext.ibss.ssid_len = 0; @@ -356,12 +351,10 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; - if (wdev->wext.ibss.bssid) { + if (wdev->current_bss) + memcpy(ap_addr->sa_data, wdev->current_bss->bssid, ETH_ALEN); + else memcpy(ap_addr->sa_data, wdev->wext.ibss.bssid, ETH_ALEN); - return 0; - } - - memcpy(ap_addr->sa_data, wdev->bssid, ETH_ALEN); return 0; } /* temporary symbol - mark GPL - in the future the handler won't be */ -- cgit v1.2.3 From 5121ea0481f9cea1dfd958f18d7b4ac78778cd40 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:26:50 +0200 Subject: wext: constify extra argument to wireless_send_event This is never changed by the function, so can be marked const. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/iw_handler.h | 2 +- net/wireless/wext.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index 51b9a37de991..2b3fbbb8669e 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -443,7 +443,7 @@ extern int dev_get_wireless_info(char * buffer, char **start, off_t offset, extern void wireless_send_event(struct net_device * dev, unsigned int cmd, union iwreq_data * wrqu, - char * extra); + const char * extra); /* We may need a function to send a stream of events to user space. * More on that later... 
*/ diff --git a/net/wireless/wext.c b/net/wireless/wext.c index 5da07e079c20..425f7d58b961 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -1376,7 +1376,7 @@ static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len) void wireless_send_event(struct net_device * dev, unsigned int cmd, union iwreq_data * wrqu, - char * extra) + const char * extra) { const struct iw_ioctl_description * descr = NULL; int extra_len = 0; -- cgit v1.2.3 From aff89a9b9084931e51b89d8f3ee3c547bea6c422 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:26:51 +0200 Subject: cfg80211: introduce nl80211 testmode command This introduces a new NL80211_CMD_TESTMODE for testing and calibration use with nl80211. There's no multiplexing like iwpriv had, and the command is not available by default; it needs to be explicitly enabled in Kconfig and shouldn't be enabled in most kernels. The command requires a wiphy index or interface index to identify the device to operate on, and the new TESTDATA attribute. There is also an API for sending replies to the command, and testmode multicast messages (on a testmode multicast group). I've also updated mac80211 to be able to pass through the command to the driver, since it itself doesn't implement the testmode command. Additionally, to give people an idea of how to use the command, I've added a little code to hwsim that makes use of the new command to set the powersave mode; this is currently done via debugfs and should remain there, and the testmode command only serves as an example of how to use this best -- with nested netlink attributes in the TESTDATA attribute. A hwsim testmode tool can be found at http://git.sipsolutions.net/hwsim.git/. This tool is BSD licensed so people can easily use it as a basis for their own internal fabrication and validation tools. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 68 +++++++++++++++++ include/linux/nl80211.h | 11 +++ include/net/cfg80211.h | 83 +++++++++++++++++++++ include/net/mac80211.h | 5 ++ net/mac80211/cfg.c | 13 ++++ net/wireless/Kconfig | 15 ++++ net/wireless/core.h | 4 + net/wireless/nl80211.c | 136 ++++++++++++++++++++++++++++++++++ 8 files changed, 335 insertions(+) (limited to 'include') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 93c1c4a73e6c..6ac8565072e9 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -700,6 +700,73 @@ static int mac80211_hwsim_conf_tx( return 0; } +#ifdef CONFIG_NL80211_TESTMODE +/* + * This section contains example code for using netlink + * attributes with the testmode command in nl80211.
+ */ + +/* These enums need to be kept in sync with userspace */ +enum hwsim_testmode_attr { + __HWSIM_TM_ATTR_INVALID = 0, + HWSIM_TM_ATTR_CMD = 1, + HWSIM_TM_ATTR_PS = 2, + + /* keep last */ + __HWSIM_TM_ATTR_AFTER_LAST, + HWSIM_TM_ATTR_MAX = __HWSIM_TM_ATTR_AFTER_LAST - 1 +}; + +enum hwsim_testmode_cmd { + HWSIM_TM_CMD_SET_PS = 0, + HWSIM_TM_CMD_GET_PS = 1, +}; + +static const struct nla_policy hwsim_testmode_policy[HWSIM_TM_ATTR_MAX + 1] = { + [HWSIM_TM_ATTR_CMD] = { .type = NLA_U32 }, + [HWSIM_TM_ATTR_PS] = { .type = NLA_U32 }, +}; + +static int hwsim_fops_ps_write(void *dat, u64 val); + +int mac80211_hwsim_testmode_cmd(struct ieee80211_hw *hw, void *data, int len) +{ + struct mac80211_hwsim_data *hwsim = hw->priv; + struct nlattr *tb[HWSIM_TM_ATTR_MAX + 1]; + struct sk_buff *skb; + int err, ps; + + err = nla_parse(tb, HWSIM_TM_ATTR_MAX, data, len, + hwsim_testmode_policy); + if (err) + return err; + + if (!tb[HWSIM_TM_ATTR_CMD]) + return -EINVAL; + + switch (nla_get_u32(tb[HWSIM_TM_ATTR_CMD])) { + case HWSIM_TM_CMD_SET_PS: + if (!tb[HWSIM_TM_ATTR_PS]) + return -EINVAL; + ps = nla_get_u32(tb[HWSIM_TM_ATTR_PS]); + return hwsim_fops_ps_write(hwsim, ps); + case HWSIM_TM_CMD_GET_PS: + skb = cfg80211_testmode_alloc_reply_skb(hw->wiphy, + nla_total_size(sizeof(u32))); + if (!skb) + return -ENOMEM; + NLA_PUT_U32(skb, HWSIM_TM_ATTR_PS, hwsim->ps); + return cfg80211_testmode_reply(skb); + default: + return -EOPNOTSUPP; + } + + nla_put_failure: + kfree_skb(skb); + return -ENOBUFS; +} +#endif + static const struct ieee80211_ops mac80211_hwsim_ops = { .tx = mac80211_hwsim_tx, @@ -713,6 +780,7 @@ static const struct ieee80211_ops mac80211_hwsim_ops = .sta_notify = mac80211_hwsim_sta_notify, .set_tim = mac80211_hwsim_set_tim, .conf_tx = mac80211_hwsim_conf_tx, + CFG80211_TESTMODE_CMD(mac80211_hwsim_testmode_cmd) }; diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index dbea93b694e5..651b18839088 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -242,6 +242,10 @@ * @NL80211_CMD_LEAVE_IBSS: Leave the IBSS -- no special arguments, the IBSS is * determined by the network interface. * + * @NL80211_CMD_TESTMODE: testmode command, takes a wiphy (or ifindex) attribute + * to identify the device, and the TESTDATA blob attribute to pass through + * to the driver. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -310,6 +314,8 @@ enum nl80211_commands { NL80211_CMD_JOIN_IBSS, NL80211_CMD_LEAVE_IBSS, + NL80211_CMD_TESTMODE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -511,6 +517,9 @@ enum nl80211_commands { * authorized by user space. Otherwise, port is marked authorized by * default in station mode. * + * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. + * We recommend using nested, driver-specific attributes within this. 
+ * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -619,6 +628,8 @@ enum nl80211_attrs { NL80211_ATTR_CONTROL_PORT, + NL80211_ATTR_TESTDATA, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 10eb53e2bc9f..885d4e5bc4b5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -857,6 +857,8 @@ enum tx_power_setting { * * @rfkill_poll: polls the hw rfkill line, use cfg80211 reporting * functions to adjust rfkill hw state + * + * @testmode_cmd: run a test mode command */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); @@ -955,6 +957,10 @@ struct cfg80211_ops { int (*get_tx_power)(struct wiphy *wiphy, int *dbm); void (*rfkill_poll)(struct wiphy *wiphy); + +#ifdef CONFIG_NL80211_TESTMODE + int (*testmode_cmd)(struct wiphy *wiphy, void *data, int len); +#endif }; /* @@ -1705,4 +1711,81 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy); */ void wiphy_rfkill_stop_polling(struct wiphy *wiphy); +#ifdef CONFIG_NL80211_TESTMODE +/** + * cfg80211_testmode_alloc_reply_skb - allocate testmode reply + * @wiphy: the wiphy + * @approxlen: an upper bound of the length of the data that will + * be put into the skb + * + * This function allocates and pre-fills an skb for a reply to + * the testmode command. Since it is intended for a reply, calling + * it outside of the @testmode_cmd operation is invalid. + * + * The returned skb (or %NULL if any errors happen) is pre-filled + * with the wiphy index and set up in a way that any data that is + * put into the skb (with skb_put(), nla_put() or similar) will end + * up being within the %NL80211_ATTR_TESTDATA attribute, so all that + * needs to be done with the skb is adding data for the corresponding + * userspace tool which can then read that data out of the testdata + * attribute. You must not modify the skb in any other way. + * + * When done, call cfg80211_testmode_reply() with the skb and return + * its error code as the result of the @testmode_cmd operation. + */ +struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, + int approxlen); + +/** + * cfg80211_testmode_reply - send the reply skb + * @skb: The skb, must have been allocated with + * cfg80211_testmode_alloc_reply_skb() + * + * Returns an error code or 0 on success, since calling this + * function will usually be the last thing before returning + * from the @testmode_cmd you should return the error code. + * Note that this function consumes the skb regardless of the + * return value. + */ +int cfg80211_testmode_reply(struct sk_buff *skb); + +/** + * cfg80211_testmode_alloc_event_skb - allocate testmode event + * @wiphy: the wiphy + * @approxlen: an upper bound of the length of the data that will + * be put into the skb + * @gfp: allocation flags + * + * This function allocates and pre-fills an skb for an event on the + * testmode multicast group. + * + * The returned skb (or %NULL if any errors happen) is set up in the + * same way as with cfg80211_testmode_alloc_reply_skb() but prepared + * for an event. As there, you should simply add data to it that will + * then end up in the %NL80211_ATTR_TESTDATA attribute. Again, you must + * not modify the skb in any other way. + * + * When done filling the skb, call cfg80211_testmode_event() with the + * skb to send the event. 
+ */ +struct sk_buff *cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, + int approxlen, gfp_t gfp); + +/** + * cfg80211_testmode_event - send the event + * @skb: The skb, must have been allocated with + * cfg80211_testmode_alloc_event_skb() + * @gfp: allocation flags + * + * This function sends the given @skb, which must have been allocated + * by cfg80211_testmode_alloc_event_skb(), as an event. It always + * consumes it. + */ +void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp); + +#define CFG80211_TESTMODE_CMD(cmd) .testmode_cmd = (cmd), +#else +#define CFG80211_TESTMODE_CMD(cmd) +#endif + #endif /* __NET_CFG80211_H */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fe80771d95f1..ce7cb1b5d453 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1416,6 +1416,8 @@ enum ieee80211_ampdu_mlme_action { * @rfkill_poll: Poll rfkill hardware state. If you need this, you also * need to set wiphy->rfkill_poll to %true before registration, * and need to call wiphy_rfkill_set_hw_state() in the callback. + * + * @testmode_cmd: Implement a cfg80211 test mode command. */ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -1466,6 +1468,9 @@ struct ieee80211_ops { struct ieee80211_sta *sta, u16 tid, u16 *ssn); void (*rfkill_poll)(struct ieee80211_hw *hw); +#ifdef CONFIG_NL80211_TESTMODE + int (*testmode_cmd)(struct ieee80211_hw *hw, void *data, int len); +#endif }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index eb93eb6a9cc7..c34c1a41019a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1376,6 +1376,18 @@ static void ieee80211_rfkill_poll(struct wiphy *wiphy) drv_rfkill_poll(local); } +#ifdef CONFIG_NL80211_TESTMODE +int ieee80211_testmode_cmd(struct wiphy *wiphy, void *data, int len) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + if (!local->ops->testmode_cmd) + return -EOPNOTSUPP; + + return local->ops->testmode_cmd(&local->hw, data, len); +} +#endif + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1418,4 +1430,5 @@ struct cfg80211_ops mac80211_config_ops = { .set_tx_power = ieee80211_set_tx_power, .get_tx_power = ieee80211_get_tx_power, .rfkill_poll = ieee80211_rfkill_poll, + CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) }; diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index ec64571c4c23..040263118a20 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -2,6 +2,21 @@ config CFG80211 tristate "Improved wireless configuration API" depends on RFKILL || !RFKILL +config NL80211_TESTMODE + bool "nl80211 testmode command" + depends on CFG80211 + help + The nl80211 testmode command helps implementing things like + factory calibration or validation tools for wireless chips. + + Select this option ONLY for kernels that are specifically + built for such purposes. + + Debugging tools that are supposed to end up in the hands of + users should better be implemented with debugfs. + + Say N. 
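The hwsim code in this commit exercises only the reply path, so a minimal sketch of the event side documented just above may help; it is not part of the patch, and MYDRV_TM_ATTR_STATE plus the mydrv_ name are hypothetical driver-defined identifiers kept in sync with that driver's userspace tool.

/* Illustrative sketch only: emit a driver-defined u32 on the testmode
 * multicast group. Only the two event helpers introduced in this patch
 * are assumed; MYDRV_TM_ATTR_STATE is hypothetical. */
static void mydrv_testmode_report_state(struct wiphy *wiphy, u32 state)
{
	struct sk_buff *skb;

	skb = cfg80211_testmode_alloc_event_skb(wiphy,
						nla_total_size(sizeof(u32)),
						GFP_KERNEL);
	if (!skb)
		return;

	/* data added here ends up inside NL80211_ATTR_TESTDATA */
	if (nla_put_u32(skb, MYDRV_TM_ATTR_STATE, state)) {
		kfree_skb(skb);
		return;
	}

	cfg80211_testmode_event(skb, GFP_KERNEL);
}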
+ config CFG80211_REG_DEBUG bool "cfg80211 regulatory debugging" depends on CFG80211 diff --git a/net/wireless/core.h b/net/wireless/core.h index bfa340c7abb5..bc084b68865c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -58,6 +58,10 @@ struct cfg80211_registered_device { struct cfg80211_scan_request *scan_req; /* protected by RTNL */ unsigned long suspend_at; +#ifdef CONFIG_NL80211_TESTMODE + struct genl_info *testmode_info; +#endif + #ifdef CONFIG_CFG80211_DEBUGFS /* Debugfs entries */ struct wiphy_debugfsdentries { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 01523ba81baf..bb8de268a6bf 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3416,6 +3416,128 @@ unlock_rtnl: return err; } +#ifdef CONFIG_NL80211_TESTMODE +static struct genl_multicast_group nl80211_testmode_mcgrp = { + .name = "testmode", +}; + +static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + int err; + + if (!info->attrs[NL80211_ATTR_TESTDATA]) + return -EINVAL; + + rtnl_lock(); + + rdev = cfg80211_get_dev_from_info(info); + if (IS_ERR(rdev)) { + err = PTR_ERR(rdev); + goto unlock_rtnl; + } + + err = -EOPNOTSUPP; + if (rdev->ops->testmode_cmd) { + rdev->testmode_info = info; + err = rdev->ops->testmode_cmd(&rdev->wiphy, + nla_data(info->attrs[NL80211_ATTR_TESTDATA]), + nla_len(info->attrs[NL80211_ATTR_TESTDATA])); + rdev->testmode_info = NULL; + } + + cfg80211_put_dev(rdev); + + unlock_rtnl: + rtnl_unlock(); + return err; +} + +static struct sk_buff * +__cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, + int approxlen, u32 pid, u32 seq, gfp_t gfp) +{ + struct sk_buff *skb; + void *hdr; + struct nlattr *data; + + skb = nlmsg_new(approxlen + 100, gfp); + if (!skb) + return NULL; + + hdr = nl80211hdr_put(skb, pid, seq, 0, NL80211_CMD_TESTMODE); + if (!hdr) { + kfree_skb(skb); + return NULL; + } + + NLA_PUT_U32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + data = nla_nest_start(skb, NL80211_ATTR_TESTDATA); + + ((void **)skb->cb)[0] = rdev; + ((void **)skb->cb)[1] = hdr; + ((void **)skb->cb)[2] = data; + + return skb; + + nla_put_failure: + kfree_skb(skb); + return NULL; +} + +struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, + int approxlen) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + if (WARN_ON(!rdev->testmode_info)) + return NULL; + + return __cfg80211_testmode_alloc_skb(rdev, approxlen, + rdev->testmode_info->snd_pid, + rdev->testmode_info->snd_seq, + GFP_KERNEL); +} +EXPORT_SYMBOL(cfg80211_testmode_alloc_reply_skb); + +int cfg80211_testmode_reply(struct sk_buff *skb) +{ + struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; + void *hdr = ((void **)skb->cb)[1]; + struct nlattr *data = ((void **)skb->cb)[2]; + + if (WARN_ON(!rdev->testmode_info)) { + kfree_skb(skb); + return -EINVAL; + } + + nla_nest_end(skb, data); + genlmsg_end(skb, hdr); + return genlmsg_reply(skb, rdev->testmode_info); +} +EXPORT_SYMBOL(cfg80211_testmode_reply); + +struct sk_buff *cfg80211_testmode_alloc_event_skb(struct wiphy *wiphy, + int approxlen, gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + return __cfg80211_testmode_alloc_skb(rdev, approxlen, 0, 0, gfp); +} +EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb); + +void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) +{ + void *hdr = ((void **)skb->cb)[1]; + struct nlattr *data = ((void **)skb->cb)[2]; + + nla_nest_end(skb, data); + genlmsg_end(skb, hdr); 
+ genlmsg_multicast(skb, 0, nl80211_testmode_mcgrp.id, gfp); +} +EXPORT_SYMBOL(cfg80211_testmode_event); +#endif + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -3629,6 +3751,14 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, +#ifdef CONFIG_NL80211_TESTMODE + { + .cmd = NL80211_CMD_TESTMODE, + .doit = nl80211_testmode_do, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, +#endif }; static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", @@ -4102,6 +4232,12 @@ int nl80211_init(void) if (err) goto err_out; +#ifdef CONFIG_NL80211_TESTMODE + err = genl_register_mc_group(&nl80211_fam, &nl80211_testmode_mcgrp); + if (err) + goto err_out; +#endif + return 0; err_out: genl_unregister_family(&nl80211_fam); -- cgit v1.2.3 From 6a669e65c5ec393a650362874e13f7d3365a7827 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:26:53 +0200 Subject: wireless: define AKM suites We'll need these values for some drivers using connect API and for wext compat code, so let's define them. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a9173d5434d1..23343ab0bb03 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1196,6 +1196,10 @@ enum ieee80211_sa_query_action { #define WLAN_CIPHER_SUITE_WEP104 0x000FAC05 #define WLAN_CIPHER_SUITE_AES_CMAC 0x000FAC06 +/* AKM suite selectors */ +#define WLAN_AKM_SUITE_8021X 0x000FAC01 +#define WLAN_AKM_SUITE_PSK 0x000FAC02 + #define WLAN_MAX_KEY_LEN 32 /** -- cgit v1.2.3 From b23aa676ab9d54469cda9f7151f51a2851c6f36e Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 1 Jul 2009 21:26:54 +0200 Subject: cfg80211: connect/disconnect API This patch introduces the cfg80211 connect/disconnect API. The goal here is to run the AUTH and ASSOC steps in one call. This is needed for some fullmac cards that run both steps directly from the target, after the host driver sends a connect command. Additionally, all the new crypto parameters for connect() are now also valid for associate() -- although associate requires the IEs to be used, the information can be useful for drivers and should be given. Signed-off-by: Samuel Ortiz Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 80 +++++++++++ include/net/cfg80211.h | 135 +++++++++++++++++- net/mac80211/cfg.c | 2 +- net/wireless/Makefile | 2 +- net/wireless/core.c | 16 ++- net/wireless/core.h | 7 + net/wireless/nl80211.c | 368 +++++++++++++++++++++++++++++++++++++++++++++++- net/wireless/nl80211.h | 13 ++ net/wireless/sme.c | 224 +++++++++++++++++++++++++++++ 9 files changed, 829 insertions(+), 18 deletions(-) create mode 100644 net/wireless/sme.c (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 651b18839088..b34c17f52f3e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -246,6 +246,22 @@ * to identify the device, and the TESTDATA blob attribute to pass through * to the driver. * + * @NL80211_CMD_CONNECT: connection request and notification; this command + * requests to connect to a specified network but without separating + * auth and assoc steps. 
For this, you need to specify the SSID in a + * %NL80211_ATTR_SSID attribute, and can optionally specify the association + * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_MAC, + * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_CONTROL_PORT. + * It is also sent as an event, with the BSSID and response IEs when the + * connection is established or failed to be established. This can be + * determined by the STATUS_CODE attribute. + * @NL80211_CMD_ROAM: request that the card roam (currently not implemented), + * sent as an event when the card/driver roamed by itself. + * @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify + * userspace that a connection was dropped by the AP or due to other + * reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and + * %NL80211_ATTR_REASON_CODE attributes are used. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -316,6 +332,10 @@ enum nl80211_commands { NL80211_CMD_TESTMODE, + NL80211_CMD_CONNECT, + NL80211_CMD_ROAM, + NL80211_CMD_DISCONNECT, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -520,6 +540,30 @@ enum nl80211_commands { * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. * We recommend using nested, driver-specific attributes within this. * + * @NL80211_ATTR_DISCONNECTED_BY_AP: A flag indicating that the DISCONNECT + * event was due to the AP disconnecting the station, and not due to + * a local disconnect request. + * @NL80211_ATTR_STATUS_CODE: StatusCode for the %NL80211_CMD_CONNECT + * event (u16) + * @NL80211_ATTR_PRIVACY: Flag attribute, used with connect(), indicating + * that protected APs should be used. + * + * @NL80211_ATTR_CIPHERS_PAIRWISE: Used with CONNECT and ASSOCIATE to + * indicate which unicast key ciphers will be used with the connection + * (an array of u32). + * @NL80211_ATTR_CIPHER_GROUP: Used with CONNECT and ASSOCIATE to indicate + * which group key cipher will be used with the connection (a u32). + * @NL80211_ATTR_WPA_VERSIONS: Used with CONNECT and ASSOCIATE to indicate + * which WPA version(s) the AP we want to associate with is using + * (a u32 with flags from &enum nl80211_wpa_versions). + * @NL80211_ATTR_AKM_SUITES: Used with CONNECT and ASSOCIATE to indicate + * which key management algorithm(s) to use (an array of u32). + * + * @NL80211_ATTR_REQ_IE: (Re)association request information elements as + * sent out by the card, for ROAM and successful CONNECT events. + * @NL80211_ATTR_RESP_IE: (Re)association response information elements as + * sent by peer, for ROAM and successful CONNECT events. 
+ * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -630,6 +674,19 @@ enum nl80211_attrs { NL80211_ATTR_TESTDATA, + NL80211_ATTR_PRIVACY, + + NL80211_ATTR_DISCONNECTED_BY_AP, + NL80211_ATTR_STATUS_CODE, + + NL80211_ATTR_CIPHER_SUITES_PAIRWISE, + NL80211_ATTR_CIPHER_SUITE_GROUP, + NL80211_ATTR_WPA_VERSIONS, + NL80211_ATTR_AKM_SUITES, + + NL80211_ATTR_REQ_IE, + NL80211_ATTR_RESP_IE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -640,6 +697,7 @@ enum nl80211_attrs { * Allow user space programs to use #ifdef on new attributes by defining them * here */ +#define NL80211_CMD_CONNECT NL80211_CMD_CONNECT #define NL80211_ATTR_HT_CAPABILITY NL80211_ATTR_HT_CAPABILITY #define NL80211_ATTR_BSS_BASIC_RATES NL80211_ATTR_BSS_BASIC_RATES #define NL80211_ATTR_WIPHY_TXQ_PARAMS NL80211_ATTR_WIPHY_TXQ_PARAMS @@ -653,6 +711,10 @@ enum nl80211_attrs { #define NL80211_ATTR_SSID NL80211_ATTR_SSID #define NL80211_ATTR_AUTH_TYPE NL80211_ATTR_AUTH_TYPE #define NL80211_ATTR_REASON_CODE NL80211_ATTR_REASON_CODE +#define NL80211_ATTR_CIPHER_SUITES_PAIRWISE NL80211_ATTR_CIPHER_SUITES_PAIRWISE +#define NL80211_ATTR_CIPHER_SUITE_GROUP NL80211_ATTR_CIPHER_SUITE_GROUP +#define NL80211_ATTR_WPA_VERSIONS NL80211_ATTR_WPA_VERSIONS +#define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 @@ -661,6 +723,9 @@ enum nl80211_attrs { #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 #define NL80211_HT_CAPABILITY_LEN 26 +#define NL80211_MAX_NR_CIPHER_SUITES 5 +#define NL80211_MAX_NR_AKM_SUITES 2 + /** * enum nl80211_iftype - (virtual) interface types * @@ -1205,12 +1270,22 @@ enum nl80211_bss { * @NL80211_AUTHTYPE_SHARED_KEY: Shared Key authentication (WEP only) * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r) * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP) + * @__NL80211_AUTHTYPE_NUM: internal + * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm + * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by + * trying multiple times); this is invalid in netlink -- leave out + * the attribute for this on CONNECT commands. */ enum nl80211_auth_type { NL80211_AUTHTYPE_OPEN_SYSTEM, NL80211_AUTHTYPE_SHARED_KEY, NL80211_AUTHTYPE_FT, NL80211_AUTHTYPE_NETWORK_EAP, + + /* keep last */ + __NL80211_AUTHTYPE_NUM, + NL80211_AUTHTYPE_MAX = __NL80211_AUTHTYPE_NUM - 1, + NL80211_AUTHTYPE_AUTOMATIC }; /** @@ -1235,4 +1310,9 @@ enum nl80211_mfp { NL80211_MFP_REQUIRED, }; +enum nl80211_wpa_versions { + NL80211_WPA_VERSION_1 = 1 << 0, + NL80211_WPA_VERSION_2 = 1 << 1, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 885d4e5bc4b5..68e11321ed74 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -604,6 +604,30 @@ struct cfg80211_bss { u8 priv[0] __attribute__((__aligned__(sizeof(void *)))); }; +/** + * struct cfg80211_crypto_settings - Crypto settings + * @wpa_versions: indicates which, if any, WPA versions are enabled + * (from enum nl80211_wpa_versions) + * @cipher_group: group key cipher suite (or 0 if unset) + * @n_ciphers_pairwise: number of AP supported unicast ciphers + * @ciphers_pairwise: unicast key cipher suites + * @n_akm_suites: number of AKM suites + * @akm_suites: AKM suites + * @control_port: Whether user space controls IEEE 802.1X port, i.e., + * sets/clears %NL80211_STA_FLAG_AUTHORIZED. 
If true, the driver is + * required to assume that the port is unauthorized until authorized by + * user space. Otherwise, port is marked authorized by default. + */ +struct cfg80211_crypto_settings { + u32 wpa_versions; + u32 cipher_group; + int n_ciphers_pairwise; + u32 ciphers_pairwise[NL80211_MAX_NR_CIPHER_SUITES]; + int n_akm_suites; + u32 akm_suites[NL80211_MAX_NR_AKM_SUITES]; + bool control_port; +}; + /** * struct cfg80211_auth_request - Authentication request data * @@ -658,10 +682,7 @@ struct cfg80211_auth_request { * @ie: Extra IEs to add to (Re)Association Request frame or %NULL * @ie_len: Length of ie buffer in octets * @use_mfp: Use management frame protection (IEEE 802.11w) in this association - * @control_port: Whether user space controls IEEE 802.1X port, i.e., - * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is - * required to assume that the port is unauthorized until authorized by - * user space. Otherwise, port is marked authorized by default. + * @crypto: crypto settings */ struct cfg80211_assoc_request { struct ieee80211_channel *chan; @@ -671,7 +692,7 @@ struct cfg80211_assoc_request { const u8 *ie; size_t ie_len; bool use_mfp; - bool control_port; + struct cfg80211_crypto_settings crypto; }; /** @@ -737,6 +758,36 @@ struct cfg80211_ibss_params { bool channel_fixed; }; +/** + * struct cfg80211_connect_params - Connection parameters + * + * This structure provides information needed to complete IEEE 802.11 + * authentication and association. + * + * @channel: The channel to use or %NULL if not specified (auto-select based + * on scan results) + * @bssid: The AP BSSID or %NULL if not specified (auto-select based on scan + * results) + * @ssid: SSID + * @ssid_len: Length of ssid in octets + * @auth_type: Authentication type (algorithm) + * @assoc_ie: IEs for association request + * @assoc_ie_len: Length of assoc_ie in octets + * @privacy: indicates whether privacy-enabled APs should be used + * @crypto: crypto settings + */ +struct cfg80211_connect_params { + struct ieee80211_channel *channel; + u8 *bssid; + u8 *ssid; + size_t ssid_len; + enum nl80211_auth_type auth_type; + u8 *ie; + size_t ie_len; + bool privacy; + struct cfg80211_crypto_settings crypto; +}; + /** * enum wiphy_params_flags - set_wiphy_params bitfield values * WIPHY_PARAM_RETRY_SHORT: wiphy->retry_short has changed @@ -841,6 +892,12 @@ enum tx_power_setting { * @deauth: Request to deauthenticate from the specified peer * @disassoc: Request to disassociate from the specified peer * + * @connect: Connect to the ESS with the specified parameters. When connected, + * call cfg80211_connect_result() with status code %WLAN_STATUS_SUCCESS. + * If the connection fails for some reason, call cfg80211_connect_result() + * with the status from the AP. + * @disconnect: Disconnect from the BSS/ESS. + * * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call * cfg80211_ibss_joined(), also call that function when changing BSSID due * to a merge. 
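To make the expected driver flow for the new connect()/disconnect() ops concrete, here is a minimal sketch under stated assumptions, not taken from this patch: mydrv_connect(), mydrv_fw_connect() and mydrv_connect_done() are hypothetical names for a fullmac driver, and only cfg80211_connect_result() as documented later in this commit is assumed.

/* Illustrative sketch only: a fullmac driver hands the whole auth+assoc
 * sequence to its firmware and reports the outcome back to cfg80211. */
static int mydrv_connect(struct wiphy *wiphy, struct net_device *dev,
			 struct cfg80211_connect_params *sme)
{
	/* pass SSID/BSSID/crypto settings to the firmware, which runs
	 * both the auth and assoc steps internally */
	return mydrv_fw_connect(wiphy_priv(wiphy), dev, sme);
}

/* called from the driver's firmware event handler; process context is
 * assumed here, hence GFP_KERNEL */
static void mydrv_connect_done(struct net_device *dev, const u8 *bssid,
			       const u8 *resp_ie, size_t resp_ie_len,
			       u16 status)
{
	/* status: 0 (WLAN_STATUS_SUCCESS) on success, or the AP's
	 * status code on failure */
	cfg80211_connect_result(dev, bssid, NULL, 0,
				resp_ie, resp_ie_len, status, GFP_KERNEL);
}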
@@ -946,6 +1003,11 @@ struct cfg80211_ops { int (*disassoc)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_disassoc_request *req); + int (*connect)(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_connect_params *sme); + int (*disconnect)(struct wiphy *wiphy, struct net_device *dev, + u16 reason_code); + int (*join_ibss)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ibss_params *params); int (*leave_ibss)(struct wiphy *wiphy, struct net_device *dev); @@ -1174,10 +1236,15 @@ struct wireless_dev { struct list_head list; struct net_device *netdev; - /* currently used for IBSS - might be rearranged in the future */ + /* currently used for IBSS and SME - might be rearranged later */ struct cfg80211_bss *current_bss; u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; + enum { + CFG80211_SME_IDLE, + CFG80211_SME_CONNECTING, /* ->connect called */ + CFG80211_SME_CONNECTED, + } sme_state; #ifdef CONFIG_WIRELESS_EXT /* wext data */ @@ -1788,4 +1855,60 @@ void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp); #define CFG80211_TESTMODE_CMD(cmd) #endif +/** + * cfg80211_connect_result - notify cfg80211 of connection result + * + * @dev: network device + * @bssid: the BSSID of the AP + * @req_ie: association request IEs (maybe be %NULL) + * @req_ie_len: association request IEs length + * @resp_ie: association response IEs (may be %NULL) + * @resp_ie_len: assoc response IEs length + * @status: status code, 0 for successful connection, use + * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you + * the real status code for failures. + * @gfp: allocation flags + * + * It should be called by the underlying driver whenever connect() has + * succeeded. + */ +void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, gfp_t gfp); + +/** + * cfg80211_roamed - notify cfg80211 of roaming + * + * @dev: network device + * @bssid: the BSSID of the new AP + * @req_ie: association request IEs (maybe be %NULL) + * @req_ie_len: association request IEs length + * @resp_ie: association response IEs (may be %NULL) + * @resp_ie_len: assoc response IEs length + * @gfp: allocation flags + * + * It should be called by the underlying driver whenever it roamed + * from one AP to another while connected. + */ +void cfg80211_roamed(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp); + +/** + * cfg80211_disconnected - notify cfg80211 that connection was dropped + * + * @dev: network device + * @ie: information elements of the deauth/disassoc frame (may be %NULL) + * @ie_len: length of IEs + * @reason: reason code for the disconnection, set it to 0 if unknown + * @gfp: allocation flags + * + * After it calls this function, the driver should enter an idle state + * and not try to connect to any AP any more. 
+ */ +void cfg80211_disconnected(struct net_device *dev, u16 reason, + u8 *ie, size_t ie_len, gfp_t gfp); + + #endif /* __NET_CFG80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c34c1a41019a..03de4024597a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1262,7 +1262,7 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, sdata->u.mgd.flags &= ~IEEE80211_STA_MFP_ENABLED; } - if (req->control_port) + if (req->crypto.control_port) sdata->u.mgd.flags |= IEEE80211_STA_CONTROL_PORT; else sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT; diff --git a/net/wireless/Makefile b/net/wireless/Makefile index f78c4832a9ca..750c08e31b10 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o -cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o mlme.o ibss.o +cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o mlme.o ibss.o sme.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o diff --git a/net/wireless/core.c b/net/wireless/core.c index d41b7412b212..314e00f70e3b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -546,6 +546,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, "symlink to netdev!\n"); } wdev->netdev = dev; + wdev->sme_state = CFG80211_SME_IDLE; #ifdef CONFIG_WIRELESS_EXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; @@ -553,11 +554,20 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, mutex_unlock(&rdev->devlist_mtx); break; case NETDEV_GOING_DOWN: - if (wdev->iftype != NL80211_IFTYPE_ADHOC) - break; if (!wdev->ssid_len) break; - cfg80211_leave_ibss(rdev, dev, true); + + switch (wdev->iftype) { + case NL80211_IFTYPE_ADHOC: + cfg80211_leave_ibss(rdev, dev, true); + break; + case NL80211_IFTYPE_STATION: + cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING); + break; + default: + break; + } break; case NETDEV_UP: #ifdef CONFIG_WIRELESS_EXT diff --git a/net/wireless/core.h b/net/wireless/core.h index bc084b68865c..f93f96f85d2b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -174,6 +174,13 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); +/* SME */ +int cfg80211_connect(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *connect); +int cfg80211_disconnect(struct cfg80211_registered_device *rdev, + struct net_device *dev, u16 reason); + /* internal helpers */ int cfg80211_validate_key_settings(struct key_params *params, int key_idx, const u8 *mac_addr); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bb8de268a6bf..89dd3793e03c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -128,6 +128,9 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { .len = sizeof(struct nl80211_sta_flag_update), }, [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, + [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, + [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, + [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, }; /* IE validation */ @@ -347,6 +350,17 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(join_ibss, 
JOIN_IBSS); #undef CMD + + if (dev->ops->connect) { + i++; + NLA_PUT_U32(msg, i, NL80211_CMD_CONNECT); + } + + if (dev->ops->disconnect) { + i++; + NLA_PUT_U32(msg, i, NL80211_CMD_DISCONNECT); + } + nla_nest_end(msg, nl_cmds); return genlmsg_end(msg, hdr); @@ -3001,12 +3015,31 @@ static int nl80211_dump_scan(struct sk_buff *skb, static bool nl80211_valid_auth_type(enum nl80211_auth_type auth_type) { - return auth_type == NL80211_AUTHTYPE_OPEN_SYSTEM || - auth_type == NL80211_AUTHTYPE_SHARED_KEY || - auth_type == NL80211_AUTHTYPE_FT || - auth_type == NL80211_AUTHTYPE_NETWORK_EAP; + return auth_type <= NL80211_AUTHTYPE_MAX; +} + +static bool nl80211_valid_wpa_versions(u32 wpa_versions) +{ + return !(wpa_versions & ~(NL80211_WPA_VERSION_1 | + NL80211_WPA_VERSION_2)); +} + +static bool nl80211_valid_akm_suite(u32 akm) +{ + return akm == WLAN_AKM_SUITE_8021X || + akm == WLAN_AKM_SUITE_PSK; +} + +static bool nl80211_valid_cipher_suite(u32 cipher) +{ + return cipher == WLAN_CIPHER_SUITE_WEP40 || + cipher == WLAN_CIPHER_SUITE_WEP104 || + cipher == WLAN_CIPHER_SUITE_TKIP || + cipher == WLAN_CIPHER_SUITE_CCMP || + cipher == WLAN_CIPHER_SUITE_AES_CMAC; } + static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; @@ -3086,6 +3119,68 @@ unlock_rtnl: return err; } +static int nl80211_crypto_settings(struct genl_info *info, + struct cfg80211_crypto_settings *settings) +{ + settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; + + if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { + void *data; + int len, i; + + data = nla_data(info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]); + len = nla_len(info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]); + settings->n_ciphers_pairwise = len / sizeof(u32); + + if (len % sizeof(u32)) + return -EINVAL; + + if (settings->n_ciphers_pairwise > NL80211_MAX_NR_CIPHER_SUITES) + return -EINVAL; + + memcpy(settings->ciphers_pairwise, data, len); + + for (i = 0; i < settings->n_ciphers_pairwise; i++) + if (!nl80211_valid_cipher_suite( + settings->ciphers_pairwise[i])) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_CIPHER_SUITE_GROUP]) { + settings->cipher_group = + nla_get_u32(info->attrs[NL80211_ATTR_CIPHER_SUITE_GROUP]); + if (!nl80211_valid_cipher_suite(settings->cipher_group)) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_WPA_VERSIONS]) { + settings->wpa_versions = + nla_get_u32(info->attrs[NL80211_ATTR_WPA_VERSIONS]); + if (!nl80211_valid_wpa_versions(settings->wpa_versions)) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_AKM_SUITES]) { + void *data; + int len, i; + + data = nla_data(info->attrs[NL80211_ATTR_AKM_SUITES]); + len = nla_len(info->attrs[NL80211_ATTR_AKM_SUITES]); + settings->n_akm_suites = len / sizeof(u32); + + if (len % sizeof(u32)) + return -EINVAL; + + memcpy(settings->akm_suites, data, len); + + for (i = 0; i < settings->n_ciphers_pairwise; i++) + if (!nl80211_valid_akm_suite(settings->akm_suites[i])) + return -EINVAL; + } + + return 0; +} + static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; @@ -3156,9 +3251,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) } } - req.control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; - - err = drv->ops->assoc(&drv->wiphy, dev, &req); + err = nl80211_crypto_settings(info, &req.crypto); + if (!err) + err = drv->ops->assoc(&drv->wiphy, dev, &req); out: cfg80211_put_dev(drv); @@ -3538,6 +3633,130 @@ void 
cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) EXPORT_SYMBOL(cfg80211_testmode_event); #endif +static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct net_device *dev; + struct cfg80211_connect_params connect; + struct wiphy *wiphy; + int err; + + memset(&connect, 0, sizeof(connect)); + + if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_SSID] || + !nla_len(info->attrs[NL80211_ATTR_SSID])) + return -EINVAL; + + if (info->attrs[NL80211_ATTR_AUTH_TYPE]) { + connect.auth_type = + nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]); + if (!nl80211_valid_auth_type(connect.auth_type)) + return -EINVAL; + } else + connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; + + connect.privacy = info->attrs[NL80211_ATTR_PRIVACY]; + + err = nl80211_crypto_settings(info, &connect.crypto); + if (err) + return err; + rtnl_lock(); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + goto unlock_rtnl; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + wiphy = &drv->wiphy; + + connect.bssid = NULL; + connect.channel = NULL; + connect.auth_type = NL80211_AUTHTYPE_OPEN_SYSTEM; + + if (info->attrs[NL80211_ATTR_MAC]) + connect.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); + connect.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + connect.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); + + if (info->attrs[NL80211_ATTR_IE]) { + connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + } + + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + connect.channel = + ieee80211_get_channel(wiphy, + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); + if (!connect.channel || + connect.channel->flags & IEEE80211_CHAN_DISABLED) { + err = -EINVAL; + goto out; + } + } + + err = cfg80211_connect(drv, dev, &connect); + +out: + cfg80211_put_dev(drv); + dev_put(dev); +unlock_rtnl: + rtnl_unlock(); + return err; +} + +static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct net_device *dev; + int err; + u16 reason; + + if (!info->attrs[NL80211_ATTR_REASON_CODE]) + reason = WLAN_REASON_DEAUTH_LEAVING; + else + reason = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); + + if (reason == 0) + return -EINVAL; + + rtnl_lock(); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + goto unlock_rtnl; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + + err = cfg80211_disconnect(drv, dev, reason); + +out: + cfg80211_put_dev(drv); + dev_put(dev); +unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -3759,6 +3978,18 @@ static struct genl_ops nl80211_ops[] = { .flags = GENL_ADMIN_PERM, }, #endif + { + .cmd = NL80211_CMD_CONNECT, + .doit = nl80211_connect, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_DISCONNECT, + .doit = nl80211_disconnect, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", @@ -4077,6 +4308,129 @@ void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev, addr, gfp); } +void 
nl80211_send_connect_result(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONNECT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + if (bssid) + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid); + NLA_PUT_U16(msg, NL80211_ATTR_STATUS_CODE, status); + if (req_ie) + NLA_PUT(msg, NL80211_ATTR_REQ_IE, req_ie_len, req_ie); + if (resp_ie) + NLA_PUT(msg, NL80211_ATTR_RESP_IE, resp_ie_len, resp_ie); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + +} + +void nl80211_send_roamed(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ROAM); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid); + if (req_ie) + NLA_PUT(msg, NL80211_ATTR_REQ_IE, req_ie_len, req_ie); + if (resp_ie) + NLA_PUT(msg, NL80211_ATTR_RESP_IE, resp_ie_len, resp_ie); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + +} + +void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u16 reason, + u8 *ie, size_t ie_len, bool from_ap, gfp_t gfp) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DISCONNECT); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + if (from_ap && reason) + NLA_PUT_U16(msg, NL80211_ATTR_REASON_CODE, reason); + if (from_ap) + NLA_PUT_FLAG(msg, NL80211_ATTR_DISCONNECTED_BY_AP); + if (ie) + NLA_PUT(msg, NL80211_ATTR_IE, ie_len, ie); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); + +} + void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, gfp_t gfp) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 662c216e8d4f..cf3708b48c29 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -31,6 +31,19 @@ void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, gfp_t gfp); +void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 
*resp_ie, size_t resp_ie_len, + u16 status, gfp_t gfp); +void nl80211_send_roamed(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp); +void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u16 reason, + u8 *ie, size_t ie_len, bool from_ap, gfp_t gfp); + void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, diff --git a/net/wireless/sme.c b/net/wireless/sme.c new file mode 100644 index 000000000000..fc117031d0bb --- /dev/null +++ b/net/wireless/sme.c @@ -0,0 +1,224 @@ +/* + * SME code for cfg80211's connect emulation. + * + * Copyright 2009 Johannes Berg + * Copyright (C) 2009 Intel Corporation. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include "nl80211.h" + + +void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_bss *bss; +#ifdef CONFIG_WIRELESS_EXT + union iwreq_data wrqu; +#endif + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING)) + return; + + if (wdev->current_bss) { + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->current_bss); + wdev->current_bss = NULL; + } + + if (status == WLAN_STATUS_SUCCESS) { + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, + wdev->ssid, wdev->ssid_len, + WLAN_CAPABILITY_ESS, + WLAN_CAPABILITY_ESS); + + if (WARN_ON(!bss)) + return; + + cfg80211_hold_bss(bss); + wdev->current_bss = bss; + + wdev->sme_state = CFG80211_SME_CONNECTED; + } else { + wdev->sme_state = CFG80211_SME_IDLE; + } + + nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, bssid, + req_ie, req_ie_len, resp_ie, resp_ie_len, + status, gfp); + +#ifdef CONFIG_WIRELESS_EXT + if (req_ie && status == WLAN_STATUS_SUCCESS) { + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = req_ie_len; + wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, req_ie); + } + + if (resp_ie && status == WLAN_STATUS_SUCCESS) { + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = resp_ie_len; + wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, resp_ie); + } + + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; + if (bssid) + memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); + wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); +#endif +} +EXPORT_SYMBOL(cfg80211_connect_result); + +void cfg80211_roamed(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_bss *bss; +#ifdef CONFIG_WIRELESS_EXT + union iwreq_data wrqu; +#endif + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED)) + return; + + /* internal error -- how did we get to CONNECTED w/o BSS? 
*/ + if (WARN_ON(!wdev->current_bss)) { + return; + } + + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->current_bss); + wdev->current_bss = NULL; + + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, + wdev->ssid, wdev->ssid_len, + WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); + + if (WARN_ON(!bss)) + return; + + cfg80211_hold_bss(bss); + wdev->current_bss = bss; + + nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), dev, bssid, + req_ie, req_ie_len, resp_ie, resp_ie_len, gfp); + +#ifdef CONFIG_WIRELESS_EXT + if (req_ie) { + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = req_ie_len; + wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, req_ie); + } + + if (resp_ie) { + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = resp_ie_len; + wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, resp_ie); + } + + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; + memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); + wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); +#endif +} +EXPORT_SYMBOL(cfg80211_roamed); + +static void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, + u8 *ie, size_t ie_len, u16 reason, + bool from_ap) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; +#ifdef CONFIG_WIRELESS_EXT + union iwreq_data wrqu; +#endif + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTED)) + return; + + if (wdev->current_bss) { + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(wdev->current_bss); + } + + wdev->current_bss = NULL; + wdev->sme_state = CFG80211_SME_IDLE; + + nl80211_send_disconnected(wiphy_to_dev(wdev->wiphy), dev, + reason, ie, ie_len, from_ap, gfp); + +#ifdef CONFIG_WIRELESS_EXT + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; + wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); +#endif +} + +void cfg80211_disconnected(struct net_device *dev, u16 reason, + u8 *ie, size_t ie_len, gfp_t gfp) +{ + __cfg80211_disconnected(dev, reason, ie, ie_len, true, gfp); +} +EXPORT_SYMBOL(cfg80211_disconnected); + +int cfg80211_connect(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *connect) +{ + int err; + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (wdev->sme_state != CFG80211_SME_IDLE) + return -EALREADY; + + if (!rdev->ops->connect) { + return -EOPNOTSUPP; + } else { + wdev->sme_state = CFG80211_SME_CONNECTING; + err = rdev->ops->connect(&rdev->wiphy, dev, connect); + if (err) { + wdev->sme_state = CFG80211_SME_IDLE; + return err; + } + } + + memcpy(wdev->ssid, connect->ssid, connect->ssid_len); + wdev->ssid_len = connect->ssid_len; + + return 0; +} + +int cfg80211_disconnect(struct cfg80211_registered_device *rdev, + struct net_device *dev, u16 reason) +{ + int err; + + if (!rdev->ops->disconnect) { + return -EOPNOTSUPP; + } else { + err = rdev->ops->disconnect(&rdev->wiphy, dev, reason); + if (err) + return err; + } + + __cfg80211_disconnected(dev, 0, NULL, 0, false, GFP_KERNEL); + + return 0; +} -- cgit v1.2.3 From 6829c878ecd24ff0ae41b4668c7e9d0f11b66942 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 2 Jul 2009 09:13:27 +0200 Subject: cfg80211: emulate connect with auth/assoc This adds code to cfg80211 so that drivers (mac80211 right now) that don't implement connect but rather auth/assoc can still be used with the nl80211 connect command. This will also be necessary for the wext compat code. Signed-off-by: Samuel Ortiz Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 6 +- net/wireless/core.c | 7 + net/wireless/core.h | 8 + net/wireless/mlme.c | 79 ++++++++-- net/wireless/nl80211.c | 4 +- net/wireless/scan.c | 7 + net/wireless/sme.c | 395 +++++++++++++++++++++++++++++++++++++++++++++++-- 7 files changed, 474 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 68e11321ed74..24fab439d415 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1209,6 +1209,9 @@ extern void wiphy_unregister(struct wiphy *wiphy); */ extern void wiphy_free(struct wiphy *wiphy); +/* internal struct */ +struct cfg80211_conn; + /** * struct wireless_dev - wireless per-netdev state * @@ -1242,9 +1245,10 @@ struct wireless_dev { u8 ssid_len; enum { CFG80211_SME_IDLE, - CFG80211_SME_CONNECTING, /* ->connect called */ + CFG80211_SME_CONNECTING, CFG80211_SME_CONNECTED, } sme_state; + struct cfg80211_conn *conn; #ifdef CONFIG_WIRELESS_EXT /* wext data */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 314e00f70e3b..a0a679704612 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -321,6 +321,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) } INIT_WORK(&drv->rfkill_sync, cfg80211_rfkill_sync_work); + INIT_WORK(&drv->conn_work, cfg80211_conn_work); /* * Initialize wiphy parameters to IEEE 802.11 MIB default values. @@ -481,6 +482,8 @@ void wiphy_unregister(struct wiphy *wiphy) /* unlock again before freeing */ mutex_unlock(&drv->mtx); + cancel_work_sync(&drv->conn_work); + cfg80211_debugfs_drv_del(drv); /* If this device got a regulatory hint tell core its @@ -569,6 +572,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, break; } break; + case NETDEV_DOWN: + kfree(wdev->conn); + wdev->conn = NULL; + break; case NETDEV_UP: #ifdef CONFIG_WIRELESS_EXT if (wdev->iftype != NL80211_IFTYPE_ADHOC) diff --git a/net/wireless/core.h b/net/wireless/core.h index f93f96f85d2b..2c0f64252f3d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -62,6 +62,8 @@ struct cfg80211_registered_device { struct genl_info *testmode_info; #endif + struct work_struct conn_work; + #ifdef CONFIG_CFG80211_DEBUGFS /* Debugfs entries */ struct wiphy_debugfsdentries { @@ -181,8 +183,14 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason); +void cfg80211_conn_work(struct work_struct *work); + /* internal helpers */ int cfg80211_validate_key_settings(struct key_params *params, int key_idx, const u8 *mac_addr); +void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, u8 *ie, + size_t ie_len, u16 reason, bool from_ap); +void cfg80211_sme_scan_done(struct net_device *dev); +void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len); #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index c4e6d4b84a46..3427fe73d3c3 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -16,58 +16,105 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len, gf { struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + nl80211_send_rx_auth(rdev, dev, buf, len, gfp); + cfg80211_sme_rx_auth(dev, buf, len); } EXPORT_SYMBOL(cfg80211_send_rx_auth); void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; 
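/*
 * Annotation (not part of the patch): the hunk below makes
 * cfg80211_send_rx_assoc() parse the association response and feed it
 * into cfg80211_connect_result(), which is how auth/assoc-based drivers
 * such as mac80211 complete an emulated connect.  A fullmac driver that
 * implements ->connect() itself would call the same exported helper
 * directly from its firmware event handler.  Minimal sketch only -- the
 * callback below and its arguments are invented for illustration; just
 * cfg80211_connect_result(), its signature and the WLAN_STATUS_* codes
 * come from this series (usual cfg80211/ieee80211 headers assumed):
 */
static void drv_fw_connect_done(struct net_device *netdev, const u8 *bssid,
				bool fw_ok,
				const u8 *req_ie, size_t req_ie_len,
				const u8 *resp_ie, size_t resp_ie_len)
{
	/* hypothetical firmware result mapped onto an 802.11 status code */
	u16 status = fw_ok ? WLAN_STATUS_SUCCESS :
			     WLAN_STATUS_UNSPECIFIED_FAILURE;

	cfg80211_connect_result(netdev, bssid, req_ie, req_ie_len,
				resp_ie, resp_ie_len, status, GFP_KERNEL);
}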
+ u16 status_code; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; + u8 *ie = mgmt->u.assoc_resp.variable; + int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); + + status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); + nl80211_send_rx_assoc(rdev, dev, buf, len, gfp); + + cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, + status_code, gfp); } EXPORT_SYMBOL(cfg80211_send_rx_assoc); void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; + nl80211_send_deauth(rdev, dev, buf, len, gfp); + + if (wdev->sme_state == CFG80211_SME_CONNECTED) { + u16 reason_code; + bool from_ap; + + reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); + + from_ap = memcmp(mgmt->da, dev->dev_addr, ETH_ALEN) == 0; + __cfg80211_disconnected(dev, gfp, NULL, 0, + reason_code, from_ap); + + wdev->sme_state = CFG80211_SME_IDLE; + } else if (wdev->sme_state == CFG80211_SME_CONNECTING) { + cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, gfp); + } } EXPORT_SYMBOL(cfg80211_send_deauth); void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; + nl80211_send_disassoc(rdev, dev, buf, len, gfp); -} -EXPORT_SYMBOL(cfg80211_send_disassoc); -static void cfg80211_wext_disconnected(struct net_device *dev) -{ -#ifdef CONFIG_WIRELESS_EXT - union iwreq_data wrqu; - memset(&wrqu, 0, sizeof(wrqu)); - wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); -#endif + if (wdev->sme_state == CFG80211_SME_CONNECTED) { + u16 reason_code; + bool from_ap; + + reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); + + from_ap = memcmp(mgmt->da, dev->dev_addr, ETH_ALEN) == 0; + __cfg80211_disconnected(dev, gfp, NULL, 0, + reason_code, from_ap); + + wdev->sme_state = CFG80211_SME_IDLE; + } } +EXPORT_SYMBOL(cfg80211_send_disassoc); void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); nl80211_send_auth_timeout(rdev, dev, addr, gfp); - cfg80211_wext_disconnected(dev); + if (wdev->sme_state == CFG80211_SME_CONNECTING) + cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, gfp); + wdev->sme_state = CFG80211_SME_IDLE; } EXPORT_SYMBOL(cfg80211_send_auth_timeout); void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); nl80211_send_assoc_timeout(rdev, dev, addr, gfp); - cfg80211_wext_disconnected(dev); + if (wdev->sme_state == 
CFG80211_SME_CONNECTING) + cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, gfp); + wdev->sme_state = CFG80211_SME_IDLE; } EXPORT_SYMBOL(cfg80211_send_assoc_timeout); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 89dd3793e03c..89aa9e781d10 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -351,12 +351,12 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, #undef CMD - if (dev->ops->connect) { + if (dev->ops->connect || dev->ops->auth) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_CONNECT); } - if (dev->ops->disconnect) { + if (dev->ops->disconnect || dev->ops->deauth) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_DISCONNECT); } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 261a06386822..82b33e708488 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -30,6 +30,13 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); + /* + * This must be before sending the other events! + * Otherwise, wpa_supplicant gets completely confused with + * wext events. + */ + cfg80211_sme_scan_done(dev); + if (aborted) nl80211_send_scan_aborted(wiphy_to_dev(request->wiphy), dev); else diff --git a/net/wireless/sme.c b/net/wireless/sme.c index fc117031d0bb..3abb04729873 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -12,6 +12,266 @@ #include #include "nl80211.h" +struct cfg80211_conn { + struct cfg80211_connect_params params; + /* these are sub-states of the _CONNECTING sme_state */ + enum { + CFG80211_CONN_IDLE, + CFG80211_CONN_SCANNING, + CFG80211_CONN_SCAN_AGAIN, + CFG80211_CONN_AUTHENTICATE_NEXT, + CFG80211_CONN_AUTHENTICATING, + CFG80211_CONN_ASSOCIATE_NEXT, + CFG80211_CONN_ASSOCIATING, + } state; + u8 bssid[ETH_ALEN]; + u8 *ie; + size_t ie_len; + bool auto_auth; +}; + + +static int cfg80211_conn_scan(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *drv = wiphy_to_dev(wdev->wiphy); + struct cfg80211_scan_request *request; + int n_channels, err; + + ASSERT_RTNL(); + + if (drv->scan_req) + return -EBUSY; + + if (wdev->conn->params.channel) { + n_channels = 1; + } else { + enum ieee80211_band band; + n_channels = 0; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + if (!wdev->wiphy->bands[band]) + continue; + n_channels += wdev->wiphy->bands[band]->n_channels; + } + } + request = kzalloc(sizeof(*request) + sizeof(request->ssids[0]) + + sizeof(request->channels[0]) * n_channels, + GFP_KERNEL); + if (!request) + return -ENOMEM; + + request->channels = (void *)((char *)request + sizeof(*request)); + if (wdev->conn->params.channel) + request->channels[0] = wdev->conn->params.channel; + else { + int i = 0, j; + enum ieee80211_band band; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + if (!wdev->wiphy->bands[band]) + continue; + for (j = 0; j < wdev->wiphy->bands[band]->n_channels; + i++, j++) + request->channels[i] = + &wdev->wiphy->bands[band]->channels[j]; + } + } + request->n_channels = n_channels; + request->ssids = (void *)(request->channels + n_channels); + request->n_ssids = 1; + + memcpy(request->ssids[0].ssid, wdev->conn->params.ssid, + wdev->conn->params.ssid_len); + request->ssids[0].ssid_len = wdev->conn->params.ssid_len; + + request->ifidx = wdev->netdev->ifindex; + request->wiphy = &drv->wiphy; + + drv->scan_req = request; + + err = drv->ops->scan(wdev->wiphy, wdev->netdev, request); + if (!err) { + wdev->conn->state = CFG80211_CONN_SCANNING; + 
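/*
 * Annotation (not part of the patch): CFG80211_CONN_SCANNING set above
 * is the first of the _CONNECTING sub-states.  The emulated connect as
 * implemented in this file progresses as follows:
 *
 *   CONN_IDLE
 *    -> CONN_SCANNING / CONN_SCAN_AGAIN  no usable BSSID/channel yet
 *    -> CONN_AUTHENTICATE_NEXT           cfg80211_get_conn_bss() found the BSS
 *    -> CONN_AUTHENTICATING              ->auth() issued by cfg80211_conn_do_work()
 *    -> CONN_ASSOCIATE_NEXT              auth accepted (cfg80211_sme_rx_auth())
 *    -> CONN_ASSOCIATING                 ->assoc() issued by cfg80211_conn_do_work()
 *    -> CONN_IDLE                        cfg80211_connect_result() reports the outcome
 *
 * cfg80211_conn_work() walks the connecting wdevs and performs the
 * *_NEXT steps; a failure at any point is reported to userspace as
 * WLAN_STATUS_UNSPECIFIED_FAILURE.
 */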
nl80211_send_scan_start(drv, wdev->netdev); + } else { + drv->scan_req = NULL; + kfree(request); + } + return err; +} + +static int cfg80211_conn_do_work(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *drv = wiphy_to_dev(wdev->wiphy); + union { + struct cfg80211_auth_request auth_req; + struct cfg80211_assoc_request assoc_req; + } u; + + memset(&u, 0, sizeof(u)); + + if (!wdev->conn) + return 0; + + switch (wdev->conn->state) { + case CFG80211_CONN_SCAN_AGAIN: + return cfg80211_conn_scan(wdev); + case CFG80211_CONN_AUTHENTICATE_NEXT: + u.auth_req.chan = wdev->conn->params.channel; + u.auth_req.peer_addr = wdev->conn->params.bssid; + u.auth_req.ssid = wdev->conn->params.ssid; + u.auth_req.ssid_len = wdev->conn->params.ssid_len; + u.auth_req.auth_type = wdev->conn->params.auth_type; + u.auth_req.ie = NULL; + u.auth_req.ie_len = 0; + wdev->conn->state = CFG80211_CONN_AUTHENTICATING; + BUG_ON(!drv->ops->auth); + return drv->ops->auth(wdev->wiphy, wdev->netdev, &u.auth_req); + case CFG80211_CONN_ASSOCIATE_NEXT: + u.assoc_req.chan = wdev->conn->params.channel; + u.assoc_req.peer_addr = wdev->conn->params.bssid; + u.assoc_req.ssid = wdev->conn->params.ssid; + u.assoc_req.ssid_len = wdev->conn->params.ssid_len; + u.assoc_req.ie = wdev->conn->params.ie; + u.assoc_req.ie_len = wdev->conn->params.ie_len; + u.assoc_req.use_mfp = false; + memcpy(&u.assoc_req.crypto, &wdev->conn->params.crypto, + sizeof(u.assoc_req.crypto)); + wdev->conn->state = CFG80211_CONN_ASSOCIATING; + BUG_ON(!drv->ops->assoc); + return drv->ops->assoc(wdev->wiphy, wdev->netdev, + &u.assoc_req); + default: + return 0; + } +} + +void cfg80211_conn_work(struct work_struct *work) +{ + struct cfg80211_registered_device *drv = + container_of(work, struct cfg80211_registered_device, conn_work); + struct wireless_dev *wdev; + + rtnl_lock(); + mutex_lock(&drv->devlist_mtx); + + list_for_each_entry(wdev, &drv->netdev_list, list) { + if (!netif_running(wdev->netdev)) + continue; + if (wdev->sme_state != CFG80211_SME_CONNECTING) + continue; + if (cfg80211_conn_do_work(wdev)) + cfg80211_connect_result(wdev->netdev, + wdev->conn->params.bssid, + NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + GFP_ATOMIC); + } + + mutex_unlock(&drv->devlist_mtx); + rtnl_unlock(); +} + +static bool cfg80211_get_conn_bss(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *drv = wiphy_to_dev(wdev->wiphy); + struct cfg80211_bss *bss; + u16 capa = WLAN_CAPABILITY_ESS; + + if (wdev->conn->params.privacy) + capa |= WLAN_CAPABILITY_PRIVACY; + + bss = cfg80211_get_bss(wdev->wiphy, NULL, wdev->conn->params.bssid, + wdev->conn->params.ssid, + wdev->conn->params.ssid_len, + WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_PRIVACY, + capa); + + if (!bss) + return false; + + memcpy(wdev->conn->bssid, bss->bssid, ETH_ALEN); + wdev->conn->params.bssid = wdev->conn->bssid; + wdev->conn->params.channel = bss->channel; + wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; + schedule_work(&drv->conn_work); + + cfg80211_put_bss(bss); + return true; +} + +void cfg80211_sme_scan_done(struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *drv = wiphy_to_dev(wdev->wiphy); + + if (wdev->sme_state != CFG80211_SME_CONNECTING) + return; + + if (WARN_ON(!wdev->conn)) + return; + + if (wdev->conn->state != CFG80211_CONN_SCANNING && + wdev->conn->state != CFG80211_CONN_SCAN_AGAIN) + return; + + if (!cfg80211_get_conn_bss(wdev)) { + /* not found */ + if (wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) + 
schedule_work(&drv->conn_work); + else + cfg80211_connect_result(dev, wdev->conn->params.bssid, + NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + GFP_ATOMIC); + return; + } +} + +void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; + u16 status_code = le16_to_cpu(mgmt->u.auth.status_code); + + /* should only RX auth frames when connecting */ + if (wdev->sme_state != CFG80211_SME_CONNECTING) + return; + + if (WARN_ON(!wdev->conn)) + return; + + if (status_code == WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG && + wdev->conn->auto_auth && + wdev->conn->params.auth_type != NL80211_AUTHTYPE_NETWORK_EAP) { + /* select automatically between only open, shared, leap */ + switch (wdev->conn->params.auth_type) { + case NL80211_AUTHTYPE_OPEN_SYSTEM: + wdev->conn->params.auth_type = + NL80211_AUTHTYPE_SHARED_KEY; + break; + case NL80211_AUTHTYPE_SHARED_KEY: + wdev->conn->params.auth_type = + NL80211_AUTHTYPE_NETWORK_EAP; + break; + default: + /* huh? */ + wdev->conn->params.auth_type = + NL80211_AUTHTYPE_OPEN_SYSTEM; + break; + } + wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; + schedule_work(&rdev->conn_work); + } else if (status_code != WLAN_STATUS_SUCCESS) + wdev->sme_state = CFG80211_SME_IDLE; + else if (wdev->sme_state == CFG80211_SME_CONNECTING && + wdev->conn->state == CFG80211_CONN_AUTHENTICATING) { + wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; + schedule_work(&rdev->conn_work); + } +} void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, @@ -27,7 +287,7 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return; - if (WARN_ON(wdev->sme_state != CFG80211_SME_CONNECTING)) + if (wdev->sme_state != CFG80211_SME_CONNECTING) return; if (wdev->current_bss) { @@ -53,6 +313,9 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, wdev->sme_state = CFG80211_SME_IDLE; } + if (wdev->conn) + wdev->conn->state = CFG80211_CONN_IDLE; + nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, status, gfp); @@ -72,7 +335,7 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; - if (bssid) + if (bssid && status == WLAN_STATUS_SUCCESS) memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); #endif @@ -138,9 +401,8 @@ void cfg80211_roamed(struct net_device *dev, const u8 *bssid, } EXPORT_SYMBOL(cfg80211_roamed); -static void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, - u8 *ie, size_t ie_len, u16 reason, - bool from_ap) +void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, u8 *ie, + size_t ie_len, u16 reason, bool from_ap) { struct wireless_dev *wdev = dev->ieee80211_ptr; #ifdef CONFIG_WIRELESS_EXT @@ -161,6 +423,11 @@ static void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, wdev->current_bss = NULL; wdev->sme_state = CFG80211_SME_IDLE; + if (wdev->conn) { + kfree(wdev->conn->ie); + wdev->conn->ie = NULL; + } + nl80211_send_disconnected(wiphy_to_dev(wdev->wiphy), dev, reason, ie, ie_len, from_ap, gfp); @@ -174,7 +441,7 @@ static void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, void 
cfg80211_disconnected(struct net_device *dev, u16 reason, u8 *ie, size_t ie_len, gfp_t gfp) { - __cfg80211_disconnected(dev, reason, ie, ie_len, true, gfp); + __cfg80211_disconnected(dev, gfp, ie, ie_len, reason, true); } EXPORT_SYMBOL(cfg80211_disconnected); @@ -189,7 +456,74 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, return -EALREADY; if (!rdev->ops->connect) { - return -EOPNOTSUPP; + if (!rdev->ops->auth || !rdev->ops->assoc) + return -EOPNOTSUPP; + + if (!wdev->conn) { + wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL); + if (!wdev->conn) + return -ENOMEM; + } else + memset(wdev->conn, 0, sizeof(*wdev->conn)); + + /* + * Copy all parameters, and treat explicitly IEs, BSSID, SSID. + */ + memcpy(&wdev->conn->params, connect, sizeof(*connect)); + if (connect->bssid) { + wdev->conn->params.bssid = wdev->conn->bssid; + memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN); + } + + if (connect->ie) { + wdev->conn->ie = kmemdup(connect->ie, connect->ie_len, + GFP_KERNEL); + wdev->conn->params.ie = wdev->conn->ie; + if (!wdev->conn->ie) + return -ENOMEM; + } + + if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) { + wdev->conn->auto_auth = true; + /* start with open system ... should mostly work */ + wdev->conn->params.auth_type = + NL80211_AUTHTYPE_OPEN_SYSTEM; + } else { + wdev->conn->auto_auth = false; + } + + memcpy(wdev->ssid, connect->ssid, connect->ssid_len); + wdev->ssid_len = connect->ssid_len; + wdev->conn->params.ssid = wdev->ssid; + wdev->conn->params.ssid_len = connect->ssid_len; + + /* don't care about result -- but fill bssid & channel */ + if (!wdev->conn->params.bssid || !wdev->conn->params.channel) + cfg80211_get_conn_bss(wdev); + + wdev->sme_state = CFG80211_SME_CONNECTING; + + /* we're good if we have both BSSID and channel */ + if (wdev->conn->params.bssid && wdev->conn->params.channel) { + wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; + err = cfg80211_conn_do_work(wdev); + } else { + /* otherwise we'll need to scan for the AP first */ + err = cfg80211_conn_scan(wdev); + /* + * If we can't scan right now, then we need to scan again + * after the current scan finished, since the parameters + * changed (unless we find a good AP anyway). + */ + if (err == -EBUSY) { + err = 0; + wdev->conn->state = CFG80211_CONN_SCAN_AGAIN; + } + } + if (err) + wdev->sme_state = CFG80211_SME_IDLE; + + return err; } else { wdev->sme_state = CFG80211_SME_CONNECTING; err = rdev->ops->connect(&rdev->wiphy, dev, connect); @@ -197,28 +531,63 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, wdev->sme_state = CFG80211_SME_IDLE; return err; } - } - memcpy(wdev->ssid, connect->ssid, connect->ssid_len); - wdev->ssid_len = connect->ssid_len; + memcpy(wdev->ssid, connect->ssid, connect->ssid_len); + wdev->ssid_len = connect->ssid_len; - return 0; + return 0; + } } int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason) { + struct wireless_dev *wdev = dev->ieee80211_ptr; int err; + if (wdev->sme_state == CFG80211_SME_IDLE) + return -EINVAL; + if (!rdev->ops->disconnect) { - return -EOPNOTSUPP; + struct cfg80211_deauth_request deauth; + u8 bssid[ETH_ALEN]; + + /* internal bug. 
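 * (Annotation, not part of the patch: without a ->disconnect op the
 *  emulated connect path above is presumably the only way sme_state can
 *  have left CFG80211_SME_IDLE, and that path always allocates
 *  wdev->conn first, so hitting this WARN_ON indicates the SME state
 *  machine itself got out of sync.)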
*/ + if (WARN_ON(!wdev->conn)) + return -EINVAL; + + if (wdev->sme_state == CFG80211_SME_CONNECTING && + (wdev->conn->state == CFG80211_CONN_SCANNING || + wdev->conn->state == CFG80211_CONN_SCAN_AGAIN)) { + wdev->sme_state = CFG80211_SME_IDLE; + return 0; + } + + if (!rdev->ops->deauth) + return -EOPNOTSUPP; + + memset(&deauth, 0, sizeof(deauth)); + + /* wdev->conn->params.bssid must be set if > SCANNING */ + memcpy(bssid, wdev->conn->params.bssid, ETH_ALEN); + deauth.peer_addr = bssid; + deauth.reason_code = reason; + + err = rdev->ops->deauth(&rdev->wiphy, dev, &deauth); + if (err) + return err; } else { err = rdev->ops->disconnect(&rdev->wiphy, dev, reason); if (err) return err; } - __cfg80211_disconnected(dev, 0, NULL, 0, false, GFP_KERNEL); + if (wdev->sme_state == CFG80211_SME_CONNECTED) + __cfg80211_disconnected(dev, GFP_KERNEL, NULL, 0, 0, false); + else if (wdev->sme_state == CFG80211_SME_CONNECTING) + cfg80211_connect_result(dev, NULL, NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + GFP_KERNEL); return 0; } -- cgit v1.2.3 From f21293549f60f88c74fcb9944737f11048896dc4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:26:56 +0200 Subject: cfg80211: managed mode wext compatibility This adds code to make it possible to use the cfg80211 connect() API with wireless extensions, and because the previous patch added emulation of that API with auth() and assoc(), by extension also supports wext on that. At the same time, removes code from mac80211 for wext, but doesn't yet clean up mac80211's mlme code more. Signed-off-by: Samuel Ortiz Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 36 ++++- net/mac80211/mlme.c | 69 ---------- net/mac80211/wext.c | 215 +++-------------------------- net/wireless/Makefile | 2 +- net/wireless/core.c | 23 +++- net/wireless/core.h | 3 +- net/wireless/nl80211.c | 2 +- net/wireless/sme.c | 55 +++++--- net/wireless/wext-compat.c | 229 +++++++++++++++++++++++++------ net/wireless/wext-sme.c | 329 +++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 625 insertions(+), 338 deletions(-) create mode 100644 net/wireless/wext-sme.c (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 24fab439d415..07085216532d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1253,8 +1253,14 @@ struct wireless_dev { #ifdef CONFIG_WIRELESS_EXT /* wext data */ struct { - struct cfg80211_ibss_params ibss; + union { + struct cfg80211_ibss_params ibss; + struct cfg80211_connect_params connect; + }; + u8 *ie; + size_t ie_len; u8 bssid[ETH_ALEN]; + u8 ssid[IEEE80211_MAX_SSID_LEN]; s8 default_key, default_mgmt_key; } wext; #endif @@ -1535,6 +1541,34 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, struct iw_request_info *info, struct sockaddr *ap_addr, char *extra); +int cfg80211_mgd_wext_siwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra); +int cfg80211_mgd_wext_giwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra); +int cfg80211_mgd_wext_siwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid); +int cfg80211_mgd_wext_giwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid); +int cfg80211_mgd_wext_siwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra); +int cfg80211_mgd_wext_giwap(struct net_device *dev, + struct 
iw_request_info *info, + struct sockaddr *ap_addr, char *extra); +int cfg80211_wext_siwgenie(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *extra); +int cfg80211_wext_siwauth(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *data, char *extra); +int cfg80211_wext_giwauth(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *data, char *extra); + struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 58135a5096af..fbb93a70ddc7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -870,70 +870,6 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, return changed; } -static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata) -{ - union iwreq_data wrqu; - - memset(&wrqu, 0, sizeof(wrqu)); - if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) - memcpy(wrqu.ap_addr.sa_data, sdata->u.mgd.bssid, ETH_ALEN); - wrqu.ap_addr.sa_family = ARPHRD_ETHER; - wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); -} - -static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - char *buf; - size_t len; - int i; - union iwreq_data wrqu; - - if (!ifmgd->assocreq_ies && !ifmgd->assocresp_ies) - return; - - buf = kmalloc(50 + 2 * (ifmgd->assocreq_ies_len + - ifmgd->assocresp_ies_len), GFP_KERNEL); - if (!buf) - return; - - len = sprintf(buf, "ASSOCINFO("); - if (ifmgd->assocreq_ies) { - len += sprintf(buf + len, "ReqIEs="); - for (i = 0; i < ifmgd->assocreq_ies_len; i++) { - len += sprintf(buf + len, "%02x", - ifmgd->assocreq_ies[i]); - } - } - if (ifmgd->assocresp_ies) { - if (ifmgd->assocreq_ies) - len += sprintf(buf + len, " "); - len += sprintf(buf + len, "RespIEs="); - for (i = 0; i < ifmgd->assocresp_ies_len; i++) { - len += sprintf(buf + len, "%02x", - ifmgd->assocresp_ies[i]); - } - } - len += sprintf(buf + len, ")"); - - if (len > IW_CUSTOM_MAX) { - len = sprintf(buf, "ASSOCRESPIE="); - for (i = 0; i < ifmgd->assocresp_ies_len; i++) { - len += sprintf(buf + len, "%02x", - ifmgd->assocresp_ies[i]); - } - } - - if (len <= IW_CUSTOM_MAX) { - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = len; - wireless_send_event(sdata->dev, IWEVCUSTOM, &wrqu, buf); - } - - kfree(buf); -} - - static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, u32 bss_info_changed) { @@ -966,7 +902,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_PREV_BSSID_SET; memcpy(ifmgd->prev_bssid, sdata->u.mgd.bssid, ETH_ALEN); - ieee80211_sta_send_associnfo(sdata); ifmgd->last_probe = jiffies; ieee80211_led_assoc(local, 1); @@ -993,8 +928,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, netif_tx_start_all_queues(sdata->dev); netif_carrier_on(sdata->dev); - - ieee80211_sta_send_apinfo(sdata); } static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata) @@ -1147,8 +1080,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, changed |= BSS_CHANGED_ASSOC; sdata->vif.bss_conf.assoc = false; - ieee80211_sta_send_apinfo(sdata); - if (self_disconnected || reason == WLAN_REASON_DISASSOC_STA_HAS_LEFT) { ifmgd->state = IEEE80211_STA_MLME_DISABLED; ieee80211_rx_bss_remove(sdata, ifmgd->bssid, diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 1da81f456744..d4e61dc903e8 100644 --- 
a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -27,29 +27,6 @@ #include "aes_ccm.h" -static int ieee80211_ioctl_siwgenie(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *extra) -{ - struct ieee80211_sub_if_data *sdata; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length); - if (ret && ret != -EALREADY) - return ret; - sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; - sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME; - sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT; - if (ret != -EALREADY) - ieee80211_sta_req_auth(sdata); - return 0; - } - - return -EOPNOTSUPP; -} - static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra) @@ -61,16 +38,13 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, if (sdata->vif.type == NL80211_IFTYPE_ADHOC) return cfg80211_ibss_wext_siwfreq(dev, info, freq, extra); else if (sdata->vif.type == NL80211_IFTYPE_STATION) - sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; + return cfg80211_mgd_wext_siwfreq(dev, info, freq, extra); /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ if (freq->e == 0) { - if (freq->m < 0) { - if (sdata->vif.type == NL80211_IFTYPE_STATION) - sdata->u.mgd.flags |= - IEEE80211_STA_AUTO_CHANNEL_SEL; - return 0; - } else + if (freq->m < 0) + return -EINVAL; + else chan = ieee80211_get_channel(local->hw.wiphy, ieee80211_channel_to_frequency(freq->m)); } else { @@ -95,9 +69,6 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, if (local->oper_channel == chan) return 0; - if (sdata->vif.type == NL80211_IFTYPE_STATION) - ieee80211_sta_req_auth(sdata); - local->oper_channel = chan; local->oper_channel_type = NL80211_CHAN_NO_HT; ieee80211_hw_config(local, 0); @@ -115,6 +86,8 @@ static int ieee80211_ioctl_giwfreq(struct net_device *dev, if (sdata->vif.type == NL80211_IFTYPE_ADHOC) return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); + else if (sdata->vif.type == NL80211_IFTYPE_STATION) + return cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); freq->m = local->oper_channel->center_freq; freq->e = 6; @@ -128,31 +101,11 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, struct iw_point *data, char *ssid) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - size_t len = data->length; - int ret; if (sdata->vif.type == NL80211_IFTYPE_ADHOC) return cfg80211_ibss_wext_siwessid(dev, info, data, ssid); - - /* iwconfig uses nul termination in SSID.. 
*/ - if (len > 0 && ssid[len - 1] == '\0') - len--; - - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - if (data->flags) - sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; - else - sdata->u.mgd.flags |= IEEE80211_STA_AUTO_SSID_SEL; - - ret = ieee80211_sta_set_ssid(sdata, ssid, len); - if (ret) - return ret; - - sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME; - sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT; - ieee80211_sta_req_auth(sdata); - return 0; - } + else if (sdata->vif.type == NL80211_IFTYPE_STATION) + return cfg80211_mgd_wext_siwessid(dev, info, data, ssid); return -EOPNOTSUPP; } @@ -162,23 +115,14 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid) { - size_t len; struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type == NL80211_IFTYPE_ADHOC) return cfg80211_ibss_wext_giwessid(dev, info, data, ssid); - - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - int res = ieee80211_sta_get_ssid(sdata, ssid, &len); - if (res == 0) { - data->length = len; - data->flags = 1; - } else - data->flags = 0; - return res; - } + else if (sdata->vif.type == NL80211_IFTYPE_STATION) + return cfg80211_mgd_wext_giwessid(dev, info, data, ssid); return -EOPNOTSUPP; } @@ -193,24 +137,10 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, if (sdata->vif.type == NL80211_IFTYPE_ADHOC) return cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - int ret; + if (sdata->vif.type == NL80211_IFTYPE_STATION) + return cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); - if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) - sdata->u.mgd.flags |= IEEE80211_STA_AUTO_BSSID_SEL | - IEEE80211_STA_AUTO_CHANNEL_SEL; - else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) - sdata->u.mgd.flags |= IEEE80211_STA_AUTO_BSSID_SEL; - else - sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; - ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data); - if (ret) - return ret; - sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME; - sdata->u.mgd.flags &= ~IEEE80211_STA_CONTROL_PORT; - ieee80211_sta_req_auth(sdata); - return 0; - } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { + if (sdata->vif.type == NL80211_IFTYPE_WDS) { /* * If it is necessary to update the WDS peer address * while the interface is running, then we need to do @@ -240,14 +170,10 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, if (sdata->vif.type == NL80211_IFTYPE_ADHOC) return cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - if (sdata->u.mgd.state == IEEE80211_STA_MLME_ASSOCIATED) { - ap_addr->sa_family = ARPHRD_ETHER; - memcpy(&ap_addr->sa_data, sdata->u.mgd.bssid, ETH_ALEN); - } else - memset(&ap_addr->sa_data, 0, ETH_ALEN); - return 0; - } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) + return cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); + + if (sdata->vif.type == NL80211_IFTYPE_WDS) { ap_addr->sa_family = ARPHRD_ETHER; memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); return 0; @@ -395,85 +321,6 @@ static int ieee80211_ioctl_giwpower(struct net_device *dev, return 0; } -static int ieee80211_ioctl_siwauth(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *data, char *extra) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - int ret = 0; - - switch (data->flags & IW_AUTH_INDEX) { - case 
IW_AUTH_WPA_VERSION: - case IW_AUTH_CIPHER_GROUP: - case IW_AUTH_WPA_ENABLED: - case IW_AUTH_RX_UNENCRYPTED_EAPOL: - case IW_AUTH_KEY_MGMT: - case IW_AUTH_CIPHER_GROUP_MGMT: - break; - case IW_AUTH_CIPHER_PAIRWISE: - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - if (data->value & (IW_AUTH_CIPHER_WEP40 | - IW_AUTH_CIPHER_WEP104 | IW_AUTH_CIPHER_TKIP)) - sdata->u.mgd.flags |= - IEEE80211_STA_TKIP_WEP_USED; - else - sdata->u.mgd.flags &= - ~IEEE80211_STA_TKIP_WEP_USED; - } - break; - case IW_AUTH_DROP_UNENCRYPTED: - sdata->drop_unencrypted = !!data->value; - break; - case IW_AUTH_PRIVACY_INVOKED: - if (sdata->vif.type != NL80211_IFTYPE_STATION) - ret = -EINVAL; - else { - sdata->u.mgd.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; - /* - * Privacy invoked by wpa_supplicant, store the - * value and allow associating to a protected - * network without having a key up front. - */ - if (data->value) - sdata->u.mgd.flags |= - IEEE80211_STA_PRIVACY_INVOKED; - } - break; - case IW_AUTH_80211_AUTH_ALG: - if (sdata->vif.type == NL80211_IFTYPE_STATION) - sdata->u.mgd.auth_algs = data->value; - else - ret = -EOPNOTSUPP; - break; - case IW_AUTH_MFP: - if (!(sdata->local->hw.flags & IEEE80211_HW_MFP_CAPABLE)) { - ret = -EOPNOTSUPP; - break; - } - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - switch (data->value) { - case IW_AUTH_MFP_DISABLED: - sdata->u.mgd.mfp = IEEE80211_MFP_DISABLED; - break; - case IW_AUTH_MFP_OPTIONAL: - sdata->u.mgd.mfp = IEEE80211_MFP_OPTIONAL; - break; - case IW_AUTH_MFP_REQUIRED: - sdata->u.mgd.mfp = IEEE80211_MFP_REQUIRED; - break; - default: - ret = -EINVAL; - } - } else - ret = -EOPNOTSUPP; - break; - default: - ret = -EOPNOTSUPP; - break; - } - return ret; -} - /* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev) { @@ -541,28 +388,6 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev return wstats; } -static int ieee80211_ioctl_giwauth(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *data, char *extra) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - int ret = 0; - - switch (data->flags & IW_AUTH_INDEX) { - case IW_AUTH_80211_AUTH_ALG: - if (sdata->vif.type == NL80211_IFTYPE_STATION) - data->value = sdata->u.mgd.auth_algs; - else - ret = -EOPNOTSUPP; - break; - default: - ret = -EOPNOTSUPP; - break; - } - return ret; -} - - /* Structures to export the Wireless Handlers */ static const iw_handler ieee80211_handler[] = @@ -615,10 +440,10 @@ static const iw_handler ieee80211_handler[] = (iw_handler) ieee80211_ioctl_giwpower, /* SIOCGIWPOWER */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* -- hole -- */ - (iw_handler) ieee80211_ioctl_siwgenie, /* SIOCSIWGENIE */ + (iw_handler) cfg80211_wext_siwgenie, /* SIOCSIWGENIE */ (iw_handler) NULL, /* SIOCGIWGENIE */ - (iw_handler) ieee80211_ioctl_siwauth, /* SIOCSIWAUTH */ - (iw_handler) ieee80211_ioctl_giwauth, /* SIOCGIWAUTH */ + (iw_handler) cfg80211_wext_siwauth, /* SIOCSIWAUTH */ + (iw_handler) cfg80211_wext_giwauth, /* SIOCGIWAUTH */ (iw_handler) cfg80211_wext_siwencodeext, /* SIOCSIWENCODEEXT */ (iw_handler) NULL, /* SIOCGIWENCODEEXT */ (iw_handler) NULL, /* SIOCSIWPMKSA */ diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 750c08e31b10..d74cc77fa57a 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -7,6 +7,6 @@ obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o cfg80211-y += core.o sysfs.o 
radiotap.o util.o reg.o scan.o nl80211.o mlme.o ibss.o sme.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o -cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o +cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o wext-sme.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/wireless/core.c b/net/wireless/core.c index a0a679704612..e2f80dd0e4a6 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -553,6 +553,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, #ifdef CONFIG_WIRELESS_EXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; + wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; #endif mutex_unlock(&rdev->devlist_mtx); break; @@ -565,8 +566,13 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, cfg80211_leave_ibss(rdev, dev, true); break; case NL80211_IFTYPE_STATION: +#ifdef CONFIG_WIRELESS_EXT + kfree(wdev->wext.ie); + wdev->wext.ie = NULL; + wdev->wext.ie_len = 0; +#endif cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING); + WLAN_REASON_DEAUTH_LEAVING, true); break; default: break; @@ -578,11 +584,20 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, break; case NETDEV_UP: #ifdef CONFIG_WIRELESS_EXT - if (wdev->iftype != NL80211_IFTYPE_ADHOC) + switch (wdev->iftype) { + case NL80211_IFTYPE_ADHOC: + if (wdev->wext.ibss.ssid_len) + cfg80211_join_ibss(rdev, dev, + &wdev->wext.ibss); break; - if (!wdev->wext.ibss.ssid_len) + case NL80211_IFTYPE_STATION: + if (wdev->wext.connect.ssid_len) + cfg80211_connect(rdev, dev, + &wdev->wext.connect); + break; + default: break; - cfg80211_join_ibss(rdev, dev, &wdev->wext.ibss); + } #endif break; case NETDEV_UNREGISTER: diff --git a/net/wireless/core.h b/net/wireless/core.h index 2c0f64252f3d..5209acb0ff7e 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -181,7 +181,8 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect); int cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, u16 reason); + struct net_device *dev, u16 reason, + bool wextev); void cfg80211_conn_work(struct work_struct *work); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 89aa9e781d10..0008144b354b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3747,7 +3747,7 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) goto out; } - err = cfg80211_disconnect(drv, dev, reason); + err = cfg80211_disconnect(drv, dev, reason, true); out: cfg80211_put_dev(drv); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 3abb04729873..f272ebf94303 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -273,10 +273,10 @@ void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len) } } -void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, - u16 status, gfp_t gfp) +static void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, bool wextev, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss; @@ -321,25 +321,36 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, status, gfp); #ifdef CONFIG_WIRELESS_EXT - if (req_ie && status == WLAN_STATUS_SUCCESS) { - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = req_ie_len; - 
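/*
 * Annotation (not part of the patch): the hunk below gates the wext
 * notifications behind the new 'wextev' flag.  When the events are
 * emitted, a wext listener receives the association request/response
 * IEs first (both carried in IWEVASSOCRESPIE events) and then a
 * SIOCGIWAP event; the BSSID in that event is filled in only for
 * WLAN_STATUS_SUCCESS, so an all-zero address keeps meaning
 * "not associated" to wext-based tools.
 */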
wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, req_ie); - } + if (wextev) { + if (req_ie && status == WLAN_STATUS_SUCCESS) { + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = req_ie_len; + wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, req_ie); + } + + if (resp_ie && status == WLAN_STATUS_SUCCESS) { + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = resp_ie_len; + wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, resp_ie); + } - if (resp_ie && status == WLAN_STATUS_SUCCESS) { memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = resp_ie_len; - wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, resp_ie); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; + if (bssid && status == WLAN_STATUS_SUCCESS) + memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); + wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); } - - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.ap_addr.sa_family = ARPHRD_ETHER; - if (bssid && status == WLAN_STATUS_SUCCESS) - memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); - wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); #endif } + +void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, gfp_t gfp) +{ + bool wextev = status == WLAN_STATUS_SUCCESS; + __cfg80211_connect_result(dev, bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, status, wextev, gfp); +} EXPORT_SYMBOL(cfg80211_connect_result); void cfg80211_roamed(struct net_device *dev, const u8 *bssid, @@ -540,7 +551,7 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, } int cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, u16 reason) + struct net_device *dev, u16 reason, bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -585,9 +596,9 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, if (wdev->sme_state == CFG80211_SME_CONNECTED) __cfg80211_disconnected(dev, GFP_KERNEL, NULL, 0, 0, false); else if (wdev->sme_state == CFG80211_SME_CONNECTING) - cfg80211_connect_result(dev, NULL, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - GFP_KERNEL); + __cfg80211_connect_result(dev, NULL, NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + wextev, GFP_KERNEL); return 0; } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index cae3b52fba7f..02f052fc1808 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -261,50 +261,6 @@ int cfg80211_wext_giwrange(struct net_device *dev, } EXPORT_SYMBOL_GPL(cfg80211_wext_giwrange); -int cfg80211_wext_siwmlme(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *extra) -{ - struct wireless_dev *wdev = dev->ieee80211_ptr; - struct iw_mlme *mlme = (struct iw_mlme *)extra; - struct cfg80211_registered_device *rdev; - union { - struct cfg80211_disassoc_request disassoc; - struct cfg80211_deauth_request deauth; - } cmd; - - if (!wdev) - return -EOPNOTSUPP; - - rdev = wiphy_to_dev(wdev->wiphy); - - if (wdev->iftype != NL80211_IFTYPE_STATION) - return -EINVAL; - - if (mlme->addr.sa_family != ARPHRD_ETHER) - return -EINVAL; - - memset(&cmd, 0, sizeof(cmd)); - - switch (mlme->cmd) { - case IW_MLME_DEAUTH: - if (!rdev->ops->deauth) - return -EOPNOTSUPP; - cmd.deauth.peer_addr = mlme->addr.sa_data; - cmd.deauth.reason_code = mlme->reason_code; - return rdev->ops->deauth(wdev->wiphy, dev, &cmd.deauth); - case IW_MLME_DISASSOC: - if (!rdev->ops->disassoc) - return -EOPNOTSUPP; - cmd.disassoc.peer_addr = mlme->addr.sa_data; - cmd.disassoc.reason_code = 
mlme->reason_code; - return rdev->ops->disassoc(wdev->wiphy, dev, &cmd.disassoc); - default: - return -EOPNOTSUPP; - } -} -EXPORT_SYMBOL_GPL(cfg80211_wext_siwmlme); - /** * cfg80211_wext_freq - get wext frequency for non-"auto" @@ -846,3 +802,188 @@ int cfg80211_wext_giwtxpower(struct net_device *dev, return 0; } EXPORT_SYMBOL_GPL(cfg80211_wext_giwtxpower); + +static int cfg80211_set_auth_alg(struct wireless_dev *wdev, + s32 auth_alg) +{ + int nr_alg = 0; + + if (!auth_alg) + return -EINVAL; + + if (auth_alg & ~(IW_AUTH_ALG_OPEN_SYSTEM | + IW_AUTH_ALG_SHARED_KEY | + IW_AUTH_ALG_LEAP)) + return -EINVAL; + + if (auth_alg & IW_AUTH_ALG_OPEN_SYSTEM) { + nr_alg++; + wdev->wext.connect.auth_type = NL80211_AUTHTYPE_OPEN_SYSTEM; + } + + if (auth_alg & IW_AUTH_ALG_SHARED_KEY) { + nr_alg++; + wdev->wext.connect.auth_type = NL80211_AUTHTYPE_SHARED_KEY; + } + + if (auth_alg & IW_AUTH_ALG_LEAP) { + nr_alg++; + wdev->wext.connect.auth_type = NL80211_AUTHTYPE_NETWORK_EAP; + } + + if (nr_alg > 1) + wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; + + return 0; +} + +static int cfg80211_set_wpa_version(struct wireless_dev *wdev, u32 wpa_versions) +{ + wdev->wext.connect.crypto.wpa_versions = 0; + + if (wpa_versions & ~(IW_AUTH_WPA_VERSION_WPA | + IW_AUTH_WPA_VERSION_WPA2)) + return -EINVAL; + + if (wpa_versions & IW_AUTH_WPA_VERSION_WPA) + wdev->wext.connect.crypto.wpa_versions |= + NL80211_WPA_VERSION_1; + + if (wpa_versions & IW_AUTH_WPA_VERSION_WPA2) + wdev->wext.connect.crypto.wpa_versions |= + NL80211_WPA_VERSION_2; + + return 0; +} + +int cfg80211_set_cipher_group(struct wireless_dev *wdev, u32 cipher) +{ + wdev->wext.connect.crypto.cipher_group = 0; + + if (cipher & IW_AUTH_CIPHER_WEP40) + wdev->wext.connect.crypto.cipher_group = + WLAN_CIPHER_SUITE_WEP40; + else if (cipher & IW_AUTH_CIPHER_WEP104) + wdev->wext.connect.crypto.cipher_group = + WLAN_CIPHER_SUITE_WEP104; + else if (cipher & IW_AUTH_CIPHER_TKIP) + wdev->wext.connect.crypto.cipher_group = + WLAN_CIPHER_SUITE_TKIP; + else if (cipher & IW_AUTH_CIPHER_CCMP) + wdev->wext.connect.crypto.cipher_group = + WLAN_CIPHER_SUITE_CCMP; + else if (cipher & IW_AUTH_CIPHER_AES_CMAC) + wdev->wext.connect.crypto.cipher_group = + WLAN_CIPHER_SUITE_AES_CMAC; + else + return -EINVAL; + + return 0; +} + +int cfg80211_set_cipher_pairwise(struct wireless_dev *wdev, u32 cipher) +{ + int nr_ciphers = 0; + u32 *ciphers_pairwise = wdev->wext.connect.crypto.ciphers_pairwise; + + if (cipher & IW_AUTH_CIPHER_WEP40) { + ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_WEP40; + nr_ciphers++; + } + + if (cipher & IW_AUTH_CIPHER_WEP104) { + ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_WEP104; + nr_ciphers++; + } + + if (cipher & IW_AUTH_CIPHER_TKIP) { + ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_TKIP; + nr_ciphers++; + } + + if (cipher & IW_AUTH_CIPHER_CCMP) { + ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_CCMP; + nr_ciphers++; + } + + if (cipher & IW_AUTH_CIPHER_AES_CMAC) { + ciphers_pairwise[nr_ciphers] = WLAN_CIPHER_SUITE_AES_CMAC; + nr_ciphers++; + } + + BUILD_BUG_ON(NL80211_MAX_NR_CIPHER_SUITES < 5); + + wdev->wext.connect.crypto.n_ciphers_pairwise = nr_ciphers; + + return 0; +} + + +int cfg80211_set_key_mgt(struct wireless_dev *wdev, u32 key_mgt) +{ + int nr_akm_suites = 0; + + if (key_mgt & ~(IW_AUTH_KEY_MGMT_802_1X | + IW_AUTH_KEY_MGMT_PSK)) + return -EINVAL; + + if (key_mgt & IW_AUTH_KEY_MGMT_802_1X) { + wdev->wext.connect.crypto.akm_suites[nr_akm_suites] = + WLAN_AKM_SUITE_8021X; + nr_akm_suites++; + } + + if (key_mgt & 
IW_AUTH_KEY_MGMT_PSK) { + wdev->wext.connect.crypto.akm_suites[nr_akm_suites] = + WLAN_AKM_SUITE_PSK; + nr_akm_suites++; + } + + wdev->wext.connect.crypto.n_akm_suites = nr_akm_suites; + + return 0; +} + +int cfg80211_wext_siwauth(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *data, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (wdev->iftype != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + switch (data->flags & IW_AUTH_INDEX) { + case IW_AUTH_PRIVACY_INVOKED: + wdev->wext.connect.privacy = data->value; + return 0; + case IW_AUTH_WPA_VERSION: + return cfg80211_set_wpa_version(wdev, data->value); + case IW_AUTH_CIPHER_GROUP: + return cfg80211_set_cipher_group(wdev, data->value); + case IW_AUTH_KEY_MGMT: + return cfg80211_set_key_mgt(wdev, data->value); + case IW_AUTH_CIPHER_PAIRWISE: + return cfg80211_set_cipher_pairwise(wdev, data->value); + case IW_AUTH_80211_AUTH_ALG: + return cfg80211_set_auth_alg(wdev, data->value); + case IW_AUTH_WPA_ENABLED: + case IW_AUTH_RX_UNENCRYPTED_EAPOL: + case IW_AUTH_DROP_UNENCRYPTED: + case IW_AUTH_MFP: + return 0; + default: + return -EOPNOTSUPP; + } +} +EXPORT_SYMBOL_GPL(cfg80211_wext_siwauth); + +int cfg80211_wext_giwauth(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *data, char *extra) +{ + /* XXX: what do we need? */ + + return -EOPNOTSUPP; +} +EXPORT_SYMBOL_GPL(cfg80211_wext_giwauth); diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c new file mode 100644 index 000000000000..3b531d572b69 --- /dev/null +++ b/net/wireless/wext-sme.c @@ -0,0 +1,329 @@ +/* + * cfg80211 wext compat for managed mode. + * + * Copyright 2009 Johannes Berg + * Copyright (C) 2009 Intel Corporation. All rights reserved. + */ + +#include +#include +#include +#include "nl80211.h" + +static int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + int err; + + if (!netif_running(wdev->netdev)) + return 0; + + wdev->wext.connect.ie = wdev->wext.ie; + wdev->wext.connect.ie_len = wdev->wext.ie_len; + wdev->wext.connect.privacy = wdev->wext.default_key != -1; + + err = 0; + if (wdev->wext.connect.ssid_len != 0) + err = cfg80211_connect(rdev, wdev->netdev, + &wdev->wext.connect); + + return err; +} + +int cfg80211_mgd_wext_siwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct ieee80211_channel *chan; + int err; + + /* call only for station! 
*/ + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return -EINVAL; + + chan = cfg80211_wext_freq(wdev->wiphy, freq); + if (chan && IS_ERR(chan)) + return PTR_ERR(chan); + + if (chan && (chan->flags & IEEE80211_CHAN_DISABLED)) + return -EINVAL; + + if (wdev->wext.connect.channel == chan) + return 0; + + if (wdev->sme_state != CFG80211_SME_IDLE) { + bool event = true; + /* if SSID set, we'll try right again, avoid event */ + if (wdev->wext.connect.ssid_len) + event = false; + err = cfg80211_disconnect(wiphy_to_dev(wdev->wiphy), + dev, WLAN_REASON_DEAUTH_LEAVING, + event); + if (err) + return err; + } + + wdev->wext.connect.channel = chan; + + /* SSID is not set, we just want to switch channel */ + if (wdev->wext.connect.ssid_len && chan) { + if (!rdev->ops->set_channel) + return -EOPNOTSUPP; + + return rdev->ops->set_channel(wdev->wiphy, chan, + NL80211_CHAN_NO_HT); + } + + return cfg80211_mgd_wext_connect(wiphy_to_dev(wdev->wiphy), wdev); +} +/* temporary symbol - mark GPL - in the future the handler won't be */ +EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_siwfreq); + +int cfg80211_mgd_wext_giwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct ieee80211_channel *chan = NULL; + + /* call only for station! */ + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return -EINVAL; + + if (wdev->current_bss) + chan = wdev->current_bss->channel; + else if (wdev->wext.connect.channel) + chan = wdev->wext.connect.channel; + + if (chan) { + freq->m = chan->center_freq; + freq->e = 6; + return 0; + } + + /* no channel if not joining */ + return -EINVAL; +} +/* temporary symbol - mark GPL - in the future the handler won't be */ +EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_giwfreq); + +int cfg80211_mgd_wext_siwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + size_t len = data->length; + int err; + + /* call only for station! */ + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return -EINVAL; + + if (!data->flags) + len = 0; + + /* iwconfig uses nul termination in SSID.. */ + if (len > 0 && ssid[len - 1] == '\0') + len--; + + if (wdev->wext.connect.ssid && len && + len == wdev->wext.connect.ssid_len && + memcmp(wdev->wext.connect.ssid, ssid, len)) + return 0; + + if (wdev->sme_state != CFG80211_SME_IDLE) { + bool event = true; + /* if SSID set now, we'll try to connect, avoid event */ + if (len) + event = false; + err = cfg80211_disconnect(wiphy_to_dev(wdev->wiphy), + dev, WLAN_REASON_DEAUTH_LEAVING, + event); + if (err) + return err; + } + + wdev->wext.connect.ssid = wdev->wext.ssid; + memcpy(wdev->wext.ssid, ssid, len); + wdev->wext.connect.ssid_len = len; + + wdev->wext.connect.crypto.control_port = false; + + return cfg80211_mgd_wext_connect(wiphy_to_dev(wdev->wiphy), wdev); +} +/* temporary symbol - mark GPL - in the future the handler won't be */ +EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_siwessid); + +int cfg80211_mgd_wext_giwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + /* call only for station! 
*/ + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return -EINVAL; + + data->flags = 0; + + if (wdev->ssid_len) { + data->flags = 1; + data->length = wdev->ssid_len; + memcpy(ssid, wdev->ssid, data->length); + } else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) { + data->flags = 1; + data->length = wdev->wext.connect.ssid_len; + memcpy(ssid, wdev->wext.connect.ssid, data->length); + } else + data->flags = 0; + + return 0; +} +/* temporary symbol - mark GPL - in the future the handler won't be */ +EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_giwessid); + +int cfg80211_mgd_wext_siwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + u8 *bssid = ap_addr->sa_data; + int err; + + /* call only for station! */ + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return -EINVAL; + + if (ap_addr->sa_family != ARPHRD_ETHER) + return -EINVAL; + + /* automatic mode */ + if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) + bssid = NULL; + + /* both automatic */ + if (!bssid && !wdev->wext.connect.bssid) + return 0; + + /* fixed already - and no change */ + if (wdev->wext.connect.bssid && bssid && + compare_ether_addr(bssid, wdev->wext.connect.bssid) == 0) + return 0; + + if (wdev->sme_state != CFG80211_SME_IDLE) { + err = cfg80211_disconnect(wiphy_to_dev(wdev->wiphy), + dev, WLAN_REASON_DEAUTH_LEAVING, + false); + if (err) + return err; + } + + if (bssid) { + memcpy(wdev->wext.bssid, bssid, ETH_ALEN); + wdev->wext.connect.bssid = wdev->wext.bssid; + } else + wdev->wext.connect.bssid = NULL; + + return cfg80211_mgd_wext_connect(wiphy_to_dev(wdev->wiphy), wdev); +} +/* temporary symbol - mark GPL - in the future the handler won't be */ +EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_siwap); + +int cfg80211_mgd_wext_giwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + /* call only for station! 
*/ + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return -EINVAL; + + ap_addr->sa_family = ARPHRD_ETHER; + + if (wdev->current_bss) + memcpy(ap_addr->sa_data, wdev->current_bss->bssid, ETH_ALEN); + else if (wdev->wext.connect.bssid) + memcpy(ap_addr->sa_data, wdev->wext.connect.bssid, ETH_ALEN); + else + memset(ap_addr->sa_data, 0, ETH_ALEN); + + return 0; +} +/* temporary symbol - mark GPL - in the future the handler won't be */ +EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_giwap); + +int cfg80211_wext_siwgenie(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + u8 *ie = extra; + int ie_len = data->length, err; + + if (wdev->iftype != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + if (!ie_len) + ie = NULL; + + /* no change */ + if (wdev->wext.ie_len == ie_len && + memcmp(wdev->wext.ie, ie, ie_len) == 0) + return 0; + + if (ie_len) { + ie = kmemdup(extra, ie_len, GFP_KERNEL); + if (!ie) + return -ENOMEM; + } else + ie = NULL; + + kfree(wdev->wext.ie); + wdev->wext.ie = ie; + wdev->wext.ie_len = ie_len; + + if (wdev->sme_state != CFG80211_SME_IDLE) { + err = cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, false); + if (err) + return err; + } + + /* userspace better not think we'll reconnect */ + return 0; +} +EXPORT_SYMBOL_GPL(cfg80211_wext_siwgenie); + +int cfg80211_wext_siwmlme(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct iw_mlme *mlme = (struct iw_mlme *)extra; + struct cfg80211_registered_device *rdev; + + if (!wdev) + return -EOPNOTSUPP; + + rdev = wiphy_to_dev(wdev->wiphy); + + if (wdev->iftype != NL80211_IFTYPE_STATION) + return -EINVAL; + + if (mlme->addr.sa_family != ARPHRD_ETHER) + return -EINVAL; + + switch (mlme->cmd) { + case IW_MLME_DEAUTH: + case IW_MLME_DISASSOC: + return cfg80211_disconnect(rdev, dev, mlme->reason_code, + true); + default: + return -EOPNOTSUPP; + } +} +EXPORT_SYMBOL_GPL(cfg80211_wext_siwmlme); -- cgit v1.2.3 From bc92afd92088ab41223383cc6863ab4792533c54 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:26:57 +0200 Subject: cfg80211: implement iwpower Just on/off and timeout, and with a hacky cfg80211 method until we figure out what we want, though this is probably sufficient as we want to use pm_qos for wifi everywhere. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwmc3200wifi/cfg80211.c | 22 +++++++++ drivers/net/wireless/iwmc3200wifi/wext.c | 47 +------------------ include/net/cfg80211.h | 13 ++++++ net/mac80211/Kconfig | 16 ------- net/mac80211/cfg.c | 26 +++++++++++ net/mac80211/mlme.c | 5 -- net/mac80211/wext.c | 70 +--------------------------- net/wireless/Kconfig | 16 +++++++ net/wireless/core.c | 11 ++++- net/wireless/wext-compat.c | 60 ++++++++++++++++++++++++ 10 files changed, 151 insertions(+), 135 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwmc3200wifi/cfg80211.c b/drivers/net/wireless/iwmc3200wifi/cfg80211.c index d0629d4757d7..54bebba8e27e 100644 --- a/drivers/net/wireless/iwmc3200wifi/cfg80211.c +++ b/drivers/net/wireless/iwmc3200wifi/cfg80211.c @@ -522,6 +522,27 @@ static int iwm_cfg80211_get_txpower(struct wiphy *wiphy, int *dbm) return 0; } +static int iwm_cfg80211_set_power_mgmt(struct wiphy *wiphy, + struct net_device *dev, + bool enabled, int timeout) +{ + struct iwm_priv *iwm = wiphy_to_iwm(wiphy); + u32 power_index; + + if (enabled) + power_index = IWM_POWER_INDEX_DEFAULT; + else + power_index = IWM_POWER_INDEX_MIN; + + if (power_index == iwm->conf.power_index) + return 0; + + iwm->conf.power_index = power_index; + + return iwm_umac_set_config_fix(iwm, UMAC_PARAM_TBL_CFG_FIX, + CFG_POWER_INDEX, iwm->conf.power_index); +} + static struct cfg80211_ops iwm_cfg80211_ops = { .change_virtual_intf = iwm_cfg80211_change_iface, .add_key = iwm_cfg80211_add_key, @@ -534,6 +555,7 @@ static struct cfg80211_ops iwm_cfg80211_ops = { .leave_ibss = iwm_cfg80211_leave_ibss, .set_tx_power = iwm_cfg80211_set_txpower, .get_tx_power = iwm_cfg80211_get_txpower, + .set_power_mgmt = iwm_cfg80211_set_power_mgmt, }; struct wireless_dev *iwm_wdev_alloc(int sizeof_bus, struct device *dev) diff --git a/drivers/net/wireless/iwmc3200wifi/wext.c b/drivers/net/wireless/iwmc3200wifi/wext.c index 973457383c11..2e7eaf96cf93 100644 --- a/drivers/net/wireless/iwmc3200wifi/wext.c +++ b/drivers/net/wireless/iwmc3200wifi/wext.c @@ -238,49 +238,6 @@ static int iwm_set_wpa_version(struct iwm_priv *iwm, u8 wpa_version) return 0; } -static int iwm_wext_siwpower(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *wrq, char *extra) -{ - struct iwm_priv *iwm = ndev_to_iwm(dev); - u32 power_index; - - if (wrq->disabled) { - power_index = IWM_POWER_INDEX_MIN; - goto set; - } else - power_index = IWM_POWER_INDEX_DEFAULT; - - switch (wrq->flags & IW_POWER_MODE) { - case IW_POWER_ON: - case IW_POWER_MODE: - case IW_POWER_ALL_R: - break; - default: - return -EINVAL; - } - - set: - if (power_index == iwm->conf.power_index) - return 0; - - iwm->conf.power_index = power_index; - - return iwm_umac_set_config_fix(iwm, UMAC_PARAM_TBL_CFG_FIX, - CFG_POWER_INDEX, iwm->conf.power_index); -} - -static int iwm_wext_giwpower(struct net_device *dev, - struct iw_request_info *info, - union iwreq_data *wrqu, char *extra) -{ - struct iwm_priv *iwm = ndev_to_iwm(dev); - - wrqu->power.disabled = (iwm->conf.power_index == IWM_POWER_INDEX_MIN); - - return 0; -} - static int iwm_set_key_mgt(struct iwm_priv *iwm, u8 key_mgt) { u8 *auth_type = &iwm->umac_profile->sec.auth_type; @@ -458,8 +415,8 @@ static const iw_handler iwm_handlers[] = (iw_handler) NULL, /* SIOCGIWRETRY */ (iw_handler) cfg80211_wext_siwencode, /* SIOCSIWENCODE */ (iw_handler) cfg80211_wext_giwencode, /* SIOCGIWENCODE */ - (iw_handler) iwm_wext_siwpower, /* SIOCSIWPOWER */ - (iw_handler) iwm_wext_giwpower, /* SIOCGIWPOWER */ + (iw_handler) 
cfg80211_wext_siwpower, /* SIOCSIWPOWER */ + (iw_handler) cfg80211_wext_giwpower, /* SIOCGIWPOWER */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* SIOCSIWGENIE */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 07085216532d..82b7d804f6da 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1023,6 +1023,10 @@ struct cfg80211_ops { #ifdef CONFIG_NL80211_TESTMODE int (*testmode_cmd)(struct wiphy *wiphy, void *data, int len); #endif + + /* some temporary stuff to finish wext */ + int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, + bool enabled, int timeout); }; /* @@ -1262,6 +1266,8 @@ struct wireless_dev { u8 bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; s8 default_key, default_mgmt_key; + bool ps; + int ps_timeout; } wext; #endif }; @@ -1606,6 +1612,13 @@ int cfg80211_wext_giwtxpower(struct net_device *dev, struct iw_request_info *info, union iwreq_data *data, char *keybuf); +int cfg80211_wext_siwpower(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *wrq, char *extra); +int cfg80211_wext_giwpower(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *wrq, char *extra); + /* * callbacks for asynchronous cfg80211 methods, notification * functions and BSS handling helpers diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index ba2643a43c73..41a32cd919af 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -14,22 +14,6 @@ config MAC80211 comment "CFG80211 needs to be enabled for MAC80211" depends on CFG80211=n -config MAC80211_DEFAULT_PS - bool "enable powersave by default" - depends on MAC80211 - default y - help - This option enables powersave mode by default. - - If this causes your applications to misbehave you should fix your - applications instead -- they need to register their network - latency requirement, see Documentation/power/pm_qos_interface.txt. 
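The help text above points applications at pm_qos rather than at disabling powersave globally. As a rough illustration only (not part of these patches), a latency-sensitive program holds a request by keeping the pm_qos misc device open and writing an s32 latency bound in microseconds; the /dev/network_latency node and the binary write format are the ones described in Documentation/power/pm_qos_interface.txt, and the helper name below is made up for the example:

/* Sketch: register a network latency requirement with pm_qos from
 * userspace.  The request stays active for as long as the returned
 * file descriptor is held open; closing it drops the requirement.
 */
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>

static int request_low_network_latency(int32_t usec)
{
        int fd = open("/dev/network_latency", O_RDWR);

        if (fd < 0)
                return -1;

        /* pm_qos expects a raw binary s32, here in microseconds */
        if (write(fd, &usec, sizeof(usec)) != (ssize_t)sizeof(usec)) {
                close(fd);
                return -1;
        }

        return fd;
}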
- -config MAC80211_DEFAULT_PS_VALUE - int - default 1 if MAC80211_DEFAULT_PS - default 0 - menu "Rate control algorithm selection" depends on MAC80211 != n diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 03de4024597a..8c7b2cdbeeda 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1388,6 +1388,31 @@ int ieee80211_testmode_cmd(struct wiphy *wiphy, void *data, int len) } #endif +static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, + bool enabled, int timeout) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_conf *conf = &local->hw.conf; + + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) + return -EOPNOTSUPP; + + if (enabled == sdata->u.mgd.powersave && + timeout == conf->dynamic_ps_timeout) + return 0; + + sdata->u.mgd.powersave = enabled; + conf->dynamic_ps_timeout = timeout; + + if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + + ieee80211_recalc_ps(local, -1); + + return 0; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1431,4 +1456,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_tx_power = ieee80211_get_tx_power, .rfkill_poll = ieee80211_rfkill_poll, CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) + .set_power_mgmt = ieee80211_set_power_mgmt, }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index fbb93a70ddc7..2a7860009f9c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2360,11 +2360,6 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->flags |= IEEE80211_STA_WMM_ENABLED; hw_flags = sdata->local->hw.flags; - - if (hw_flags & IEEE80211_HW_SUPPORTS_PS) { - ifmgd->powersave = CONFIG_MAC80211_DEFAULT_PS_VALUE; - sdata->local->hw.conf.dynamic_ps_timeout = 500; - } } /* configuration hooks */ diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index d4e61dc903e8..f77929802c7a 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -255,72 +255,6 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, return 0; } -static int ieee80211_ioctl_siwpower(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *wrq, - char *extra) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_conf *conf = &local->hw.conf; - int timeout = 0; - bool ps; - - if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) - return -EOPNOTSUPP; - - if (sdata->vif.type != NL80211_IFTYPE_STATION) - return -EINVAL; - - if (wrq->disabled) { - ps = false; - timeout = 0; - goto set; - } - - switch (wrq->flags & IW_POWER_MODE) { - case IW_POWER_ON: /* If not specified */ - case IW_POWER_MODE: /* If set all mask */ - case IW_POWER_ALL_R: /* If explicitely state all */ - ps = true; - break; - default: /* Otherwise we ignore */ - return -EINVAL; - } - - if (wrq->flags & ~(IW_POWER_MODE | IW_POWER_TIMEOUT)) - return -EINVAL; - - if (wrq->flags & IW_POWER_TIMEOUT) - timeout = wrq->value / 1000; - - set: - if (ps == sdata->u.mgd.powersave && timeout == conf->dynamic_ps_timeout) - return 0; - - sdata->u.mgd.powersave = ps; - conf->dynamic_ps_timeout = timeout; - - if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); - - ieee80211_recalc_ps(local, -1); - - return 0; -} - -static int 
ieee80211_ioctl_giwpower(struct net_device *dev, - struct iw_request_info *info, - union iwreq_data *wrqu, - char *extra) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - wrqu->power.disabled = !sdata->u.mgd.powersave; - - return 0; -} - /* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev) { @@ -436,8 +370,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) cfg80211_wext_giwretry, /* SIOCGIWRETRY */ (iw_handler) cfg80211_wext_siwencode, /* SIOCSIWENCODE */ (iw_handler) cfg80211_wext_giwencode, /* SIOCGIWENCODE */ - (iw_handler) ieee80211_ioctl_siwpower, /* SIOCSIWPOWER */ - (iw_handler) ieee80211_ioctl_giwpower, /* SIOCGIWPOWER */ + (iw_handler) cfg80211_wext_siwpower, /* SIOCSIWPOWER */ + (iw_handler) cfg80211_wext_giwpower, /* SIOCGIWPOWER */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) cfg80211_wext_siwgenie, /* SIOCSIWGENIE */ diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 040263118a20..c6031d5b135f 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -26,6 +26,22 @@ config CFG80211_REG_DEBUG If unsure, say N. +config CFG80211_DEFAULT_PS + bool "enable powersave by default" + depends on CFG80211 + default y + help + This option enables powersave mode by default. + + If this causes your applications to misbehave you should fix your + applications instead -- they need to register their network + latency requirement, see Documentation/power/pm_qos_interface.txt. + +config CFG80211_DEFAULT_PS_VALUE + int + default 1 if CFG80211_DEFAULT_PS + default 0 + config CFG80211_DEBUGFS bool "cfg80211 DebugFS entries" depends on CFG80211 && DEBUG_FS diff --git a/net/wireless/core.c b/net/wireless/core.c index e2f80dd0e4a6..413d291d07d7 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -550,12 +550,21 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, } wdev->netdev = dev; wdev->sme_state = CFG80211_SME_IDLE; + mutex_unlock(&rdev->devlist_mtx); #ifdef CONFIG_WIRELESS_EXT wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; + wdev->wext.ps = CONFIG_CFG80211_DEFAULT_PS_VALUE; + wdev->wext.ps_timeout = 500; + if (rdev->ops->set_power_mgmt) + if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, + wdev->wext.ps, + wdev->wext.ps_timeout)) { + /* assume this means it's off */ + wdev->wext.ps = false; + } #endif - mutex_unlock(&rdev->devlist_mtx); break; case NETDEV_GOING_DOWN: if (!wdev->ssid_len) diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 02f052fc1808..2e1ab78fb0d7 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -987,3 +987,63 @@ int cfg80211_wext_giwauth(struct net_device *dev, return -EOPNOTSUPP; } EXPORT_SYMBOL_GPL(cfg80211_wext_giwauth); + +int cfg80211_wext_siwpower(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *wrq, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + bool ps = wdev->wext.ps; + int timeout = wdev->wext.ps_timeout; + int err; + + if (wdev->iftype != NL80211_IFTYPE_STATION) + return -EINVAL; + + if (!rdev->ops->set_power_mgmt) + return -EOPNOTSUPP; + + if (wrq->disabled) { + ps = false; + } else { + switch (wrq->flags & IW_POWER_MODE) { + case IW_POWER_ON: /* If not specified */ + case IW_POWER_MODE: /* If 
set all mask */ + case IW_POWER_ALL_R: /* If explicitely state all */ + ps = true; + break; + default: /* Otherwise we ignore */ + return -EINVAL; + } + + if (wrq->flags & ~(IW_POWER_MODE | IW_POWER_TIMEOUT)) + return -EINVAL; + + if (wrq->flags & IW_POWER_TIMEOUT) + timeout = wrq->value / 1000; + } + + err = rdev->ops->set_power_mgmt(wdev->wiphy, dev, ps, timeout); + if (err) + return err; + + wdev->wext.ps = ps; + wdev->wext.ps_timeout = timeout; + + return 0; + +} +EXPORT_SYMBOL_GPL(cfg80211_wext_siwpower); + +int cfg80211_wext_giwpower(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *wrq, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + wrq->disabled = !wdev->wext.ps; + + return 0; +} +EXPORT_SYMBOL_GPL(cfg80211_wext_giwpower); -- cgit v1.2.3 From ab737a4f7dbe57b12b73f482a7b973bf00b41942 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:26:58 +0200 Subject: cfg80211: implement IWAP for WDS This implements siocsiwap/giwap for WDS mode. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 10 ++++++++++ net/mac80211/cfg.c | 11 +++++++++++ net/mac80211/wext.c | 26 ++++---------------------- net/wireless/wext-compat.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 82b7d804f6da..b396d11564bc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1018,6 +1018,9 @@ struct cfg80211_ops { enum tx_power_setting type, int dbm); int (*get_tx_power)(struct wiphy *wiphy, int *dbm); + int (*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev, + u8 *addr); + void (*rfkill_poll)(struct wiphy *wiphy); #ifdef CONFIG_NL80211_TESTMODE @@ -1619,6 +1622,13 @@ int cfg80211_wext_giwpower(struct net_device *dev, struct iw_request_info *info, struct iw_param *wrq, char *extra); +int cfg80211_wds_wext_siwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *addr, char *extra); +int cfg80211_wds_wext_giwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *addr, char *extra); + /* * callbacks for asynchronous cfg80211 methods, notification * functions and BSS handling helpers diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8c7b2cdbeeda..2cf5bf6378e4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1369,6 +1369,16 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, int *dbm) return 0; } +static int ieee80211_set_wds_peer(struct wiphy *wiphy, struct net_device *dev, + u8 *addr) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + memcpy(&sdata->u.wds.remote_addr, addr, ETH_ALEN); + + return 0; +} + static void ieee80211_rfkill_poll(struct wiphy *wiphy) { struct ieee80211_local *local = wiphy_priv(wiphy); @@ -1454,6 +1464,7 @@ struct cfg80211_ops mac80211_config_ops = { .set_wiphy_params = ieee80211_set_wiphy_params, .set_tx_power = ieee80211_set_tx_power, .get_tx_power = ieee80211_get_tx_power, + .set_wds_peer = ieee80211_set_wds_peer, .rfkill_poll = ieee80211_rfkill_poll, CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) .set_power_mgmt = ieee80211_set_power_mgmt, diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index f77929802c7a..4053d766af2d 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -140,23 +140,8 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, if (sdata->vif.type == NL80211_IFTYPE_STATION) return 
cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); - if (sdata->vif.type == NL80211_IFTYPE_WDS) { - /* - * If it is necessary to update the WDS peer address - * while the interface is running, then we need to do - * more work here, namely if it is running we need to - * add a new and remove the old STA entry, this is - * normally handled by _open() and _stop(). - */ - if (netif_running(dev)) - return -EBUSY; - - memcpy(&sdata->u.wds.remote_addr, (u8 *) &ap_addr->sa_data, - ETH_ALEN); - - return 0; - } - + if (sdata->vif.type == NL80211_IFTYPE_WDS) + return cfg80211_wds_wext_siwap(dev, info, ap_addr, extra); return -EOPNOTSUPP; } @@ -173,11 +158,8 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, if (sdata->vif.type == NL80211_IFTYPE_STATION) return cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); - if (sdata->vif.type == NL80211_IFTYPE_WDS) { - ap_addr->sa_family = ARPHRD_ETHER; - memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); - return 0; - } + if (sdata->vif.type == NL80211_IFTYPE_WDS) + return cfg80211_wds_wext_giwap(dev, info, ap_addr, extra); return -EOPNOTSUPP; } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 2e1ab78fb0d7..2f72dae2634f 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1047,3 +1047,49 @@ int cfg80211_wext_giwpower(struct net_device *dev, return 0; } EXPORT_SYMBOL_GPL(cfg80211_wext_giwpower); + +int cfg80211_wds_wext_siwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *addr, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + int err; + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_WDS)) + return -EINVAL; + + if (addr->sa_family != ARPHRD_ETHER) + return -EINVAL; + + if (netif_running(dev)) + return -EBUSY; + + if (!rdev->ops->set_wds_peer) + return -EOPNOTSUPP; + + err = rdev->ops->set_wds_peer(wdev->wiphy, dev, (u8 *) &addr->sa_data); + if (err) + return err; + + memcpy(&wdev->wext.bssid, (u8 *) &addr->sa_data, ETH_ALEN); + + return 0; +} +EXPORT_SYMBOL_GPL(cfg80211_wds_wext_siwap); + +int cfg80211_wds_wext_giwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *addr, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_WDS)) + return -EINVAL; + + addr->sa_family = ARPHRD_ETHER; + memcpy(&addr->sa_data, wdev->wext.bssid, ETH_ALEN); + + return 0; +} +EXPORT_SYMBOL_GPL(cfg80211_wds_wext_giwap); -- cgit v1.2.3 From 9930380f0bd8405fa6a51d644f3de88c30666519 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:26:59 +0200 Subject: cfg80211: implement IWRATE For now, let's implement that using a very hackish way: simply mirror the wext API in the cfg80211 API. This will have to be changed later when we implement proper bitrate API. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 32 +++++++++++++++++++ net/mac80211/cfg.c | 43 ++++++++++++++++++++++++++ net/mac80211/wext.c | 76 ++-------------------------------------------- net/wireless/wext-compat.c | 63 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 140 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index b396d11564bc..579085564883 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -815,6 +815,26 @@ enum tx_power_setting { TX_POWER_FIXED, }; +/* + * cfg80211_bitrate_mask - masks for bitrate control + */ +struct cfg80211_bitrate_mask { +/* + * As discussed in Berlin, this struct really + * should look like this: + + struct { + u32 legacy; + u8 mcs[IEEE80211_HT_MCS_MASK_LEN]; + } control[IEEE80211_NUM_BANDS]; + + * Since we can always fix in-kernel users, let's keep + * it simpler for now: + */ + u32 fixed; /* fixed bitrate, 0 == not fixed */ + u32 maxrate; /* in kbps, 0 == no limit */ +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -1027,6 +1047,11 @@ struct cfg80211_ops { int (*testmode_cmd)(struct wiphy *wiphy, void *data, int len); #endif + int (*set_bitrate_mask)(struct wiphy *wiphy, + struct net_device *dev, + const u8 *peer, + const struct cfg80211_bitrate_mask *mask); + /* some temporary stuff to finish wext */ int (*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev, bool enabled, int timeout); @@ -1581,6 +1606,13 @@ int cfg80211_wext_giwauth(struct net_device *dev, struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq); +int cfg80211_wext_siwrate(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rate, char *extra); +int cfg80211_wext_giwrate(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rate, char *extra); + int cfg80211_wext_siwrts(struct net_device *dev, struct iw_request_info *info, struct iw_param *rts, char *extra); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 2cf5bf6378e4..028f6430879d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1423,6 +1423,48 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, return 0; } +static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, + struct net_device *dev, + const u8 *addr, + const struct cfg80211_bitrate_mask *mask) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + int i, err = -EINVAL; + u32 target_rate; + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + /* target_rate = -1, rate->fixed = 0 means auto only, so use all rates + * target_rate = X, rate->fixed = 1 means only rate X + * target_rate = X, rate->fixed = 0 means all rates <= X */ + sdata->max_ratectrl_rateidx = -1; + sdata->force_unicast_rateidx = -1; + + if (mask->fixed) + target_rate = mask->fixed / 100; + else if (mask->maxrate) + target_rate = mask->maxrate / 100; + else + return 0; + + for (i=0; i< sband->n_bitrates; i++) { + struct ieee80211_rate *brate = &sband->bitrates[i]; + int this_rate = brate->bitrate; + + if (target_rate == this_rate) { + sdata->max_ratectrl_rateidx = i; + if (mask->fixed) + sdata->force_unicast_rateidx = i; + err = 0; + break; + } + } + + return err; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1468,4 +1510,5 @@ 
struct cfg80211_ops mac80211_config_ops = { .rfkill_poll = ieee80211_rfkill_poll, CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd) .set_power_mgmt = ieee80211_set_power_mgmt, + .set_bitrate_mask = ieee80211_set_bitrate_mask, }; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 4053d766af2d..244d830f5cfb 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -165,78 +165,6 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, } -static int ieee80211_ioctl_siwrate(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *rate, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - int i, err = -EINVAL; - u32 target_rate = rate->value / 100000; - struct ieee80211_sub_if_data *sdata; - struct ieee80211_supported_band *sband; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - /* target_rate = -1, rate->fixed = 0 means auto only, so use all rates - * target_rate = X, rate->fixed = 1 means only rate X - * target_rate = X, rate->fixed = 0 means all rates <= X */ - sdata->max_ratectrl_rateidx = -1; - sdata->force_unicast_rateidx = -1; - if (rate->value < 0) - return 0; - - for (i=0; i< sband->n_bitrates; i++) { - struct ieee80211_rate *brate = &sband->bitrates[i]; - int this_rate = brate->bitrate; - - if (target_rate == this_rate) { - sdata->max_ratectrl_rateidx = i; - if (rate->fixed) - sdata->force_unicast_rateidx = i; - err = 0; - break; - } - } - return err; -} - -static int ieee80211_ioctl_giwrate(struct net_device *dev, - struct iw_request_info *info, - struct iw_param *rate, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata; - struct ieee80211_supported_band *sband; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (sdata->vif.type != NL80211_IFTYPE_STATION) - return -EOPNOTSUPP; - - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - rcu_read_lock(); - - sta = sta_info_get(local, sdata->u.mgd.bssid); - - if (sta && !(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) - rate->value = sband->bitrates[sta->last_tx_rate.idx].bitrate; - else - rate->value = 0; - - rcu_read_unlock(); - - if (!sta) - return -ENODEV; - - rate->value *= 100000; - - return 0; -} - /* Get wireless statistics. 
Called by /proc/net/wireless and by SIOCGIWSTATS */ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev) { @@ -340,8 +268,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* SIOCGIWNICKN */ (iw_handler) NULL, /* -- hole -- */ (iw_handler) NULL, /* -- hole -- */ - (iw_handler) ieee80211_ioctl_siwrate, /* SIOCSIWRATE */ - (iw_handler) ieee80211_ioctl_giwrate, /* SIOCGIWRATE */ + (iw_handler) cfg80211_wext_siwrate, /* SIOCSIWRATE */ + (iw_handler) cfg80211_wext_giwrate, /* SIOCGIWRATE */ (iw_handler) cfg80211_wext_siwrts, /* SIOCSIWRTS */ (iw_handler) cfg80211_wext_giwrts, /* SIOCGIWRTS */ (iw_handler) cfg80211_wext_siwfrag, /* SIOCSIWFRAG */ diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 2f72dae2634f..3a5f999703f1 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1093,3 +1093,66 @@ int cfg80211_wds_wext_giwap(struct net_device *dev, return 0; } EXPORT_SYMBOL_GPL(cfg80211_wds_wext_giwap); + +int cfg80211_wext_siwrate(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rate, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_bitrate_mask mask; + + if (!rdev->ops->set_bitrate_mask) + return -EOPNOTSUPP; + + mask.fixed = 0; + mask.maxrate = 0; + + if (rate->value < 0) { + /* nothing */ + } else if (rate->fixed) { + mask.fixed = rate->value / 1000; /* kbps */ + } else { + mask.maxrate = rate->value / 1000; /* kbps */ + } + + return rdev->ops->set_bitrate_mask(wdev->wiphy, dev, NULL, &mask); +} +EXPORT_SYMBOL_GPL(cfg80211_wext_siwrate); + +int cfg80211_wext_giwrate(struct net_device *dev, + struct iw_request_info *info, + struct iw_param *rate, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + /* we are under RTNL - globally locked - so can use a static struct */ + static struct station_info sinfo; + u8 *addr; + int err; + + if (wdev->iftype != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + if (!rdev->ops->get_station) + return -EOPNOTSUPP; + + addr = wdev->wext.connect.bssid; + if (!addr) + return -EOPNOTSUPP; + + err = rdev->ops->get_station(&rdev->wiphy, dev, addr, &sinfo); + if (err) + return err; + + if (!(sinfo.filled & STATION_INFO_TX_BITRATE)) + return -EOPNOTSUPP; + + rate->value = 0; + + if (!(sinfo.txrate.flags & RATE_INFO_FLAGS_MCS)) + rate->value = 100000 * sinfo.txrate.legacy; + + return 0; +} +EXPORT_SYMBOL_GPL(cfg80211_wext_giwrate); -- cgit v1.2.3 From 8990646d2fafeacfacba4a4b1073a4216662089a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 21:27:00 +0200 Subject: cfg80211: implement get_wireless_stats By dropping the noise reporting, we can implement wireless stats in cfg80211. We also make the handler return NULL if we have no information, which is possible thanks to the recent wext change. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 1 + net/mac80211/wext.c | 69 +--------------------------------------------- net/wireless/wext-compat.c | 59 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 579085564883..fe87819954a5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1646,6 +1646,7 @@ int cfg80211_wext_siwtxpower(struct net_device *dev, int cfg80211_wext_giwtxpower(struct net_device *dev, struct iw_request_info *info, union iwreq_data *data, char *keybuf); +struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev); int cfg80211_wext_siwpower(struct net_device *dev, struct iw_request_info *info, diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 244d830f5cfb..5acb8140ee58 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -165,73 +165,6 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, } -/* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ -static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct iw_statistics *wstats = &local->wstats; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct sta_info *sta = NULL; - - rcu_read_lock(); - - if (sdata->vif.type == NL80211_IFTYPE_STATION) - sta = sta_info_get(local, sdata->u.mgd.bssid); - - if (!sta) { - wstats->discard.fragment = 0; - wstats->discard.misc = 0; - wstats->qual.qual = 0; - wstats->qual.level = 0; - wstats->qual.noise = 0; - wstats->qual.updated = IW_QUAL_ALL_INVALID; - } else { - wstats->qual.updated = 0; - /* - * mirror what cfg80211 does for iwrange/scan results, - * otherwise userspace gets confused. - */ - if (local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC | - IEEE80211_HW_SIGNAL_DBM)) { - wstats->qual.updated |= IW_QUAL_LEVEL_UPDATED; - wstats->qual.updated |= IW_QUAL_QUAL_UPDATED; - } else { - wstats->qual.updated |= IW_QUAL_LEVEL_INVALID; - wstats->qual.updated |= IW_QUAL_QUAL_INVALID; - } - - if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) { - wstats->qual.level = sta->last_signal; - wstats->qual.qual = sta->last_signal; - } else if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { - int sig = sta->last_signal; - - wstats->qual.updated |= IW_QUAL_DBM; - wstats->qual.level = sig; - if (sig < -110) - sig = -110; - else if (sig > -40) - sig = -40; - wstats->qual.qual = sig + 110; - } - - if (local->hw.flags & IEEE80211_HW_NOISE_DBM) { - /* - * This assumes that if driver reports noise, it also - * reports signal in dBm. 
- */ - wstats->qual.noise = sta->last_noise; - wstats->qual.updated |= IW_QUAL_NOISE_UPDATED; - } else { - wstats->qual.updated |= IW_QUAL_NOISE_INVALID; - } - } - - rcu_read_unlock(); - - return wstats; -} - /* Structures to export the Wireless Handlers */ static const iw_handler ieee80211_handler[] = @@ -298,5 +231,5 @@ const struct iw_handler_def ieee80211_iw_handler_def = { .num_standard = ARRAY_SIZE(ieee80211_handler), .standard = (iw_handler *) ieee80211_handler, - .get_wireless_stats = ieee80211_get_wireless_stats, + .get_wireless_stats = cfg80211_wireless_stats, }; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 3a5f999703f1..226cf8609079 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1156,3 +1156,62 @@ int cfg80211_wext_giwrate(struct net_device *dev, return 0; } EXPORT_SYMBOL_GPL(cfg80211_wext_giwrate); + +/* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ +struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + /* we are under RTNL - globally locked - so can use static structs */ + static struct iw_statistics wstats; + static struct station_info sinfo; + u8 *addr; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) + return NULL; + + if (!rdev->ops->get_station) + return NULL; + + addr = wdev->wext.connect.bssid; + if (!addr) + return NULL; + + if (rdev->ops->get_station(&rdev->wiphy, dev, addr, &sinfo)) + return NULL; + + memset(&wstats, 0, sizeof(wstats)); + + switch (rdev->wiphy.signal_type) { + case CFG80211_SIGNAL_TYPE_MBM: + if (sinfo.filled & STATION_INFO_SIGNAL) { + int sig = sinfo.signal; + wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; + wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; + wstats.qual.updated |= IW_QUAL_DBM; + wstats.qual.level = sig; + if (sig < -110) + sig = -110; + else if (sig > -40) + sig = -40; + wstats.qual.qual = sig + 110; + break; + } + case CFG80211_SIGNAL_TYPE_UNSPEC: + if (sinfo.filled & STATION_INFO_SIGNAL) { + wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; + wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; + wstats.qual.level = sinfo.signal; + wstats.qual.qual = sinfo.signal; + break; + } + default: + wstats.qual.updated |= IW_QUAL_LEVEL_INVALID; + wstats.qual.updated |= IW_QUAL_QUAL_INVALID; + } + + wstats.qual.updated |= IW_QUAL_NOISE_INVALID; + + return &wstats; +} +EXPORT_SYMBOL_GPL(cfg80211_wireless_stats); -- cgit v1.2.3 From 517357c685ccc4b5783cc7dbdae8824ada19a97f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 2 Jul 2009 17:18:40 +0200 Subject: cfg80211: assimilate and export ieee80211_bss_get_ie This function from mac80211 seems generally useful, and I will need it in cfg80211 soon. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 9 +++++++++ net/mac80211/mlme.c | 25 +++---------------------- net/wireless/util.c | 21 +++++++++++++++++++++ 3 files changed, 33 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fe87819954a5..eb026541f928 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -604,6 +604,15 @@ struct cfg80211_bss { u8 priv[0] __attribute__((__aligned__(sizeof(void *)))); }; +/** + * ieee80211_bss_get_ie - find IE with given ID + * @bss: the bss to search + * @ie: the IE ID + * Returns %NULL if not found. 
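As an aside on how the newly exported helper is consumed: it returns a pointer to the raw information element, so byte 0 is the element ID, byte 1 the payload length, and the payload starts at byte 2 (the mac80211 caller converted later in this series uses exactly that layout for the SSID). A minimal usage sketch under those assumptions; example_copy_ssid() is a made-up name and the struct cfg80211_bss pointer is assumed to come from scan results (e.g. cfg80211_get_bss()):

/* Illustrative only, not part of this patch: copy the SSID element
 * out of a scanned BSS.  ieee80211_bss_get_ie() hands back the raw
 * IE, i.e. ie[0] is the element ID and ie[1] the payload length.
 */
#include <linux/errno.h>
#include <linux/ieee80211.h>
#include <linux/string.h>
#include <net/cfg80211.h>

static int example_copy_ssid(struct cfg80211_bss *bss, u8 *buf, size_t buflen)
{
        const u8 *ie = ieee80211_bss_get_ie(bss, WLAN_EID_SSID);

        if (!ie || ie[1] > buflen)
                return -EINVAL;

        memcpy(buf, ie + 2, ie[1]);
        return ie[1];   /* length of the SSID copied into buf */
}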
+ */ +const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); + + /** * struct cfg80211_crypto_settings - Crypto settings * @wpa_versions: indicates which, if any, WPA versions are enabled diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2d9b6663253c..5748cda659c2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -46,26 +46,6 @@ static int ecw2cw(int ecw) return (1 << ecw) - 1; } -static u8 *ieee80211_bss_get_ie(struct ieee80211_bss *bss, u8 ie) -{ - u8 *end, *pos; - - pos = bss->cbss.information_elements; - if (pos == NULL) - return NULL; - end = pos + bss->cbss.len_information_elements; - - while (pos + 1 < end) { - if (pos + 2 + pos[1] > end) - break; - if (pos[0] == ie) - return pos; - pos += 2 + pos[1]; - } - - return NULL; -} - static int ieee80211_compatible_rates(struct ieee80211_bss *bss, struct ieee80211_supported_band *sband, u32 *rates) @@ -181,7 +161,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos, *ies, *ht_ie; + u8 *pos; + const u8 *ies, *ht_ie; int i, len, count, rates_len, supp_rates_len; u16 capab; struct ieee80211_bss *bss; @@ -345,7 +326,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) */ if (wmm && (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) && sband->ht_cap.ht_supported && - (ht_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_INFORMATION)) && + (ht_ie = ieee80211_bss_get_ie(&bss->cbss, WLAN_EID_HT_INFORMATION)) && ht_ie[1] >= sizeof(struct ieee80211_ht_info) && (!(ifmgd->flags & IEEE80211_STA_DISABLE_11N))) { struct ieee80211_ht_info *ht_info = diff --git a/net/wireless/util.c b/net/wireless/util.c index 25550692dda6..28f8f96801d4 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -502,3 +502,24 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb) return dscp >> 5; } EXPORT_SYMBOL(cfg80211_classify8021d); + +const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie) +{ + u8 *end, *pos; + + pos = bss->information_elements; + if (pos == NULL) + return NULL; + end = pos + bss->len_information_elements; + + while (pos + 1 < end) { + if (pos + 2 + pos[1] > end) + break; + if (pos[0] == ie) + return pos; + pos += 2 + pos[1]; + } + + return NULL; +} +EXPORT_SYMBOL(ieee80211_bss_get_ie); -- cgit v1.2.3 From 19957bb399e2722719c0e20c9ae91cf8b6aaff04 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 2 Jul 2009 17:20:43 +0200 Subject: cfg80211: keep track of BSSes In order to avoid problems with BSS structs going away while they're in use, I've long wanted to make cfg80211 keep track of them. Without the SME, that wasn't doable but now that we have the SME we can do this too. It can keep track of up to four separate authentications and one association, regardless of whether it's controlled by the cfg80211 SME or the userspace SME. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 86 +++--------- net/mac80211/cfg.c | 22 ++- net/mac80211/mlme.c | 6 +- net/wireless/core.c | 5 +- net/wireless/core.h | 41 +++++- net/wireless/ibss.c | 12 +- net/wireless/mlme.c | 357 ++++++++++++++++++++++++++++++++++++++++++++++-- net/wireless/nl80211.c | 144 +++++++++---------- net/wireless/scan.c | 31 +---- net/wireless/sme.c | 156 ++++++++++++--------- net/wireless/wext-sme.c | 4 +- 11 files changed, 589 insertions(+), 275 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index eb026541f928..ca986cc91098 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -584,7 +584,6 @@ enum cfg80211_signal_type { * is no guarantee that these are well-formed!) * @len_information_elements: total length of the information elements * @signal: signal strength value (type depends on the wiphy's signal_type) - * @hold: BSS should not expire * @free_priv: function pointer to free private data * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes */ @@ -642,33 +641,17 @@ struct cfg80211_crypto_settings { * * This structure provides information needed to complete IEEE 802.11 * authentication. - * NOTE: This structure will likely change when more code from mac80211 is - * moved into cfg80211 so that non-mac80211 drivers can benefit from it, too. - * Before using this in a driver that does not use mac80211, it would be better - * to check the status of that work and better yet, volunteer to work on it. - * - * @chan: The channel to use or %NULL if not specified (auto-select based on - * scan results) - * @peer_addr: The address of the peer STA (AP BSSID in infrastructure case); - * this field is required to be present; if the driver wants to help with - * BSS selection, it should use (yet to be added) MLME event to allow user - * space SME to be notified of roaming candidate, so that the SME can then - * use the authentication request with the recommended BSSID and whatever - * other data may be needed for authentication/association - * @ssid: SSID or %NULL if not yet available - * @ssid_len: Length of ssid in octets + * + * @bss: The BSS to authenticate with. * @auth_type: Authentication type (algorithm) * @ie: Extra IEs to add to Authentication frame or %NULL * @ie_len: Length of ie buffer in octets */ struct cfg80211_auth_request { - struct ieee80211_channel *chan; - u8 *peer_addr; - const u8 *ssid; - size_t ssid_len; - enum nl80211_auth_type auth_type; + struct cfg80211_bss *bss; const u8 *ie; size_t ie_len; + enum nl80211_auth_type auth_type; }; /** @@ -676,32 +659,18 @@ struct cfg80211_auth_request { * * This structure provides information needed to complete IEEE 802.11 * (re)association. - * NOTE: This structure will likely change when more code from mac80211 is - * moved into cfg80211 so that non-mac80211 drivers can benefit from it, too. - * Before using this in a driver that does not use mac80211, it would be better - * to check the status of that work and better yet, volunteer to work on it. - * - * @chan: The channel to use or %NULL if not specified (auto-select based on - * scan results) - * @peer_addr: The address of the peer STA (AP BSSID); this field is required - * to be present and the STA must be in State 2 (authenticated) with the - * peer STA - * @ssid: SSID - * @ssid_len: Length of ssid in octets + * @bss: The BSS to associate with. 
* @ie: Extra IEs to add to (Re)Association Request frame or %NULL * @ie_len: Length of ie buffer in octets * @use_mfp: Use management frame protection (IEEE 802.11w) in this association * @crypto: crypto settings */ struct cfg80211_assoc_request { - struct ieee80211_channel *chan; - u8 *peer_addr; - const u8 *ssid; - size_t ssid_len; + struct cfg80211_bss *bss; const u8 *ie; size_t ie_len; - bool use_mfp; struct cfg80211_crypto_settings crypto; + bool use_mfp; }; /** @@ -710,16 +679,16 @@ struct cfg80211_assoc_request { * This structure provides information needed to complete IEEE 802.11 * deauthentication. * - * @peer_addr: The address of the peer STA (AP BSSID); this field is required - * to be present and the STA must be authenticated with the peer STA + * @bss: the BSS to deauthenticate from * @ie: Extra IEs to add to Deauthentication frame or %NULL * @ie_len: Length of ie buffer in octets + * @reason_code: The reason code for the deauthentication */ struct cfg80211_deauth_request { - u8 *peer_addr; - u16 reason_code; + struct cfg80211_bss *bss; const u8 *ie; size_t ie_len; + u16 reason_code; }; /** @@ -728,16 +697,16 @@ struct cfg80211_deauth_request { * This structure provides information needed to complete IEEE 802.11 * disassocation. * - * @peer_addr: The address of the peer STA (AP BSSID); this field is required - * to be present and the STA must be associated with the peer STA + * @bss: the BSS to disassociate from * @ie: Extra IEs to add to Disassociation frame or %NULL * @ie_len: Length of ie buffer in octets + * @reason_code: The reason code for the disassociation */ struct cfg80211_disassoc_request { - u8 *peer_addr; - u16 reason_code; + struct cfg80211_bss *bss; const u8 *ie; size_t ie_len; + u16 reason_code; }; /** @@ -1252,6 +1221,9 @@ extern void wiphy_free(struct wiphy *wiphy); /* internal struct */ struct cfg80211_conn; +struct cfg80211_internal_bss; + +#define MAX_AUTH_BSSES 4 /** * struct wireless_dev - wireless per-netdev state @@ -1281,7 +1253,6 @@ struct wireless_dev { struct net_device *netdev; /* currently used for IBSS and SME - might be rearranged later */ - struct cfg80211_bss *current_bss; u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; enum { @@ -1291,6 +1262,10 @@ struct wireless_dev { } sme_state; struct cfg80211_conn *conn; + struct cfg80211_internal_bss *authtry_bsses[MAX_AUTH_BSSES]; + struct cfg80211_internal_bss *auth_bsses[MAX_AUTH_BSSES]; + struct cfg80211_internal_bss *current_bss; /* associated / joined */ + #ifdef CONFIG_WIRELESS_EXT /* wext data */ struct { @@ -1812,23 +1787,6 @@ void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, gfp */ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp); -/** - * cfg80211_hold_bss - exclude bss from expiration - * @bss: bss which should not expire - * - * In a case when the BSS is not updated but it shouldn't expire this - * function can be used to mark the BSS to be excluded from expiration. - */ -void cfg80211_hold_bss(struct cfg80211_bss *bss); - -/** - * cfg80211_unhold_bss - remove expiration exception from the BSS - * @bss: bss which can expire again - * - * This function marks the BSS to be expirable again. 
- */ -void cfg80211_unhold_bss(struct cfg80211_bss *bss); - /** * cfg80211_michael_mic_failure - notification of Michael MIC failure (TKIP) * @dev: network device diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7606571d4581..0f29cd0580c9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1173,6 +1173,7 @@ static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_auth_request *req) { struct ieee80211_sub_if_data *sdata; + const u8 *ssid; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -1193,15 +1194,16 @@ static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev, return -EOPNOTSUPP; } - memcpy(sdata->u.mgd.bssid, req->peer_addr, ETH_ALEN); + memcpy(sdata->u.mgd.bssid, req->bss->bssid, ETH_ALEN); - sdata->local->oper_channel = req->chan; + sdata->local->oper_channel = req->bss->channel; ieee80211_hw_config(sdata->local, 0); - if (!req->ssid) + ssid = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); + if (!ssid) return -EINVAL; - memcpy(sdata->u.mgd.ssid, req->ssid, req->ssid_len); - sdata->u.mgd.ssid_len = req->ssid_len; + sdata->u.mgd.ssid_len = *(ssid + 1); + memcpy(sdata->u.mgd.ssid, ssid + 2, sdata->u.mgd.ssid_len); kfree(sdata->u.mgd.sme_auth_ie); sdata->u.mgd.sme_auth_ie = NULL; @@ -1227,7 +1229,7 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (memcmp(sdata->u.mgd.bssid, req->peer_addr, ETH_ALEN) != 0 || + if (memcmp(sdata->u.mgd.bssid, req->bss->bssid, ETH_ALEN) != 0 || !(sdata->u.mgd.flags & IEEE80211_STA_AUTHENTICATED)) return -ENOLINK; /* not authenticated */ @@ -1239,15 +1241,9 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP104) sdata->u.mgd.flags |= IEEE80211_STA_DISABLE_11N; - sdata->local->oper_channel = req->chan; + sdata->local->oper_channel = req->bss->channel; ieee80211_hw_config(sdata->local, 0); - if (!req->ssid) - return -EINVAL; - - memcpy(sdata->u.mgd.ssid, req->ssid, req->ssid_len); - sdata->u.mgd.ssid_len = req->ssid_len; - ret = ieee80211_sta_set_extra_ie(sdata, req->ie, req->ie_len); if (ret && ret != -EALREADY) return ret; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5748cda659c2..aa1829ae431d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -876,8 +876,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss_info_changed |= ieee80211_handle_bss_capability(sdata, bss->cbss.capability, bss->has_erp_value, bss->erp_value); - cfg80211_hold_bss(&bss->cbss); - ieee80211_rx_bss_put(local, bss); } @@ -1031,10 +1029,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, conf->channel->center_freq, ifmgd->ssid, ifmgd->ssid_len); - if (bss) { - cfg80211_unhold_bss(&bss->cbss); + if (bss) ieee80211_rx_bss_put(local, bss); - } if (self_disconnected) { if (deauth) diff --git a/net/wireless/core.c b/net/wireless/core.c index 5f6a8322bcb3..7b66cf15349a 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -583,15 +583,12 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, #endif cfg80211_disconnect(rdev, dev, WLAN_REASON_DEAUTH_LEAVING, true); + cfg80211_mlme_down(rdev, dev); break; default: break; } break; - case NETDEV_DOWN: - kfree(wdev->conn); - wdev->conn = NULL; - break; case NETDEV_UP: #ifdef CONFIG_WIRELESS_EXT switch (wdev->iftype) { diff --git a/net/wireless/core.h b/net/wireless/core.h index 5209acb0ff7e..82918f5896a5 100644 --- a/net/wireless/core.h +++ 
b/net/wireless/core.h @@ -110,12 +110,30 @@ struct cfg80211_internal_bss { struct rb_node rbn; unsigned long ts; struct kref ref; - bool hold, ies_allocated; + atomic_t hold; + bool ies_allocated; /* must be last because of priv member */ struct cfg80211_bss pub; }; +static inline struct cfg80211_internal_bss *bss_from_pub(struct cfg80211_bss *pub) +{ + return container_of(pub, struct cfg80211_internal_bss, pub); +} + +static inline void cfg80211_hold_bss(struct cfg80211_internal_bss *bss) +{ + atomic_inc(&bss->hold); +} + +static inline void cfg80211_unhold_bss(struct cfg80211_internal_bss *bss) +{ + int r = atomic_dec_return(&bss->hold); + WARN_ON(r < 0); +} + + struct cfg80211_registered_device *cfg80211_drv_by_wiphy_idx(int wiphy_idx); int get_wiphy_idx(struct wiphy *wiphy); @@ -176,6 +194,26 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); +/* MLME */ +int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, const u8 *bssid, + const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len); +int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct ieee80211_channel *chan, + const u8 *bssid, const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len, bool use_mfp, + struct cfg80211_crypto_settings *crypt); +int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason); +int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason); +void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, + struct net_device *dev); + /* SME */ int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, @@ -193,5 +231,6 @@ void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, u8 *ie, size_t ie_len, u16 reason, bool from_ap); void cfg80211_sme_scan_done(struct net_device *dev); void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len); +void cfg80211_sme_disassoc(struct net_device *dev, int idx); #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 34b11eae30c8..c92b542d54b0 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -33,11 +33,11 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp) if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->current_bss); + cfg80211_put_bss(&wdev->current_bss->pub); } - cfg80211_hold_bss(bss); - wdev->current_bss = bss; + cfg80211_hold_bss(bss_from_pub(bss)); + wdev->current_bss = bss_from_pub(bss); nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, gfp); #ifdef CONFIG_WIRELESS_EXT @@ -78,7 +78,7 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext) if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->current_bss); + cfg80211_put_bss(&wdev->current_bss->pub); } wdev->current_bss = NULL; @@ -212,7 +212,7 @@ int cfg80211_ibss_wext_giwfreq(struct net_device *dev, return -EINVAL; if (wdev->current_bss) - chan = wdev->current_bss->channel; + chan = wdev->current_bss->pub.channel; else if (wdev->wext.ibss.channel) chan = wdev->wext.ibss.channel; @@ -352,7 +352,7 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, 
ap_addr->sa_family = ARPHRD_ETHER; if (wdev->current_bss) - memcpy(ap_addr->sa_data, wdev->current_bss->bssid, ETH_ALEN); + memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN); else memcpy(ap_addr->sa_data, wdev->wext.ibss.bssid, ETH_ALEN); return 0; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 3427fe73d3c3..1a92bf7597bf 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -14,8 +14,32 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; + u8 *bssid = mgmt->bssid; + int i; + u16 status = le16_to_cpu(mgmt->u.auth.status_code); + bool done = false; + + for (i = 0; i < MAX_AUTH_BSSES; i++) { + if (wdev->authtry_bsses[i] && + memcmp(wdev->authtry_bsses[i]->pub.bssid, bssid, + ETH_ALEN) == 0) { + if (status == WLAN_STATUS_SUCCESS) { + wdev->auth_bsses[i] = wdev->authtry_bsses[i]; + } else { + cfg80211_unhold_bss(wdev->authtry_bsses[i]); + cfg80211_put_bss(&wdev->authtry_bsses[i]->pub); + } + wdev->authtry_bsses[i] = NULL; + done = true; + break; + } + } + + WARN_ON(!done); nl80211_send_rx_auth(rdev, dev, buf, len, gfp); cfg80211_sme_rx_auth(dev, buf, len); @@ -30,7 +54,8 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len, g struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u8 *ie = mgmt->u.assoc_resp.variable; - int ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); + int i, ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); + bool done; status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); @@ -38,6 +63,20 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len, g cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, status_code, gfp); + + if (status_code == WLAN_STATUS_SUCCESS) { + for (i = 0; wdev->current_bss && i < MAX_AUTH_BSSES; i++) { + if (wdev->auth_bsses[i] == wdev->current_bss) { + cfg80211_unhold_bss(wdev->auth_bsses[i]); + cfg80211_put_bss(&wdev->auth_bsses[i]->pub); + wdev->auth_bsses[i] = NULL; + done = true; + break; + } + } + + WARN_ON(!done); + } } EXPORT_SYMBOL(cfg80211_send_rx_assoc); @@ -47,9 +86,45 @@ void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, gfp struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; + const u8 *bssid = mgmt->bssid; + int i; + bool done = false; nl80211_send_deauth(rdev, dev, buf, len, gfp); + if (wdev->current_bss && + memcmp(wdev->current_bss->pub.bssid, bssid, ETH_ALEN) == 0) { + done = true; + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(&wdev->current_bss->pub); + wdev->current_bss = NULL; + } else for (i = 0; i < MAX_AUTH_BSSES; i++) { + if (wdev->auth_bsses[i] && + memcmp(wdev->auth_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) { + cfg80211_unhold_bss(wdev->auth_bsses[i]); + cfg80211_put_bss(&wdev->auth_bsses[i]->pub); + wdev->auth_bsses[i] = NULL; + done = true; + break; + } + if (wdev->authtry_bsses[i] && + memcmp(wdev->authtry_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) { + cfg80211_unhold_bss(wdev->authtry_bsses[i]); + cfg80211_put_bss(&wdev->authtry_bsses[i]->pub); + wdev->authtry_bsses[i] 
= NULL; + done = true; + break; + } + } +/* + * mac80211 currently triggers this warning, + * so disable for now (it's harmless, just + * means that we got a spurious event) + + WARN_ON(!done); + + */ + if (wdev->sme_state == CFG80211_SME_CONNECTED) { u16 reason_code; bool from_ap; @@ -59,8 +134,6 @@ void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, gfp from_ap = memcmp(mgmt->da, dev->dev_addr, ETH_ALEN) == 0; __cfg80211_disconnected(dev, gfp, NULL, 0, reason_code, from_ap); - - wdev->sme_state = CFG80211_SME_IDLE; } else if (wdev->sme_state == CFG80211_SME_CONNECTING) { cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, gfp); @@ -74,21 +147,38 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, g struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; + const u8 *bssid = mgmt->bssid; + int i; + u16 reason_code; + bool from_ap; + bool done = false; nl80211_send_disassoc(rdev, dev, buf, len, gfp); - if (wdev->sme_state == CFG80211_SME_CONNECTED) { - u16 reason_code; - bool from_ap; + if (!wdev->sme_state == CFG80211_SME_CONNECTED) + return; - reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); + if (wdev->current_bss && + memcmp(wdev->current_bss, bssid, ETH_ALEN) == 0) { + for (i = 0; i < MAX_AUTH_BSSES; i++) { + if (wdev->authtry_bsses[i] || wdev->auth_bsses[i]) + continue; + wdev->auth_bsses[i] = wdev->current_bss; + wdev->current_bss = NULL; + done = true; + cfg80211_sme_disassoc(dev, i); + break; + } + WARN_ON(!done); + } else + WARN_ON(1); - from_ap = memcmp(mgmt->da, dev->dev_addr, ETH_ALEN) == 0; - __cfg80211_disconnected(dev, gfp, NULL, 0, - reason_code, from_ap); - wdev->sme_state = CFG80211_SME_IDLE; - } + reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); + + from_ap = memcmp(mgmt->da, dev->dev_addr, ETH_ALEN) == 0; + __cfg80211_disconnected(dev, gfp, NULL, 0, + reason_code, from_ap); } EXPORT_SYMBOL(cfg80211_send_disassoc); @@ -97,11 +187,27 @@ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr, gfp_t gf struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + int i; + bool done = false; + nl80211_send_auth_timeout(rdev, dev, addr, gfp); if (wdev->sme_state == CFG80211_SME_CONNECTING) cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, gfp); - wdev->sme_state = CFG80211_SME_IDLE; + + for (i = 0; addr && i < MAX_AUTH_BSSES; i++) { + if (wdev->authtry_bsses[i] && + memcmp(wdev->authtry_bsses[i]->pub.bssid, + addr, ETH_ALEN) == 0) { + cfg80211_unhold_bss(wdev->authtry_bsses[i]); + cfg80211_put_bss(&wdev->authtry_bsses[i]->pub); + wdev->authtry_bsses[i] = NULL; + done = true; + break; + } + } + + WARN_ON(!done); } EXPORT_SYMBOL(cfg80211_send_auth_timeout); @@ -110,11 +216,27 @@ void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr, gfp_t g struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + int i; + bool done = false; + nl80211_send_assoc_timeout(rdev, dev, addr, gfp); if (wdev->sme_state == CFG80211_SME_CONNECTING) cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, gfp); - wdev->sme_state = CFG80211_SME_IDLE; + + for (i = 0; addr && i < MAX_AUTH_BSSES; i++) { + if (wdev->auth_bsses[i] 
&& + memcmp(wdev->auth_bsses[i]->pub.bssid, + addr, ETH_ALEN) == 0) { + cfg80211_unhold_bss(wdev->auth_bsses[i]); + cfg80211_put_bss(&wdev->auth_bsses[i]->pub); + wdev->auth_bsses[i] = NULL; + done = true; + break; + } + } + + WARN_ON(!done); } EXPORT_SYMBOL(cfg80211_send_assoc_timeout); @@ -143,3 +265,208 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, nl80211_michael_mic_failure(rdev, dev, addr, key_type, key_id, tsc, gfp); } EXPORT_SYMBOL(cfg80211_michael_mic_failure); + +/* some MLME handling for userspace SME */ +int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, const u8 *bssid, + const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_auth_request req; + struct cfg80211_internal_bss *bss; + int i, err, slot = -1, nfree = 0; + + memset(&req, 0, sizeof(req)); + + req.ie = ie; + req.ie_len = ie_len; + req.auth_type = auth_type; + req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, + WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); + if (!req.bss) + return -ENOENT; + + bss = bss_from_pub(req.bss); + + for (i = 0; i < MAX_AUTH_BSSES; i++) { + if (bss == wdev->auth_bsses[i]) { + err = -EALREADY; + goto out; + } + } + + for (i = 0; i < MAX_AUTH_BSSES; i++) { + if (!wdev->auth_bsses[i] && !wdev->authtry_bsses[i]) { + slot = i; + nfree++; + } + } + + /* we need one free slot for disassoc and one for this auth */ + if (nfree < 2) { + err = -ENOSPC; + goto out; + } + + wdev->authtry_bsses[slot] = bss; + cfg80211_hold_bss(bss); + + err = rdev->ops->auth(&rdev->wiphy, dev, &req); + if (err) { + wdev->authtry_bsses[slot] = NULL; + cfg80211_unhold_bss(bss); + } + + out: + if (err) + cfg80211_put_bss(req.bss); + return err; +} + +int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct ieee80211_channel *chan, + const u8 *bssid, const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len, bool use_mfp, + struct cfg80211_crypto_settings *crypt) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_assoc_request req; + struct cfg80211_internal_bss *bss; + int i, err, slot = -1; + + memset(&req, 0, sizeof(req)); + + if (wdev->current_bss) + return -EALREADY; + + req.ie = ie; + req.ie_len = ie_len; + memcpy(&req.crypto, crypt, sizeof(req.crypto)); + req.use_mfp = use_mfp; + req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, + WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); + if (!req.bss) + return -ENOENT; + + bss = bss_from_pub(req.bss); + + for (i = 0; i < MAX_AUTH_BSSES; i++) { + if (bss == wdev->auth_bsses[i]) { + slot = i; + break; + } + } + + if (slot < 0) { + err = -ENOTCONN; + goto out; + } + + err = rdev->ops->assoc(&rdev->wiphy, dev, &req); + out: + /* still a reference in wdev->auth_bsses[slot] */ + cfg80211_put_bss(req.bss); + return err; +} + +int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_deauth_request req; + int i; + + memset(&req, 0, sizeof(req)); + req.reason_code = reason; + req.ie = ie; + req.ie_len = ie_len; + if (wdev->current_bss && + memcmp(wdev->current_bss->pub.bssid, bssid, ETH_ALEN) == 0) { + req.bss = &wdev->current_bss->pub; + } else for (i = 0; i < MAX_AUTH_BSSES; i++) { + if (wdev->auth_bsses[i] && + memcmp(bssid, 
wdev->auth_bsses[i]->pub.bssid, ETH_ALEN) == 0) { + req.bss = &wdev->auth_bsses[i]->pub; + break; + } + if (wdev->authtry_bsses[i] && + memcmp(bssid, wdev->authtry_bsses[i]->pub.bssid, ETH_ALEN) == 0) { + req.bss = &wdev->authtry_bsses[i]->pub; + break; + } + } + + if (!req.bss) + return -ENOTCONN; + + return rdev->ops->deauth(&rdev->wiphy, dev, &req); +} + +int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_disassoc_request req; + + memset(&req, 0, sizeof(req)); + req.reason_code = reason; + req.ie = ie; + req.ie_len = ie_len; + if (memcmp(wdev->current_bss->pub.bssid, bssid, ETH_ALEN) == 0) + req.bss = &wdev->current_bss->pub; + else + return -ENOTCONN; + + return rdev->ops->disassoc(&rdev->wiphy, dev, &req); +} + +void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_deauth_request req; + int i; + + if (!rdev->ops->deauth) + return; + + memset(&req, 0, sizeof(req)); + req.reason_code = WLAN_REASON_DEAUTH_LEAVING; + req.ie = NULL; + req.ie_len = 0; + + if (wdev->current_bss) { + req.bss = &wdev->current_bss->pub; + rdev->ops->deauth(&rdev->wiphy, dev, &req); + if (wdev->current_bss) { + cfg80211_unhold_bss(wdev->current_bss); + cfg80211_put_bss(&wdev->current_bss->pub); + wdev->current_bss = NULL; + } + } + + for (i = 0; i < MAX_AUTH_BSSES; i++) { + if (wdev->auth_bsses[i]) { + req.bss = &wdev->auth_bsses[i]->pub; + rdev->ops->deauth(&rdev->wiphy, dev, &req); + if (wdev->auth_bsses[i]) { + cfg80211_unhold_bss(wdev->auth_bsses[i]); + cfg80211_put_bss(&wdev->auth_bsses[i]->pub); + wdev->auth_bsses[i] = NULL; + } + } + if (wdev->authtry_bsses[i]) { + req.bss = &wdev->authtry_bsses[i]->pub; + rdev->ops->deauth(&rdev->wiphy, dev, &req); + if (wdev->authtry_bsses[i]) { + cfg80211_unhold_bss(wdev->authtry_bsses[i]); + cfg80211_put_bss(&wdev->authtry_bsses[i]->pub); + wdev->authtry_bsses[i] = NULL; + } + } + } +} diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0008144b354b..aa2b3f35cc48 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3044,9 +3044,10 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; struct net_device *dev; - struct cfg80211_auth_request req; - struct wiphy *wiphy; - int err; + struct ieee80211_channel *chan; + const u8 *bssid, *ssid, *ie = NULL; + int err, ssid_len, ie_len = 0; + enum nl80211_auth_type auth_type; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -3057,6 +3058,12 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[NL80211_ATTR_AUTH_TYPE]) return -EINVAL; + if (!info->attrs[NL80211_ATTR_SSID]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) + return -EINVAL; + rtnl_lock(); err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); @@ -3078,38 +3085,30 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) goto out; } - wiphy = &drv->wiphy; - memset(&req, 0, sizeof(req)); - - req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - - if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - req.chan = ieee80211_get_channel( - wiphy, - nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); - if (!req.chan) { - err = -EINVAL; - goto out; - } + bssid = 
nla_data(info->attrs[NL80211_ATTR_MAC]); + chan = ieee80211_get_channel(&drv->wiphy, + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); + if (!chan || (chan->flags & IEEE80211_CHAN_DISABLED)) { + err = -EINVAL; + goto out; } - if (info->attrs[NL80211_ATTR_SSID]) { - req.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); - req.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); - } + ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); if (info->attrs[NL80211_ATTR_IE]) { - req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); - req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + ie = nla_data(info->attrs[NL80211_ATTR_IE]); + ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } - req.auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]); - if (!nl80211_valid_auth_type(req.auth_type)) { + auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]); + if (!nl80211_valid_auth_type(auth_type)) { err = -EINVAL; goto out; } - err = drv->ops->auth(&drv->wiphy, dev, &req); + err = cfg80211_mlme_auth(drv, dev, chan, auth_type, bssid, + ssid, ssid_len, ie, ie_len); out: cfg80211_put_dev(drv); @@ -3183,26 +3182,29 @@ static int nl80211_crypto_settings(struct genl_info *info, static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) { - struct cfg80211_registered_device *drv; + struct cfg80211_registered_device *rdev; struct net_device *dev; - struct cfg80211_assoc_request req; - struct wiphy *wiphy; - int err; + struct cfg80211_crypto_settings crypto; + struct ieee80211_channel *chan; + const u8 *bssid, *ssid, *ie = NULL; + int err, ssid_len, ie_len = 0; + bool use_mfp = false; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; if (!info->attrs[NL80211_ATTR_MAC] || - !info->attrs[NL80211_ATTR_SSID]) + !info->attrs[NL80211_ATTR_SSID] || + !info->attrs[NL80211_ATTR_WIPHY_FREQ]) return -EINVAL; rtnl_lock(); - err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + err = get_drv_dev_by_info_ifindex(info->attrs, &rdev, &dev); if (err) goto unlock_rtnl; - if (!drv->ops->assoc) { + if (!rdev->ops->assoc) { err = -EOPNOTSUPP; goto out; } @@ -3217,46 +3219,42 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) goto out; } - wiphy = &drv->wiphy; - memset(&req, 0, sizeof(req)); - - req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - req.chan = ieee80211_get_channel( - wiphy, - nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); - if (!req.chan) { - err = -EINVAL; - goto out; - } + chan = ieee80211_get_channel(&rdev->wiphy, + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); + if (!chan || (chan->flags & IEEE80211_CHAN_DISABLED)) { + err = -EINVAL; + goto out; } - req.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); - req.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); + ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); if (info->attrs[NL80211_ATTR_IE]) { - req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); - req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + ie = nla_data(info->attrs[NL80211_ATTR_IE]); + ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } if (info->attrs[NL80211_ATTR_USE_MFP]) { enum nl80211_mfp use_mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); if (use_mfp == NL80211_MFP_REQUIRED) - req.use_mfp = true; + use_mfp = true; else if (use_mfp != NL80211_MFP_NO) { err = -EINVAL; goto out; } } - err = 
nl80211_crypto_settings(info, &req.crypto); + err = nl80211_crypto_settings(info, &crypto); if (!err) - err = drv->ops->assoc(&drv->wiphy, dev, &req); + err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, ssid, + ssid_len, ie, ie_len, use_mfp, + &crypto); out: - cfg80211_put_dev(drv); + cfg80211_put_dev(rdev); dev_put(dev); unlock_rtnl: rtnl_unlock(); @@ -3267,9 +3265,9 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; struct net_device *dev; - struct cfg80211_deauth_request req; - struct wiphy *wiphy; - int err; + const u8 *ie = NULL, *bssid; + int err, ie_len = 0; + u16 reason_code; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -3301,24 +3299,21 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) goto out; } - wiphy = &drv->wiphy; - memset(&req, 0, sizeof(req)); - - req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); - req.reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); - if (req.reason_code == 0) { + reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); + if (reason_code == 0) { /* Reason Code 0 is reserved */ err = -EINVAL; goto out; } if (info->attrs[NL80211_ATTR_IE]) { - req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); - req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + ie = nla_data(info->attrs[NL80211_ATTR_IE]); + ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } - err = drv->ops->deauth(&drv->wiphy, dev, &req); + err = cfg80211_mlme_deauth(drv, dev, bssid, ie, ie_len, reason_code); out: cfg80211_put_dev(drv); @@ -3332,9 +3327,9 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; struct net_device *dev; - struct cfg80211_disassoc_request req; - struct wiphy *wiphy; - int err; + const u8 *ie = NULL, *bssid; + int err, ie_len = 0; + u16 reason_code; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -3366,24 +3361,21 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) goto out; } - wiphy = &drv->wiphy; - memset(&req, 0, sizeof(req)); - - req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); - req.reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); - if (req.reason_code == 0) { + reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); + if (reason_code == 0) { /* Reason Code 0 is reserved */ err = -EINVAL; goto out; } if (info->attrs[NL80211_ATTR_IE]) { - req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); - req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + ie = nla_data(info->attrs[NL80211_ATTR_IE]); + ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } - err = drv->ops->disassoc(&drv->wiphy, dev, &req); + err = cfg80211_mlme_disassoc(drv, dev, bssid, ie, ie_len, reason_code); out: cfg80211_put_dev(drv); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 82b33e708488..925399462a79 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -70,6 +70,8 @@ static void bss_release(struct kref *ref) if (bss->ies_allocated) kfree(bss->pub.information_elements); + BUG_ON(atomic_read(&bss->hold)); + kfree(bss); } @@ -92,8 +94,9 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *dev) bool expired = false; list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { - if (bss->hold || - !time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE)) + if 
(atomic_read(&bss->hold)) + continue; + if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE)) continue; list_del(&bss->list); rb_erase(&bss->rbn, &dev->bss_tree); @@ -553,30 +556,6 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) } EXPORT_SYMBOL(cfg80211_unlink_bss); -void cfg80211_hold_bss(struct cfg80211_bss *pub) -{ - struct cfg80211_internal_bss *bss; - - if (!pub) - return; - - bss = container_of(pub, struct cfg80211_internal_bss, pub); - bss->hold = true; -} -EXPORT_SYMBOL(cfg80211_hold_bss); - -void cfg80211_unhold_bss(struct cfg80211_bss *pub) -{ - struct cfg80211_internal_bss *bss; - - if (!pub) - return; - - bss = container_of(pub, struct cfg80211_internal_bss, pub); - bss->hold = false; -} -EXPORT_SYMBOL(cfg80211_unhold_bss); - #ifdef CONFIG_WIRELESS_EXT int cfg80211_wext_siwscan(struct net_device *dev, struct iw_request_info *info, diff --git a/net/wireless/sme.c b/net/wireless/sme.c index d4e0b4065cbc..412161f7b08e 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -103,44 +103,37 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) static int cfg80211_conn_do_work(struct wireless_dev *wdev) { struct cfg80211_registered_device *drv = wiphy_to_dev(wdev->wiphy); - union { - struct cfg80211_auth_request auth_req; - struct cfg80211_assoc_request assoc_req; - } u; - - memset(&u, 0, sizeof(u)); + struct cfg80211_connect_params *params; + int err; if (!wdev->conn) return 0; + params = &wdev->conn->params; + switch (wdev->conn->state) { case CFG80211_CONN_SCAN_AGAIN: return cfg80211_conn_scan(wdev); case CFG80211_CONN_AUTHENTICATE_NEXT: - u.auth_req.chan = wdev->conn->params.channel; - u.auth_req.peer_addr = wdev->conn->params.bssid; - u.auth_req.ssid = wdev->conn->params.ssid; - u.auth_req.ssid_len = wdev->conn->params.ssid_len; - u.auth_req.auth_type = wdev->conn->params.auth_type; - u.auth_req.ie = NULL; - u.auth_req.ie_len = 0; - wdev->conn->state = CFG80211_CONN_AUTHENTICATING; BUG_ON(!drv->ops->auth); - return drv->ops->auth(wdev->wiphy, wdev->netdev, &u.auth_req); + wdev->conn->state = CFG80211_CONN_AUTHENTICATING; + return cfg80211_mlme_auth(drv, wdev->netdev, + params->channel, params->auth_type, + params->bssid, + params->ssid, params->ssid_len, + NULL, 0); case CFG80211_CONN_ASSOCIATE_NEXT: - u.assoc_req.chan = wdev->conn->params.channel; - u.assoc_req.peer_addr = wdev->conn->params.bssid; - u.assoc_req.ssid = wdev->conn->params.ssid; - u.assoc_req.ssid_len = wdev->conn->params.ssid_len; - u.assoc_req.ie = wdev->conn->params.ie; - u.assoc_req.ie_len = wdev->conn->params.ie_len; - u.assoc_req.use_mfp = false; - memcpy(&u.assoc_req.crypto, &wdev->conn->params.crypto, - sizeof(u.assoc_req.crypto)); - wdev->conn->state = CFG80211_CONN_ASSOCIATING; BUG_ON(!drv->ops->assoc); - return drv->ops->assoc(wdev->wiphy, wdev->netdev, - &u.assoc_req); + wdev->conn->state = CFG80211_CONN_ASSOCIATING; + err = cfg80211_mlme_assoc(drv, wdev->netdev, + params->channel, params->bssid, + params->ssid, params->ssid_len, + params->ie, params->ie_len, + false, ¶ms->crypto); + if (err) + cfg80211_mlme_deauth(drv, wdev->netdev, params->bssid, + NULL, 0, WLAN_REASON_DEAUTH_LEAVING); + return err; default: return 0; } @@ -186,7 +179,6 @@ static bool cfg80211_get_conn_bss(struct wireless_dev *wdev) wdev->conn->params.ssid_len, WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_PRIVACY, capa); - if (!bss) return false; @@ -264,9 +256,11 @@ void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len) } wdev->conn->state = 
CFG80211_CONN_AUTHENTICATE_NEXT; schedule_work(&rdev->conn_work); - } else if (status_code != WLAN_STATUS_SUCCESS) + } else if (status_code != WLAN_STATUS_SUCCESS) { wdev->sme_state = CFG80211_SME_IDLE; - else if (wdev->sme_state == CFG80211_SME_CONNECTING && + kfree(wdev->conn); + wdev->conn = NULL; + } else if (wdev->sme_state == CFG80211_SME_CONNECTING && wdev->conn->state == CFG80211_CONN_AUTHENTICATING) { wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; schedule_work(&rdev->conn_work); @@ -330,10 +324,13 @@ static void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->current_bss); + cfg80211_put_bss(&wdev->current_bss->pub); wdev->current_bss = NULL; } + if (wdev->conn) + wdev->conn->state = CFG80211_CONN_IDLE; + if (status == WLAN_STATUS_SUCCESS) { bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, wdev->ssid, wdev->ssid_len, @@ -343,16 +340,15 @@ static void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, if (WARN_ON(!bss)) return; - cfg80211_hold_bss(bss); - wdev->current_bss = bss; + cfg80211_hold_bss(bss_from_pub(bss)); + wdev->current_bss = bss_from_pub(bss); wdev->sme_state = CFG80211_SME_CONNECTED; } else { wdev->sme_state = CFG80211_SME_IDLE; + kfree(wdev->conn); + wdev->conn = NULL; } - - if (wdev->conn) - wdev->conn->state = CFG80211_CONN_IDLE; } void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, @@ -387,7 +383,7 @@ void cfg80211_roamed(struct net_device *dev, const u8 *bssid, } cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->current_bss); + cfg80211_put_bss(&wdev->current_bss->pub); wdev->current_bss = NULL; bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, @@ -397,8 +393,8 @@ void cfg80211_roamed(struct net_device *dev, const u8 *bssid, if (WARN_ON(!bss)) return; - cfg80211_hold_bss(bss); - wdev->current_bss = bss; + cfg80211_hold_bss(bss_from_pub(bss)); + wdev->current_bss = bss_from_pub(bss); nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, gfp); @@ -440,7 +436,7 @@ void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, u8 *ie, if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->current_bss); + cfg80211_put_bss(&wdev->current_bss->pub); } wdev->current_bss = NULL; @@ -449,6 +445,8 @@ void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, u8 *ie, if (wdev->conn) { kfree(wdev->conn->ie); wdev->conn->ie = NULL; + kfree(wdev->conn); + wdev->conn = NULL; } nl80211_send_disconnected(wiphy_to_dev(wdev->wiphy), dev, @@ -482,12 +480,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, if (!rdev->ops->auth || !rdev->ops->assoc) return -EOPNOTSUPP; - if (!wdev->conn) { - wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL); - if (!wdev->conn) - return -ENOMEM; - } else - memset(wdev->conn, 0, sizeof(*wdev->conn)); + if (WARN_ON(wdev->conn)) + return -EINPROGRESS; + + wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL); + if (!wdev->conn) + return -ENOMEM; /* * Copy all parameters, and treat explicitly IEs, BSSID, SSID. 
@@ -502,8 +500,11 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, wdev->conn->ie = kmemdup(connect->ie, connect->ie_len, GFP_KERNEL); wdev->conn->params.ie = wdev->conn->ie; - if (!wdev->conn->ie) + if (!wdev->conn->ie) { + kfree(wdev->conn); + wdev->conn = NULL; return -ENOMEM; + } } if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) { @@ -543,8 +544,11 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, wdev->conn->state = CFG80211_CONN_SCAN_AGAIN; } } - if (err) + if (err) { + kfree(wdev->conn); + wdev->conn = NULL; wdev->sme_state = CFG80211_SME_IDLE; + } return err; } else { @@ -572,31 +576,27 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, return -EINVAL; if (!rdev->ops->disconnect) { - struct cfg80211_deauth_request deauth; - u8 bssid[ETH_ALEN]; + if (!rdev->ops->deauth) + return -EOPNOTSUPP; - /* internal bug. */ - if (WARN_ON(!wdev->conn)) - return -EINVAL; + /* was it connected by userspace SME? */ + if (!wdev->conn) { + cfg80211_mlme_down(rdev, dev); + return 0; + } if (wdev->sme_state == CFG80211_SME_CONNECTING && (wdev->conn->state == CFG80211_CONN_SCANNING || wdev->conn->state == CFG80211_CONN_SCAN_AGAIN)) { wdev->sme_state = CFG80211_SME_IDLE; + kfree(wdev->conn); + wdev->conn = NULL; return 0; } - if (!rdev->ops->deauth) - return -EOPNOTSUPP; - - memset(&deauth, 0, sizeof(deauth)); - /* wdev->conn->params.bssid must be set if > SCANNING */ - memcpy(bssid, wdev->conn->params.bssid, ETH_ALEN); - deauth.peer_addr = bssid; - deauth.reason_code = reason; - - err = rdev->ops->deauth(&rdev->wiphy, dev, &deauth); + err = cfg80211_mlme_deauth(rdev, dev, wdev->conn->params.bssid, + NULL, 0, reason); if (err) return err; } else { @@ -614,3 +614,33 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, return 0; } + +void cfg80211_sme_disassoc(struct net_device *dev, int idx) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + u8 bssid[ETH_ALEN]; + + if (!wdev->conn) + return; + + if (wdev->conn->state == CFG80211_CONN_IDLE) + return; + + /* + * Ok, so the association was made by this SME -- we don't + * want it any more so deauthenticate too. 
+ */ + + if (!wdev->auth_bsses[idx]) + return; + + memcpy(bssid, wdev->auth_bsses[idx]->pub.bssid, ETH_ALEN); + if (cfg80211_mlme_deauth(rdev, dev, bssid, + NULL, 0, WLAN_REASON_DEAUTH_LEAVING)) { + /* whatever -- assume gone anyway */ + cfg80211_unhold_bss(wdev->auth_bsses[idx]); + cfg80211_put_bss(&wdev->auth_bsses[idx]->pub); + wdev->auth_bsses[idx] = NULL; + } +} diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 3b531d572b69..fe1987acb891 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -93,7 +93,7 @@ int cfg80211_mgd_wext_giwfreq(struct net_device *dev, return -EINVAL; if (wdev->current_bss) - chan = wdev->current_bss->channel; + chan = wdev->current_bss->pub.channel; else if (wdev->wext.connect.channel) chan = wdev->wext.connect.channel; @@ -244,7 +244,7 @@ int cfg80211_mgd_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; if (wdev->current_bss) - memcpy(ap_addr->sa_data, wdev->current_bss->bssid, ETH_ALEN); + memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN); else if (wdev->wext.connect.bssid) memcpy(ap_addr->sa_data, wdev->wext.connect.bssid, ETH_ALEN); else -- cgit v1.2.3 From 1be491fca12ff599c37ceaf7e9042ebee9f0068e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 5 Jul 2009 14:51:06 +0200 Subject: rfkill: prep for rfkill API changes We've designed the /dev/rfkill API in a way that we can increase the event struct by adding members at the end, should it become necessary. To validate the events, userspace and the kernel need to have the proper event size to check for -- when reading from the other end they need to verify that it's at least version 1 of the event API, with the current struct size, so define a constant for that and make the code a little more 'future proof'. Not that I expect that we'll have to change the event size any time soon, but it's better to write the code in a way that lends itself to extending. Due to the current size of the event struct, the code is currently equivalent, but should the event struct ever need to be increased the new code might not need changing. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/rfkill.h | 14 ++++++++++++++ net/rfkill/core.c | 10 ++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 2ce29831feb6..f3d5812693d6 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -82,6 +82,20 @@ struct rfkill_event { __u8 soft, hard; } __packed; +/* + * We are planning to be backward and forward compatible with changes + * to the event struct, by adding new, optional, members at the end. + * When reading an event (whether the kernel from userspace or vice + * versa) we need to accept anything that's at least as large as the + * version 1 event size, but might be able to accept other sizes in + * the future. + * + * One exception is the kernel -- we already have two event sizes in + * that we've made the 'hard' member optional since our only option + * is to ignore it anyway. 
+ */ +#define RFKILL_EVENT_SIZE_V1 8 + /* ioctl for turning off rfkill-input (if present) */ #define RFKILL_IOC_MAGIC 'R' #define RFKILL_IOC_NOINPUT 1 diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 79693fe2001e..47497c97c8d9 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -1076,10 +1076,16 @@ static ssize_t rfkill_fop_write(struct file *file, const char __user *buf, struct rfkill_event ev; /* we don't need the 'hard' variable but accept it */ - if (count < sizeof(ev) - 1) + if (count < RFKILL_EVENT_SIZE_V1 - 1) return -EINVAL; - if (copy_from_user(&ev, buf, sizeof(ev) - 1)) + /* + * Copy as much data as we can accept into our 'ev' buffer, + * but tell userspace how much we've copied so it can determine + * our API version even in a write() call, if it cares. + */ + count = min(count, sizeof(ev)); + if (copy_from_user(&ev, buf, count)) return -EFAULT; if (ev.op != RFKILL_OP_CHANGE && ev.op != RFKILL_OP_CHANGE_ALL) -- cgit v1.2.3 From 3e5d7649a64e558e4146ddfad4dfcf13fc65dd47 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Jul 2009 14:37:26 +0200 Subject: cfg80211: let SME control reassociation vs. association Since we don't really know that well in the kernel, let's let the SME control whether it wants to use reassociation or not, by allowing it to give the previous BSSID in the associate() parameters. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 5 +++++ include/net/cfg80211.h | 3 ++- net/mac80211/cfg.c | 6 ++++++ net/mac80211/mlme.c | 7 ------- net/wireless/core.h | 3 ++- net/wireless/mlme.c | 4 +++- net/wireless/nl80211.c | 10 +++++++--- net/wireless/sme.c | 8 +++++++- 8 files changed, 32 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index b34c17f52f3e..e496a2daf7ef 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -564,6 +564,9 @@ enum nl80211_commands { * @NL80211_ATTR_RESP_IE: (Re)association response information elements as * sent by peer, for ROAM and successful CONNECT events. 
* + * @NL80211_ATTR_PREV_BSSID: previous BSSID, to be used by in ASSOCIATE + * commands to specify using a reassociate frame + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -687,6 +690,8 @@ enum nl80211_attrs { NL80211_ATTR_REQ_IE, NL80211_ATTR_RESP_IE, + NL80211_ATTR_PREV_BSSID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ca986cc91098..71847d3c2640 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -664,10 +664,11 @@ struct cfg80211_auth_request { * @ie_len: Length of ie buffer in octets * @use_mfp: Use management frame protection (IEEE 802.11w) in this association * @crypto: crypto settings + * @prev_bssid: previous BSSID, if not %NULL use reassociate frame */ struct cfg80211_assoc_request { struct cfg80211_bss *bss; - const u8 *ie; + const u8 *ie, *prev_bssid; size_t ie_len; struct cfg80211_crypto_settings crypto; bool use_mfp; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 0f29cd0580c9..e6d8860f26f2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1256,6 +1256,12 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, sdata->u.mgd.flags &= ~IEEE80211_STA_MFP_ENABLED; } + if (req->prev_bssid) { + sdata->u.mgd.flags |= IEEE80211_STA_PREV_BSSID_SET; + memcpy(sdata->u.mgd.prev_bssid, req->prev_bssid, ETH_ALEN); + } else + sdata->u.mgd.flags &= ~IEEE80211_STA_PREV_BSSID_SET; + if (req->crypto.control_port) sdata->u.mgd.flags |= IEEE80211_STA_CONTROL_PORT; else diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index aa1829ae431d..24486455e505 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -879,9 +879,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_put(local, bss); } - ifmgd->flags |= IEEE80211_STA_PREV_BSSID_SET; - memcpy(ifmgd->prev_bssid, sdata->u.mgd.bssid, ETH_ALEN); - ifmgd->last_probe = jiffies; ieee80211_led_assoc(local, 1); @@ -1470,10 +1467,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", sdata->dev->name, status_code); - /* if this was a reassociation, ensure we try a "full" - * association next time. This works around some broken APs - * which do not correctly reject reassociation requests. 
*/ - ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET; cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, len, GFP_KERNEL); /* Wait for SME to decide what to do next */ diff --git a/net/wireless/core.h b/net/wireless/core.h index 82918f5896a5..4554453c116a 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -202,7 +202,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, const u8 *ie, int ie_len); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, - const u8 *bssid, const u8 *ssid, int ssid_len, + const u8 *bssid, const u8 *prev_bssid, + const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt); int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 020f33b38467..087d3377958f 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -335,7 +335,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, - const u8 *bssid, const u8 *ssid, int ssid_len, + const u8 *bssid, const u8 *prev_bssid, + const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt) { @@ -353,6 +354,7 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, req.ie_len = ie_len; memcpy(&req.crypto, crypt, sizeof(req.crypto)); req.use_mfp = use_mfp; + req.prev_bssid = prev_bssid; req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); if (!req.bss) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 723512b48f2e..44c520c264fc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -71,6 +71,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, [NL80211_ATTR_MAC] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [NL80211_ATTR_PREV_BSSID] = { .type = NLA_BINARY, .len = ETH_ALEN }, [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, @@ -3187,7 +3188,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) struct net_device *dev; struct cfg80211_crypto_settings crypto; struct ieee80211_channel *chan; - const u8 *bssid, *ssid, *ie = NULL; + const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; int err, ssid_len, ie_len = 0; bool use_mfp = false; @@ -3248,10 +3249,13 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[NL80211_ATTR_PREV_BSSID]) + prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); + err = nl80211_crypto_settings(info, &crypto, 1); if (!err) - err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, ssid, - ssid_len, ie, ie_len, use_mfp, + err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, + ssid, ssid_len, ie, ie_len, use_mfp, &crypto); out: diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 412161f7b08e..066a19ef9d73 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -125,8 +125,14 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!drv->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; + /* + * We could, later, implement roaming here and then actually + * set prev_bssid to non-NULL. 
But then we need to be aware + * that some APs don't like that -- so we'd need to retry + * the association. + */ err = cfg80211_mlme_assoc(drv, wdev->netdev, - params->channel, params->bssid, + params->channel, params->bssid, NULL, params->ssid, params->ssid_len, params->ie, params->ie_len, false, ¶ms->crypto); -- cgit v1.2.3 From c238c8ac63f2d33ea5e7c0b9e9e0ccd8ae9a34e4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Jul 2009 03:56:06 +0200 Subject: cfg80211: dont use union for wext Otherwise it becomes very hard to reset the structs correctly since wext can be configured while the interface is down. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 71847d3c2640..fe49833242d7 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1270,10 +1270,8 @@ struct wireless_dev { #ifdef CONFIG_WIRELESS_EXT /* wext data */ struct { - union { - struct cfg80211_ibss_params ibss; - struct cfg80211_connect_params connect; - }; + struct cfg80211_ibss_params ibss; + struct cfg80211_connect_params connect; u8 *ie; size_t ie_len; u8 bssid[ETH_ALEN]; -- cgit v1.2.3 From cb0b4beb93d14429bf0c50fc1ab8e26348dca880 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Jul 2009 03:56:07 +0200 Subject: cfg80211: mlme API must be able to sleep After the mac80211 mlme cleanup, we can require that the MLME functions in cfg80211 can sleep. This will simplify future work in cfg80211 a lot. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 30 ++++++++++++++--------------- net/mac80211/mlme.c | 26 +++++++++---------------- net/wireless/mlme.c | 51 ++++++++++++++++++++++++++++++++------------------ 3 files changed, 56 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fe49833242d7..60c1f11da45f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1721,70 +1721,68 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *bss); * @dev: network device * @buf: authentication frame (header + body) * @len: length of the frame data - * @gfp: allocation flags * * This function is called whenever an authentication has been processed in * station mode. The driver is required to call either this function or * cfg80211_send_auth_timeout() to indicate the result of cfg80211_ops::auth() - * call. + * call. This function may sleep. */ -void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp); +void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len); /** * cfg80211_send_auth_timeout - notification of timed out authentication * @dev: network device * @addr: The MAC address of the device with which the authentication timed out - * @gfp: allocation flags + * + * This function may sleep. */ -void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr, gfp_t gfp); +void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr); /** * cfg80211_send_rx_assoc - notification of processed association * @dev: network device * @buf: (re)association response frame (header + body) * @len: length of the frame data - * @gfp: allocation flags * * This function is called whenever a (re)association response has been * processed in station mode. 
The driver is required to call either this * function or cfg80211_send_assoc_timeout() to indicate the result of - * cfg80211_ops::assoc() call. + * cfg80211_ops::assoc() call. This function may sleep. */ -void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp); +void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len); /** * cfg80211_send_assoc_timeout - notification of timed out association * @dev: network device * @addr: The MAC address of the device with which the association timed out - * @gfp: allocation flags + * + * This function may sleep. */ -void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr, gfp_t gfp); +void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr); /** * cfg80211_send_deauth - notification of processed deauthentication * @dev: network device * @buf: deauthentication frame (header + body) * @len: length of the frame data - * @gfp: allocation flags * * This function is called whenever deauthentication has been processed in * station mode. This includes both received deauthentication frames and - * locally generated ones. + * locally generated ones. This function may sleep. */ -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp); +void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len); /** * cfg80211_send_disassoc - notification of processed disassociation * @dev: network device * @buf: disassociation response frame (header + body) * @len: length of the frame data - * @gfp: allocation flags * * This function is called whenever disassociation has been processed in * station mode. This includes both received disassociation frames and locally - * generated ones. + * generated ones. This function may sleep. 
*/ -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp); +void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len); /** * cfg80211_michael_mic_failure - notification of Michael MIC failure (TKIP) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 108e8c9c60fd..15dbb57ab55e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -412,11 +412,9 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, mgmt->u.deauth.reason_code = cpu_to_le16(reason); if (stype == IEEE80211_STYPE_DEAUTH) - cfg80211_send_deauth(sdata->dev, (u8 *) mgmt, skb->len, - GFP_KERNEL); + cfg80211_send_deauth(sdata->dev, (u8 *) mgmt, skb->len); else - cfg80211_send_disassoc(sdata->dev, (u8 *) mgmt, skb->len, - GFP_KERNEL); + cfg80211_send_disassoc(sdata->dev, (u8 *) mgmt, skb->len); ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED); } @@ -1839,12 +1837,10 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, /* no action */ break; case RX_MGMT_CFG80211_DEAUTH: - cfg80211_send_deauth(sdata->dev, (u8 *) mgmt, - skb->len, GFP_KERNEL); + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); break; case RX_MGMT_CFG80211_DISASSOC: - cfg80211_send_disassoc(sdata->dev, (u8 *) mgmt, - skb->len, GFP_KERNEL); + cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); break; default: WARN(1, "unexpected: %d", rma); @@ -1893,12 +1889,10 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, /* no action */ break; case RX_MGMT_CFG80211_AUTH: - cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, skb->len, - GFP_KERNEL); + cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, skb->len); break; case RX_MGMT_CFG80211_ASSOC: - cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, skb->len, - GFP_KERNEL); + cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, skb->len); break; default: WARN(1, "unexpected: %d", rma); @@ -2026,13 +2020,11 @@ static void ieee80211_sta_work(struct work_struct *work) switch (wk->tries) { case RX_MGMT_CFG80211_AUTH_TO: cfg80211_send_auth_timeout(sdata->dev, - wk->bss->cbss.bssid, - GFP_KERNEL); + wk->bss->cbss.bssid); break; case RX_MGMT_CFG80211_ASSOC_TO: - cfg80211_send_auth_timeout(sdata->dev, - wk->bss->cbss.bssid, - GFP_KERNEL); + cfg80211_send_assoc_timeout(sdata->dev, + wk->bss->cbss.bssid); break; default: WARN(1, "unexpected: %d", wk->tries); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 087d3377958f..f7dc7524e145 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -12,7 +12,7 @@ #include "core.h" #include "nl80211.h" -void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp) +void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -23,6 +23,8 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len, gf u16 status = le16_to_cpu(mgmt->u.auth.status_code); bool done = false; + might_sleep(); + for (i = 0; i < MAX_AUTH_BSSES; i++) { if (wdev->authtry_bsses[i] && memcmp(wdev->authtry_bsses[i]->pub.bssid, bssid, @@ -41,12 +43,12 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len, gf WARN_ON(!done); - nl80211_send_rx_auth(rdev, dev, buf, len, gfp); + nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); cfg80211_sme_rx_auth(dev, buf, len); } EXPORT_SYMBOL(cfg80211_send_rx_auth); -void cfg80211_send_rx_assoc(struct net_device *dev, const u8 
*buf, size_t len, gfp_t gfp) +void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) { u16 status_code; struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -57,12 +59,14 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len, g int i, ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); bool done; + might_sleep(); + status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); - nl80211_send_rx_assoc(rdev, dev, buf, len, gfp); + nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL); cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, - status_code, gfp); + status_code, GFP_KERNEL); if (status_code == WLAN_STATUS_SUCCESS) { for (i = 0; wdev->current_bss && i < MAX_AUTH_BSSES; i++) { @@ -80,7 +84,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len, g } EXPORT_SYMBOL(cfg80211_send_rx_assoc); -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp) +void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -90,7 +94,9 @@ void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, gfp int i; bool done = false; - nl80211_send_deauth(rdev, dev, buf, len, gfp); + might_sleep(); + + nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); if (wdev->current_bss && memcmp(wdev->current_bss->pub.bssid, bssid, ETH_ALEN) == 0) { @@ -132,16 +138,17 @@ void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, gfp reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); from_ap = memcmp(mgmt->da, dev->dev_addr, ETH_ALEN) == 0; - __cfg80211_disconnected(dev, gfp, NULL, 0, + __cfg80211_disconnected(dev, GFP_KERNEL, NULL, 0, reason_code, from_ap); } else if (wdev->sme_state == CFG80211_SME_CONNECTING) { cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, gfp); + WLAN_STATUS_UNSPECIFIED_FAILURE, + GFP_KERNEL); } } EXPORT_SYMBOL(cfg80211_send_deauth); -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, gfp_t gfp) +void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -153,7 +160,9 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, g bool from_ap; bool done = false; - nl80211_send_disassoc(rdev, dev, buf, len, gfp); + might_sleep(); + + nl80211_send_disassoc(rdev, dev, buf, len, GFP_KERNEL); if (!wdev->sme_state == CFG80211_SME_CONNECTED) return; @@ -177,12 +186,12 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, g reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); from_ap = memcmp(mgmt->da, dev->dev_addr, ETH_ALEN) == 0; - __cfg80211_disconnected(dev, gfp, NULL, 0, + __cfg80211_disconnected(dev, GFP_KERNEL, NULL, 0, reason_code, from_ap); } EXPORT_SYMBOL(cfg80211_send_disassoc); -void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr, gfp_t gfp) +void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -190,10 +199,13 @@ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr, gfp_t gf int i; bool done = false; - nl80211_send_auth_timeout(rdev, dev, addr, gfp); + might_sleep(); + + nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); if (wdev->sme_state == 
CFG80211_SME_CONNECTING) cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, gfp); + WLAN_STATUS_UNSPECIFIED_FAILURE, + GFP_KERNEL); for (i = 0; addr && i < MAX_AUTH_BSSES; i++) { if (wdev->authtry_bsses[i] && @@ -211,7 +223,7 @@ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr, gfp_t gf } EXPORT_SYMBOL(cfg80211_send_auth_timeout); -void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr, gfp_t gfp) +void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -219,10 +231,13 @@ void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr, gfp_t g int i; bool done = false; - nl80211_send_assoc_timeout(rdev, dev, addr, gfp); + might_sleep(); + + nl80211_send_assoc_timeout(rdev, dev, addr, GFP_KERNEL); if (wdev->sme_state == CFG80211_SME_CONNECTING) cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, gfp); + WLAN_STATUS_UNSPECIFIED_FAILURE, + GFP_KERNEL); for (i = 0; addr && i < MAX_AUTH_BSSES; i++) { if (wdev->auth_bsses[i] && -- cgit v1.2.3 From 667503ddcb96f3b10211f997fe55907fa7509841 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Jul 2009 03:56:11 +0200 Subject: cfg80211: fix locking Over time, a lot of locking issues have crept into the smarts of cfg80211; for example, scan completion can race against a new scan, and joining an IBSS can race against leaving it. Introduce a new per-interface lock that protects most of the per-interface data that we need to keep track of, and sprinkle assertions about that lock everywhere. Some things now need to be offloaded to work structs so that we don't require being able to sleep in functions the drivers call. The exception is the MLME callbacks (rx_auth etc.), which currently only mac80211 calls, because it was easier to do that there instead of in cfg80211; future drivers implementing those calls will, if they ever exist, probably need to use a scheme similar to mac80211's anyway. In order to handle _deauth and _disassoc properly, introduce a cookie that is passed to them and determines the locking requirements. Signed-off-by: Johannes Berg Signed-off-by: John W.
Linville --- include/net/cfg80211.h | 24 ++++- net/mac80211/cfg.c | 12 ++- net/mac80211/ieee80211_i.h | 6 +- net/mac80211/mlme.c | 25 +++-- net/wireless/core.c | 92 +++++++++++++++-- net/wireless/core.h | 100 +++++++++++++++++- net/wireless/ibss.c | 133 +++++++++++++++++++----- net/wireless/mlme.c | 214 +++++++++++++++++++++++++++++--------- net/wireless/nl80211.c | 8 +- net/wireless/nl80211.h | 2 +- net/wireless/scan.c | 30 +++++- net/wireless/sme.c | 252 +++++++++++++++++++++++++++++++++++---------- net/wireless/wext-sme.c | 125 +++++++++++++++------- 13 files changed, 823 insertions(+), 200 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 60c1f11da45f..83c2c727d71e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -555,6 +555,7 @@ struct cfg80211_scan_request { /* internal */ struct wiphy *wiphy; int ifidx; + bool aborted; }; /** @@ -998,9 +999,11 @@ struct cfg80211_ops { int (*assoc)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_assoc_request *req); int (*deauth)(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_deauth_request *req); + struct cfg80211_deauth_request *req, + void *cookie); int (*disassoc)(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_disassoc_request *req); + struct cfg80211_disassoc_request *req, + void *cookie); int (*connect)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme); @@ -1249,10 +1252,12 @@ struct wireless_dev { struct wiphy *wiphy; enum nl80211_iftype iftype; - /* private to the generic wireless code */ + /* the remainder of this struct should be private to cfg80211 */ struct list_head list; struct net_device *netdev; + struct mutex mtx; + /* currently used for IBSS and SME - might be rearranged later */ u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; @@ -1263,6 +1268,9 @@ struct wireless_dev { } sme_state; struct cfg80211_conn *conn; + struct list_head event_list; + spinlock_t event_lock; + struct cfg80211_internal_bss *authtry_bsses[MAX_AUTH_BSSES]; struct cfg80211_internal_bss *auth_bsses[MAX_AUTH_BSSES]; struct cfg80211_internal_bss *current_bss; /* associated / joined */ @@ -1765,24 +1773,30 @@ void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr); * @dev: network device * @buf: deauthentication frame (header + body) * @len: length of the frame data + * @cookie: cookie from ->deauth if called within that callback, + * %NULL otherwise * * This function is called whenever deauthentication has been processed in * station mode. This includes both received deauthentication frames and * locally generated ones. This function may sleep. */ -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len); +void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, + void *cookie); /** * cfg80211_send_disassoc - notification of processed disassociation * @dev: network device * @buf: disassociation response frame (header + body) * @len: length of the frame data + * @cookie: cookie from ->disassoc if called within that callback, + * %NULL otherwise * * This function is called whenever disassociation has been processed in * station mode. This includes both received disassociation frames and locally * generated ones. This function may sleep. 
*/ -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len); +void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, + void *cookie); /** * cfg80211_michael_mic_failure - notification of Michael MIC failure (TKIP) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 7cfc14e4ca07..36f8f245fa4c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1182,15 +1182,19 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_deauth(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_deauth_request *req) + struct cfg80211_deauth_request *req, + void *cookie) { - return ieee80211_mgd_deauth(IEEE80211_DEV_TO_SUB_IF(dev), req); + return ieee80211_mgd_deauth(IEEE80211_DEV_TO_SUB_IF(dev), + req, cookie); } static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev, - struct cfg80211_disassoc_request *req) + struct cfg80211_disassoc_request *req, + void *cookie) { - return ieee80211_mgd_disassoc(IEEE80211_DEV_TO_SUB_IF(dev), req); + return ieee80211_mgd_disassoc(IEEE80211_DEV_TO_SUB_IF(dev), + req, cookie); } static int ieee80211_join_ibss(struct wiphy *wiphy, struct net_device *dev, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2e92bbd9b2d0..327aabc07abe 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -918,9 +918,11 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, struct cfg80211_assoc_request *req); int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, - struct cfg80211_deauth_request *req); + struct cfg80211_deauth_request *req, + void *cookie); int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, - struct cfg80211_disassoc_request *req); + struct cfg80211_disassoc_request *req, + void *cookie); ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); void ieee80211_send_pspoll(struct ieee80211_local *local, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 15dbb57ab55e..c9db9646025b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -386,7 +386,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, - const u8 *bssid, u16 stype, u16 reason) + const u8 *bssid, u16 stype, u16 reason, + void *cookie) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -412,9 +413,9 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, mgmt->u.deauth.reason_code = cpu_to_le16(reason); if (stype == IEEE80211_STYPE_DEAUTH) - cfg80211_send_deauth(sdata->dev, (u8 *) mgmt, skb->len); + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len, cookie); else - cfg80211_send_disassoc(sdata->dev, (u8 *) mgmt, skb->len); + cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len, cookie); ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED); } @@ -1837,10 +1838,12 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, /* no action */ break; case RX_MGMT_CFG80211_DEAUTH: - cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); + cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len, + NULL); break; case RX_MGMT_CFG80211_DISASSOC: - cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len); + cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len, + NULL); break; default: 
WARN(1, "unexpected: %d", rma); @@ -2273,7 +2276,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, } int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, - struct cfg80211_deauth_request *req) + struct cfg80211_deauth_request *req, + void *cookie) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_mgd_work *wk; @@ -2305,13 +2309,15 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); ieee80211_send_deauth_disassoc(sdata, bssid, - IEEE80211_STYPE_DEAUTH, req->reason_code); + IEEE80211_STYPE_DEAUTH, req->reason_code, + cookie); return 0; } int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, - struct cfg80211_disassoc_request *req) + struct cfg80211_disassoc_request *req, + void *cookie) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -2331,6 +2337,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, mutex_unlock(&ifmgd->mtx); ieee80211_send_deauth_disassoc(sdata, req->bss->bssid, - IEEE80211_STYPE_DISASSOC, req->reason_code); + IEEE80211_STYPE_DISASSOC, req->reason_code, + cookie); return 0; } diff --git a/net/wireless/core.c b/net/wireless/core.c index c6813beded06..9c73769440a8 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -257,6 +257,71 @@ static void cfg80211_rfkill_sync_work(struct work_struct *work) cfg80211_rfkill_set_block(drv, rfkill_blocked(drv->rfkill)); } +static void cfg80211_process_events(struct wireless_dev *wdev) +{ + struct cfg80211_event *ev; + unsigned long flags; + + spin_lock_irqsave(&wdev->event_lock, flags); + while (!list_empty(&wdev->event_list)) { + ev = list_first_entry(&wdev->event_list, + struct cfg80211_event, list); + list_del(&ev->list); + spin_unlock_irqrestore(&wdev->event_lock, flags); + + wdev_lock(wdev); + switch (ev->type) { + case EVENT_CONNECT_RESULT: + __cfg80211_connect_result( + wdev->netdev, ev->cr.bssid, + ev->cr.req_ie, ev->cr.req_ie_len, + ev->cr.resp_ie, ev->cr.resp_ie_len, + ev->cr.status, + ev->cr.status == WLAN_STATUS_SUCCESS); + break; + case EVENT_ROAMED: + __cfg80211_roamed(wdev, ev->rm.bssid, + ev->rm.req_ie, ev->rm.req_ie_len, + ev->rm.resp_ie, ev->rm.resp_ie_len); + break; + case EVENT_DISCONNECTED: + __cfg80211_disconnected(wdev->netdev, + ev->dc.ie, ev->dc.ie_len, + ev->dc.reason, true); + break; + case EVENT_IBSS_JOINED: + __cfg80211_ibss_joined(wdev->netdev, ev->ij.bssid); + break; + } + wdev_unlock(wdev); + + kfree(ev); + + spin_lock_irqsave(&wdev->event_lock, flags); + } + spin_unlock_irqrestore(&wdev->event_lock, flags); +} + +static void cfg80211_event_work(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + + rdev = container_of(work, struct cfg80211_registered_device, + event_work); + + rtnl_lock(); + cfg80211_lock_rdev(rdev); + mutex_lock(&rdev->devlist_mtx); + + list_for_each_entry(wdev, &rdev->netdev_list, list) + cfg80211_process_events(wdev); + + mutex_unlock(&rdev->devlist_mtx); + cfg80211_unlock_rdev(rdev); + rtnl_unlock(); +} + /* exported functions */ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) @@ -299,6 +364,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) INIT_LIST_HEAD(&drv->netdev_list); spin_lock_init(&drv->bss_lock); INIT_LIST_HEAD(&drv->bss_list); + INIT_WORK(&drv->scan_done_wk, __cfg80211_scan_done); device_initialize(&drv->wiphy.dev); drv->wiphy.dev.class = &ieee80211_class; @@ -316,6 +382,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) 
INIT_WORK(&drv->rfkill_sync, cfg80211_rfkill_sync_work); INIT_WORK(&drv->conn_work, cfg80211_conn_work); + INIT_WORK(&drv->event_work, cfg80211_event_work); /* * Initialize wiphy parameters to IEEE 802.11 MIB default values. @@ -477,6 +544,9 @@ void wiphy_unregister(struct wiphy *wiphy) mutex_unlock(&drv->mtx); cancel_work_sync(&drv->conn_work); + cancel_work_sync(&drv->scan_done_wk); + kfree(drv->scan_req); + flush_work(&drv->event_work); cfg80211_debugfs_drv_del(drv); @@ -535,6 +605,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, switch (state) { case NETDEV_REGISTER: + mutex_init(&wdev->mtx); + INIT_LIST_HEAD(&wdev->event_list); + spin_lock_init(&wdev->event_lock); mutex_lock(&rdev->devlist_mtx); list_add(&wdev->list, &rdev->netdev_list); if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, @@ -566,15 +639,17 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, cfg80211_leave_ibss(rdev, dev, true); break; case NL80211_IFTYPE_STATION: + wdev_lock(wdev); #ifdef CONFIG_WIRELESS_EXT kfree(wdev->wext.ie); wdev->wext.ie = NULL; wdev->wext.ie_len = 0; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; #endif - cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, true); + __cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, true); cfg80211_mlme_down(rdev, dev); + wdev_unlock(wdev); break; default: break; @@ -582,20 +657,24 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, break; case NETDEV_UP: #ifdef CONFIG_WIRELESS_EXT + cfg80211_lock_rdev(rdev); + wdev_lock(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: if (wdev->wext.ibss.ssid_len) - cfg80211_join_ibss(rdev, dev, - &wdev->wext.ibss); + __cfg80211_join_ibss(rdev, dev, + &wdev->wext.ibss); break; case NL80211_IFTYPE_STATION: if (wdev->wext.connect.ssid_len) - cfg80211_connect(rdev, dev, - &wdev->wext.connect); + __cfg80211_connect(rdev, dev, + &wdev->wext.connect); break; default: break; } + wdev_unlock(wdev); + cfg80211_unlock_rdev(rdev); #endif break; case NETDEV_UNREGISTER: @@ -605,6 +684,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, list_del_init(&wdev->list); } mutex_unlock(&rdev->devlist_mtx); + mutex_destroy(&wdev->mtx); break; case NETDEV_PRE_UP: if (rfkill_blocked(rdev->rfkill)) diff --git a/net/wireless/core.h b/net/wireless/core.h index 92da612b3f9e..5ccd642e183b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -57,12 +57,14 @@ struct cfg80211_registered_device { u32 bss_generation; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ unsigned long suspend_at; + struct work_struct scan_done_wk; #ifdef CONFIG_NL80211_TESTMODE struct genl_info *testmode_info; #endif struct work_struct conn_work; + struct work_struct event_work; #ifdef CONFIG_CFG80211_DEBUGFS /* Debugfs entries */ @@ -170,12 +172,73 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); extern struct cfg80211_registered_device * cfg80211_get_dev_from_ifindex(int ifindex); +static inline void cfg80211_lock_rdev(struct cfg80211_registered_device *drv) +{ + mutex_lock(&drv->mtx); +} + static inline void cfg80211_unlock_rdev(struct cfg80211_registered_device *drv) { BUG_ON(IS_ERR(drv) || !drv); mutex_unlock(&drv->mtx); } +static inline void wdev_lock(struct wireless_dev *wdev) + __acquires(wdev) +{ + mutex_lock(&wdev->mtx); + __acquire(wdev->mtx); +} + +static inline void wdev_unlock(struct wireless_dev *wdev) + __releases(wdev) +{ + __release(wdev->mtx); + mutex_unlock(&wdev->mtx); +} + +#define 
ASSERT_RDEV_LOCK(rdev) WARN_ON(!mutex_is_locked(&(rdev)->mtx)); +#define ASSERT_WDEV_LOCK(wdev) WARN_ON(!mutex_is_locked(&(wdev)->mtx)); + +enum cfg80211_event_type { + EVENT_CONNECT_RESULT, + EVENT_ROAMED, + EVENT_DISCONNECTED, + EVENT_IBSS_JOINED, +}; + +struct cfg80211_event { + struct list_head list; + enum cfg80211_event_type type; + + union { + struct { + u8 bssid[ETH_ALEN]; + const u8 *req_ie; + const u8 *resp_ie; + size_t req_ie_len; + size_t resp_ie_len; + u16 status; + } cr; + struct { + u8 bssid[ETH_ALEN]; + const u8 *req_ie; + const u8 *resp_ie; + size_t req_ie_len; + size_t resp_ie_len; + } rm; + struct { + const u8 *ie; + size_t ie_len; + u16 reason; + } dc; + struct { + u8 bssid[ETH_ALEN]; + } ij; + }; +}; + + /* free object */ extern void cfg80211_dev_free(struct cfg80211_registered_device *drv); @@ -191,25 +254,46 @@ void cfg80211_bss_age(struct cfg80211_registered_device *dev, unsigned long age_secs); /* IBSS */ +int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_ibss_params *params); int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ibss_params *params); void cfg80211_clear_ibss(struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); +void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid); /* MLME */ +int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, + const u8 *bssid, + const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len); int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_auth_type auth_type, const u8 *bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len); +int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + const u8 *bssid, const u8 *prev_bssid, + const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len, bool use_mfp, + struct cfg80211_crypto_settings *crypt); int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, const u8 *bssid, const u8 *prev_bssid, const u8 *ssid, int ssid_len, const u8 *ie, int ie_len, bool use_mfp, struct cfg80211_crypto_settings *crypt); +int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason); int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *bssid, const u8 *ie, int ie_len, u16 reason); @@ -218,24 +302,38 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, const u8 *ie, int ie_len, u16 reason); void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct net_device *dev); +void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, bool wextev); /* SME */ +int __cfg80211_connect(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *connect); int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect); +int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, + struct net_device *dev, u16 
reason, + bool wextev); int cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); +void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len); void cfg80211_conn_work(struct work_struct *work); /* internal helpers */ int cfg80211_validate_key_settings(struct key_params *params, int key_idx, const u8 *mac_addr); -void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, u8 *ie, +void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap); void cfg80211_sme_scan_done(struct net_device *dev); void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len); void cfg80211_sme_disassoc(struct net_device *dev, int idx); +void __cfg80211_scan_done(struct work_struct *wk); #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index a5330c5a5477..99ef9364b7e8 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -10,7 +10,7 @@ #include "nl80211.h" -void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp) +void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss; @@ -39,22 +39,45 @@ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp) cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, gfp); + nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, + GFP_KERNEL); #ifdef CONFIG_WIRELESS_EXT memset(&wrqu, 0, sizeof(wrqu)); memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); #endif } + +void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_event *ev; + unsigned long flags; + + ev = kzalloc(sizeof(*ev), gfp); + if (!ev) + return; + + ev->type = EVENT_IBSS_JOINED; + memcpy(ev->cr.bssid, bssid, ETH_ALEN); + + spin_lock_irqsave(&wdev->event_lock, flags); + list_add_tail(&ev->list, &wdev->event_list); + spin_unlock_irqrestore(&wdev->event_lock, flags); + schedule_work(&rdev->event_work); +} EXPORT_SYMBOL(cfg80211_ibss_joined); -int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_ibss_params *params) +int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_ibss_params *params) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; + ASSERT_WDEV_LOCK(wdev); + if (wdev->ssid_len) return -EALREADY; @@ -72,10 +95,26 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, return 0; } -void cfg80211_clear_ibss(struct net_device *dev, bool nowext) +int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_ibss_params *params) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + wdev_lock(wdev); + err = __cfg80211_join_ibss(rdev, dev, params); + wdev_unlock(wdev); + + return err; +} + +static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; + ASSERT_WDEV_LOCK(wdev); + if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(&wdev->current_bss->pub); @@ -89,12 
+128,23 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext) #endif } -int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool nowext) +void cfg80211_clear_ibss(struct net_device *dev, bool nowext) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + wdev_lock(wdev); + __cfg80211_clear_ibss(dev, nowext); + wdev_unlock(wdev); +} + +static int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; + ASSERT_WDEV_LOCK(wdev); + if (!wdev->ssid_len) return -ENOLINK; @@ -103,11 +153,24 @@ int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, if (err) return err; - cfg80211_clear_ibss(dev, nowext); + __cfg80211_clear_ibss(dev, nowext); return 0; } +int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, + struct net_device *dev, bool nowext) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + wdev_lock(wdev); + err = __cfg80211_leave_ibss(rdev, dev, nowext); + wdev_unlock(wdev); + + return err; +} + #ifdef CONFIG_WIRELESS_EXT static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) @@ -184,12 +247,15 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (wdev->wext.ibss.channel == chan) return 0; - if (wdev->ssid_len) { - err = cfg80211_leave_ibss(wiphy_to_dev(wdev->wiphy), - dev, true); - if (err) - return err; - } + wdev_lock(wdev); + err = 0; + if (wdev->ssid_len) + err = __cfg80211_leave_ibss(wiphy_to_dev(wdev->wiphy), + dev, true); + wdev_unlock(wdev); + + if (err) + return err; if (chan) { wdev->wext.ibss.channel = chan; @@ -215,10 +281,12 @@ int cfg80211_ibss_wext_giwfreq(struct net_device *dev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) return -EINVAL; + wdev_lock(wdev); if (wdev->current_bss) chan = wdev->current_bss->pub.channel; else if (wdev->wext.ibss.channel) chan = wdev->wext.ibss.channel; + wdev_unlock(wdev); if (chan) { freq->m = chan->center_freq; @@ -247,12 +315,15 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, if (!wiphy_to_dev(wdev->wiphy)->ops->join_ibss) return -EOPNOTSUPP; - if (wdev->ssid_len) { - err = cfg80211_leave_ibss(wiphy_to_dev(wdev->wiphy), - dev, true); - if (err) - return err; - } + wdev_lock(wdev); + err = 0; + if (wdev->ssid_len) + err = __cfg80211_leave_ibss(wiphy_to_dev(wdev->wiphy), + dev, true); + wdev_unlock(wdev); + + if (err) + return err; /* iwconfig uses nul termination in SSID.. 
*/ if (len > 0 && ssid[len - 1] == '\0') @@ -279,6 +350,7 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev, data->flags = 0; + wdev_lock(wdev); if (wdev->ssid_len) { data->flags = 1; data->length = wdev->ssid_len; @@ -288,6 +360,7 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev, data->length = wdev->wext.ibss.ssid_len; memcpy(ssid, wdev->wext.ibss.ssid, data->length); } + wdev_unlock(wdev); return 0; } @@ -325,12 +398,15 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, compare_ether_addr(bssid, wdev->wext.ibss.bssid) == 0) return 0; - if (wdev->ssid_len) { - err = cfg80211_leave_ibss(wiphy_to_dev(wdev->wiphy), - dev, true); - if (err) - return err; - } + wdev_lock(wdev); + err = 0; + if (wdev->ssid_len) + err = __cfg80211_leave_ibss(wiphy_to_dev(wdev->wiphy), + dev, true); + wdev_unlock(wdev); + + if (err) + return err; if (bssid) { memcpy(wdev->wext.bssid, bssid, ETH_ALEN); @@ -355,10 +431,13 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; + wdev_lock(wdev); if (wdev->current_bss) memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN); else memcpy(ap_addr->sa_data, wdev->wext.ibss.bssid, ETH_ALEN); + wdev_unlock(wdev); + return 0; } /* temporary symbol - mark GPL - in the future the handler won't be */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 960bf60e44e5..1b2ca1fea7a1 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -23,7 +23,7 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) u16 status = le16_to_cpu(mgmt->u.auth.status_code); bool done = false; - might_sleep(); + wdev_lock(wdev); for (i = 0; i < MAX_AUTH_BSSES; i++) { if (wdev->authtry_bsses[i] && @@ -45,6 +45,8 @@ void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) nl80211_send_rx_auth(rdev, dev, buf, len, GFP_KERNEL); cfg80211_sme_rx_auth(dev, buf, len); + + wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_rx_auth); @@ -59,14 +61,15 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) int i, ieoffs = offsetof(struct ieee80211_mgmt, u.assoc_resp.variable); bool done; - might_sleep(); + wdev_lock(wdev); status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL); - cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, - status_code, GFP_KERNEL); + __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, ie, len - ieoffs, + status_code, + status_code == WLAN_STATUS_SUCCESS); if (status_code == WLAN_STATUS_SUCCESS) { for (i = 0; wdev->current_bss && i < MAX_AUTH_BSSES; i++) { @@ -81,10 +84,13 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) WARN_ON(!done); } + + wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_rx_assoc); -void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) +static void __cfg80211_send_deauth(struct net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -94,7 +100,7 @@ void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) int i; bool done = false; - might_sleep(); + ASSERT_WDEV_LOCK(wdev); nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); @@ -132,17 +138,35 @@ void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len) reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); from_ap = memcmp(mgmt->da, dev->dev_addr, ETH_ALEN) == 0; - __cfg80211_disconnected(dev, GFP_KERNEL, NULL, 0, - 
reason_code, from_ap); + __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); } else if (wdev->sme_state == CFG80211_SME_CONNECTING) { - cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - GFP_KERNEL); + __cfg80211_connect_result(dev, mgmt->bssid, NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + false); + } +} + + +void cfg80211_send_deauth(struct net_device *dev, const u8 *buf, size_t len, + void *cookie) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + BUG_ON(cookie && wdev != cookie); + + if (cookie) { + /* called within callback */ + __cfg80211_send_deauth(dev, buf, len); + } else { + wdev_lock(wdev); + __cfg80211_send_deauth(dev, buf, len); + wdev_unlock(wdev); } } EXPORT_SYMBOL(cfg80211_send_deauth); -void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) +static void __cfg80211_send_disassoc(struct net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -154,12 +178,12 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) bool from_ap; bool done = false; - might_sleep(); + wdev_lock(wdev); nl80211_send_disassoc(rdev, dev, buf, len, GFP_KERNEL); if (!wdev->sme_state == CFG80211_SME_CONNECTED) - return; + goto out; if (wdev->current_bss && memcmp(wdev->current_bss, bssid, ETH_ALEN) == 0) { @@ -180,8 +204,26 @@ void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len) reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); from_ap = memcmp(mgmt->da, dev->dev_addr, ETH_ALEN) == 0; - __cfg80211_disconnected(dev, GFP_KERNEL, NULL, 0, - reason_code, from_ap); + __cfg80211_disconnected(dev, NULL, 0, reason_code, from_ap); + out: + wdev_unlock(wdev); +} + +void cfg80211_send_disassoc(struct net_device *dev, const u8 *buf, size_t len, + void *cookie) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + BUG_ON(cookie && wdev != cookie); + + if (cookie) { + /* called within callback */ + __cfg80211_send_disassoc(dev, buf, len); + } else { + wdev_lock(wdev); + __cfg80211_send_disassoc(dev, buf, len); + wdev_unlock(wdev); + } } EXPORT_SYMBOL(cfg80211_send_disassoc); @@ -193,13 +235,13 @@ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) int i; bool done = false; - might_sleep(); + wdev_lock(wdev); nl80211_send_auth_timeout(rdev, dev, addr, GFP_KERNEL); if (wdev->sme_state == CFG80211_SME_CONNECTING) - cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - GFP_KERNEL); + __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + false); for (i = 0; addr && i < MAX_AUTH_BSSES; i++) { if (wdev->authtry_bsses[i] && @@ -214,6 +256,8 @@ void cfg80211_send_auth_timeout(struct net_device *dev, const u8 *addr) } WARN_ON(!done); + + wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_auth_timeout); @@ -225,13 +269,13 @@ void cfg80211_send_assoc_timeout(struct net_device *dev, const u8 *addr) int i; bool done = false; - might_sleep(); + wdev_lock(wdev); nl80211_send_assoc_timeout(rdev, dev, addr, GFP_KERNEL); if (wdev->sme_state == CFG80211_SME_CONNECTING) - cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - GFP_KERNEL); + __cfg80211_connect_result(dev, addr, NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + false); for (i = 0; addr && i < MAX_AUTH_BSSES; i++) { if (wdev->auth_bsses[i] && @@ -246,6 +290,8 @@ void cfg80211_send_assoc_timeout(struct net_device 
*dev, const u8 *addr) } WARN_ON(!done); + + wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_assoc_timeout); @@ -276,17 +322,21 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, EXPORT_SYMBOL(cfg80211_michael_mic_failure); /* some MLME handling for userspace SME */ -int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct ieee80211_channel *chan, - enum nl80211_auth_type auth_type, const u8 *bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len) +int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, + const u8 *bssid, + const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_auth_request req; struct cfg80211_internal_bss *bss; int i, err, slot = -1, nfree = 0; + ASSERT_WDEV_LOCK(wdev); + if (wdev->current_bss && memcmp(bssid, wdev->current_bss->pub.bssid, ETH_ALEN) == 0) return -EALREADY; @@ -342,18 +392,37 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, return err; } -int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct ieee80211_channel *chan, - const u8 *bssid, const u8 *prev_bssid, - const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len, bool use_mfp, - struct cfg80211_crypto_settings *crypt) +int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, + struct net_device *dev, struct ieee80211_channel *chan, + enum nl80211_auth_type auth_type, const u8 *bssid, + const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len) +{ + int err; + + wdev_lock(dev->ieee80211_ptr); + err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, + ssid, ssid_len, ie, ie_len); + wdev_unlock(dev->ieee80211_ptr); + + return err; +} + +int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + const u8 *bssid, const u8 *prev_bssid, + const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len, bool use_mfp, + struct cfg80211_crypto_settings *crypt) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_assoc_request req; struct cfg80211_internal_bss *bss; int i, err, slot = -1; + ASSERT_WDEV_LOCK(wdev); + memset(&req, 0, sizeof(req)); if (wdev->current_bss) @@ -390,14 +459,35 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, return err; } -int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason) +int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + const u8 *bssid, const u8 *prev_bssid, + const u8 *ssid, int ssid_len, + const u8 *ie, int ie_len, bool use_mfp, + struct cfg80211_crypto_settings *crypt) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + wdev_lock(wdev); + err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, + ssid, ssid_len, ie, ie_len, use_mfp, crypt); + wdev_unlock(wdev); + + return err; +} + +int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_deauth_request req; int i; + ASSERT_WDEV_LOCK(wdev); + memset(&req, 0, sizeof(req)); req.reason_code = reason; req.ie = ie; @@ -421,16 +511,32 @@ int cfg80211_mlme_deauth(struct 
cfg80211_registered_device *rdev, if (!req.bss) return -ENOTCONN; - return rdev->ops->deauth(&rdev->wiphy, dev, &req); + return rdev->ops->deauth(&rdev->wiphy, dev, &req, wdev); } -int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *bssid, - const u8 *ie, int ie_len, u16 reason) +int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + wdev_lock(wdev); + err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason); + wdev_unlock(wdev); + + return err; +} + +static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_disassoc_request req; + ASSERT_WDEV_LOCK(wdev); + memset(&req, 0, sizeof(req)); req.reason_code = reason; req.ie = ie; @@ -440,7 +546,21 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, else return -ENOTCONN; - return rdev->ops->disassoc(&rdev->wiphy, dev, &req); + return rdev->ops->disassoc(&rdev->wiphy, dev, &req, wdev); +} + +int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *bssid, + const u8 *ie, int ie_len, u16 reason) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + wdev_lock(wdev); + err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason); + wdev_unlock(wdev); + + return err; } void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, @@ -450,6 +570,8 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, struct cfg80211_deauth_request req; int i; + ASSERT_WDEV_LOCK(wdev); + if (!rdev->ops->deauth) return; @@ -460,7 +582,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, if (wdev->current_bss) { req.bss = &wdev->current_bss->pub; - rdev->ops->deauth(&rdev->wiphy, dev, &req); + rdev->ops->deauth(&rdev->wiphy, dev, &req, wdev); if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(&wdev->current_bss->pub); @@ -471,7 +593,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, for (i = 0; i < MAX_AUTH_BSSES; i++) { if (wdev->auth_bsses[i]) { req.bss = &wdev->auth_bsses[i]->pub; - rdev->ops->deauth(&rdev->wiphy, dev, &req); + rdev->ops->deauth(&rdev->wiphy, dev, &req, wdev); if (wdev->auth_bsses[i]) { cfg80211_unhold_bss(wdev->auth_bsses[i]); cfg80211_put_bss(&wdev->auth_bsses[i]->pub); @@ -480,7 +602,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, } if (wdev->authtry_bsses[i]) { req.bss = &wdev->authtry_bsses[i]->pub; - rdev->ops->deauth(&rdev->wiphy, dev, &req); + rdev->ops->deauth(&rdev->wiphy, dev, &req, wdev); if (wdev->authtry_bsses[i]) { cfg80211_unhold_bss(wdev->authtry_bsses[i]); cfg80211_put_bss(&wdev->authtry_bsses[i]->pub); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4976eac888a3..cf4ac786b20a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4029,6 +4029,8 @@ static int nl80211_add_scan_req(struct sk_buff *msg, struct nlattr *nest; int i; + ASSERT_RDEV_LOCK(rdev); + if (WARN_ON(!req)) return 0; @@ -4391,12 +4393,12 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, - u8 *ie, size_t ie_len, bool from_ap, gfp_t 
gfp) + const u8 *ie, size_t ie_len, bool from_ap) { struct sk_buff *msg; void *hdr; - msg = nlmsg_new(NLMSG_GOODSIZE, gfp); + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!msg) return; @@ -4420,7 +4422,7 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); return; nla_put_failure: diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index cf3708b48c29..44cc2a76a1b0 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -42,7 +42,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp); void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, - u8 *ie, size_t ie_len, bool from_ap, gfp_t gfp); + const u8 *ie, size_t ie_len, bool from_ap); void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 1625faf1de57..4f552c3f29a3 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -17,13 +17,21 @@ #define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ) -void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) +void __cfg80211_scan_done(struct work_struct *wk) { + struct cfg80211_registered_device *rdev; + struct cfg80211_scan_request *request; struct net_device *dev; #ifdef CONFIG_WIRELESS_EXT union iwreq_data wrqu; #endif + rdev = container_of(wk, struct cfg80211_registered_device, + scan_done_wk); + + mutex_lock(&rdev->mtx); + request = rdev->scan_req; + dev = dev_get_by_index(&init_net, request->ifidx); if (!dev) goto out; @@ -35,7 +43,7 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) */ cfg80211_sme_scan_done(dev); - if (aborted) + if (request->aborted) nl80211_send_scan_aborted(wiphy_to_dev(request->wiphy), dev); else nl80211_send_scan_done(wiphy_to_dev(request->wiphy), dev); @@ -43,7 +51,7 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) wiphy_to_dev(request->wiphy)->scan_req = NULL; #ifdef CONFIG_WIRELESS_EXT - if (!aborted) { + if (!request->aborted) { memset(&wrqu, 0, sizeof(wrqu)); wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL); @@ -53,8 +61,24 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) dev_put(dev); out: + cfg80211_unlock_rdev(rdev); kfree(request); } + +void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) +{ + struct net_device *dev = dev_get_by_index(&init_net, request->ifidx); + if (WARN_ON(!dev)) { + kfree(request); + return; + } + + WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); + + request->aborted = aborted; + schedule_work(&wiphy_to_dev(request->wiphy)->scan_done_wk); + dev_put(dev); +} EXPORT_SYMBOL(cfg80211_scan_done); static void bss_release(struct kref *ref) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 066a19ef9d73..472e2412c781 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -38,6 +38,8 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) int n_channels, err; ASSERT_RTNL(); + ASSERT_RDEV_LOCK(drv); + ASSERT_WDEV_LOCK(wdev); if (drv->scan_req) return -EBUSY; @@ -106,6 +108,8 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) struct cfg80211_connect_params *params; int err; + ASSERT_WDEV_LOCK(wdev); + if (!wdev->conn) return 0; @@ -117,11 +121,11 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) 
case CFG80211_CONN_AUTHENTICATE_NEXT: BUG_ON(!drv->ops->auth); wdev->conn->state = CFG80211_CONN_AUTHENTICATING; - return cfg80211_mlme_auth(drv, wdev->netdev, - params->channel, params->auth_type, - params->bssid, - params->ssid, params->ssid_len, - NULL, 0); + return __cfg80211_mlme_auth(drv, wdev->netdev, + params->channel, params->auth_type, + params->bssid, + params->ssid, params->ssid_len, + NULL, 0); case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!drv->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; @@ -131,14 +135,16 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) * that some APs don't like that -- so we'd need to retry * the association. */ - err = cfg80211_mlme_assoc(drv, wdev->netdev, - params->channel, params->bssid, NULL, - params->ssid, params->ssid_len, - params->ie, params->ie_len, - false, ¶ms->crypto); + err = __cfg80211_mlme_assoc(drv, wdev->netdev, + params->channel, params->bssid, + NULL, + params->ssid, params->ssid_len, + params->ie, params->ie_len, + false, ¶ms->crypto); if (err) - cfg80211_mlme_deauth(drv, wdev->netdev, params->bssid, - NULL, 0, WLAN_REASON_DEAUTH_LEAVING); + __cfg80211_mlme_deauth(drv, wdev->netdev, params->bssid, + NULL, 0, + WLAN_REASON_DEAUTH_LEAVING); return err; default: return 0; @@ -152,22 +158,31 @@ void cfg80211_conn_work(struct work_struct *work) struct wireless_dev *wdev; rtnl_lock(); + cfg80211_lock_rdev(drv); mutex_lock(&drv->devlist_mtx); list_for_each_entry(wdev, &drv->netdev_list, list) { - if (!netif_running(wdev->netdev)) + wdev_lock(wdev); + if (!netif_running(wdev->netdev)) { + wdev_unlock(wdev); continue; - if (wdev->sme_state != CFG80211_SME_CONNECTING) + } + if (wdev->sme_state != CFG80211_SME_CONNECTING) { + wdev_unlock(wdev); continue; + } if (cfg80211_conn_do_work(wdev)) - cfg80211_connect_result(wdev->netdev, - wdev->conn->params.bssid, - NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - GFP_ATOMIC); + __cfg80211_connect_result( + wdev->netdev, + wdev->conn->params.bssid, + NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + false); + wdev_unlock(wdev); } mutex_unlock(&drv->devlist_mtx); + cfg80211_unlock_rdev(drv); rtnl_unlock(); } @@ -177,6 +192,8 @@ static bool cfg80211_get_conn_bss(struct wireless_dev *wdev) struct cfg80211_bss *bss; u16 capa = WLAN_CAPABILITY_ESS; + ASSERT_WDEV_LOCK(wdev); + if (wdev->conn->params.privacy) capa |= WLAN_CAPABILITY_PRIVACY; @@ -198,11 +215,13 @@ static bool cfg80211_get_conn_bss(struct wireless_dev *wdev) return true; } -void cfg80211_sme_scan_done(struct net_device *dev) +static void __cfg80211_sme_scan_done(struct net_device *dev) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *drv = wiphy_to_dev(wdev->wiphy); + ASSERT_WDEV_LOCK(wdev); + if (wdev->sme_state != CFG80211_SME_CONNECTING) return; @@ -218,15 +237,26 @@ void cfg80211_sme_scan_done(struct net_device *dev) if (wdev->conn->state == CFG80211_CONN_SCAN_AGAIN) schedule_work(&drv->conn_work); else - cfg80211_connect_result(dev, wdev->conn->params.bssid, - NULL, 0, NULL, 0, - WLAN_STATUS_UNSPECIFIED_FAILURE, - GFP_ATOMIC); - return; + __cfg80211_connect_result( + wdev->netdev, + wdev->conn->params.bssid, + NULL, 0, NULL, 0, + WLAN_STATUS_UNSPECIFIED_FAILURE, + false); } } -void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len) +void cfg80211_sme_scan_done(struct net_device *dev) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + wdev_lock(wdev); + __cfg80211_sme_scan_done(dev); + wdev_unlock(wdev); +} + +void cfg80211_sme_rx_auth(struct 
net_device *dev, + const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; @@ -234,6 +264,8 @@ void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len) struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; u16 status_code = le16_to_cpu(mgmt->u.auth.status_code); + ASSERT_WDEV_LOCK(wdev); + /* should only RX auth frames when connecting */ if (wdev->sme_state != CFG80211_SME_CONNECTING) return; @@ -273,10 +305,10 @@ void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len) } } -static void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, - u16 status, bool wextev, gfp_t gfp) +void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, + u16 status, bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss; @@ -284,18 +316,20 @@ static void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, union iwreq_data wrqu; #endif + ASSERT_WDEV_LOCK(wdev); + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return; if (wdev->sme_state == CFG80211_SME_CONNECTED) nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, - resp_ie, resp_ie_len, gfp); + resp_ie, resp_ie_len, GFP_KERNEL); else nl80211_send_connect_result(wiphy_to_dev(wdev->wiphy), dev, bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, - status, gfp); + status, GFP_KERNEL); #ifdef CONFIG_WIRELESS_EXT if (wextev) { @@ -362,21 +396,43 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *resp_ie, size_t resp_ie_len, u16 status, gfp_t gfp) { - bool wextev = status == WLAN_STATUS_SUCCESS; - __cfg80211_connect_result(dev, bssid, req_ie, req_ie_len, resp_ie, resp_ie_len, status, wextev, gfp); + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_event *ev; + unsigned long flags; + + ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); + if (!ev) + return; + + ev->type = EVENT_CONNECT_RESULT; + memcpy(ev->cr.bssid, bssid, ETH_ALEN); + ev->cr.req_ie = ((u8 *)ev) + sizeof(*ev); + ev->cr.req_ie_len = req_ie_len; + memcpy((void *)ev->cr.req_ie, req_ie, req_ie_len); + ev->cr.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len; + ev->cr.resp_ie_len = resp_ie_len; + memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len); + ev->cr.status = status; + + spin_lock_irqsave(&wdev->event_lock, flags); + list_add_tail(&ev->list, &wdev->event_list); + spin_unlock_irqrestore(&wdev->event_lock, flags); + schedule_work(&rdev->event_work); } EXPORT_SYMBOL(cfg80211_connect_result); -void cfg80211_roamed(struct net_device *dev, const u8 *bssid, - const u8 *req_ie, size_t req_ie_len, - const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp) +void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len) { - struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss; #ifdef CONFIG_WIRELESS_EXT union iwreq_data wrqu; #endif + ASSERT_WDEV_LOCK(wdev); + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return; @@ -402,31 +458,62 @@ void cfg80211_roamed(struct net_device *dev, const u8 *bssid, cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), 
dev, bssid, - req_ie, req_ie_len, resp_ie, resp_ie_len, gfp); + nl80211_send_roamed(wiphy_to_dev(wdev->wiphy), wdev->netdev, bssid, + req_ie, req_ie_len, resp_ie, resp_ie_len, + GFP_KERNEL); #ifdef CONFIG_WIRELESS_EXT if (req_ie) { memset(&wrqu, 0, sizeof(wrqu)); wrqu.data.length = req_ie_len; - wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, req_ie); + wireless_send_event(wdev->netdev, IWEVASSOCRESPIE, + &wrqu, req_ie); } if (resp_ie) { memset(&wrqu, 0, sizeof(wrqu)); wrqu.data.length = resp_ie_len; - wireless_send_event(dev, IWEVASSOCRESPIE, &wrqu, resp_ie); + wireless_send_event(wdev->netdev, IWEVASSOCRESPIE, + &wrqu, resp_ie); } memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); - wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); + wireless_send_event(wdev->netdev, SIOCGIWAP, &wrqu, NULL); #endif } + +void cfg80211_roamed(struct net_device *dev, const u8 *bssid, + const u8 *req_ie, size_t req_ie_len, + const u8 *resp_ie, size_t resp_ie_len, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_event *ev; + unsigned long flags; + + ev = kzalloc(sizeof(*ev) + req_ie_len + resp_ie_len, gfp); + if (!ev) + return; + + ev->type = EVENT_ROAMED; + memcpy(ev->rm.bssid, bssid, ETH_ALEN); + ev->rm.req_ie = ((u8 *)ev) + sizeof(*ev); + ev->rm.req_ie_len = req_ie_len; + memcpy((void *)ev->rm.req_ie, req_ie, req_ie_len); + ev->rm.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len; + ev->rm.resp_ie_len = resp_ie_len; + memcpy((void *)ev->rm.resp_ie, resp_ie, resp_ie_len); + + spin_lock_irqsave(&wdev->event_lock, flags); + list_add_tail(&ev->list, &wdev->event_list); + spin_unlock_irqrestore(&wdev->event_lock, flags); + schedule_work(&rdev->event_work); +} EXPORT_SYMBOL(cfg80211_roamed); -void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, u8 *ie, +void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -434,6 +521,8 @@ void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, u8 *ie, union iwreq_data wrqu; #endif + ASSERT_WDEV_LOCK(wdev); + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return; @@ -456,7 +545,7 @@ void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, u8 *ie, } nl80211_send_disconnected(wiphy_to_dev(wdev->wiphy), dev, - reason, ie, ie_len, from_ap, gfp); + reason, ie, ie_len, from_ap); #ifdef CONFIG_WIRELESS_EXT memset(&wrqu, 0, sizeof(wrqu)); @@ -468,16 +557,36 @@ void __cfg80211_disconnected(struct net_device *dev, gfp_t gfp, u8 *ie, void cfg80211_disconnected(struct net_device *dev, u16 reason, u8 *ie, size_t ie_len, gfp_t gfp) { - __cfg80211_disconnected(dev, gfp, ie, ie_len, reason, true); + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct cfg80211_event *ev; + unsigned long flags; + + ev = kzalloc(sizeof(*ev) + ie_len, gfp); + if (!ev) + return; + + ev->type = EVENT_DISCONNECTED; + ev->dc.ie = ((u8 *)ev) + sizeof(*ev); + ev->dc.ie_len = ie_len; + memcpy((void *)ev->dc.ie, ie, ie_len); + ev->dc.reason = reason; + + spin_lock_irqsave(&wdev->event_lock, flags); + list_add_tail(&ev->list, &wdev->event_list); + spin_unlock_irqrestore(&wdev->event_lock, flags); + schedule_work(&rdev->event_work); } EXPORT_SYMBOL(cfg80211_disconnected); -int cfg80211_connect(struct cfg80211_registered_device *rdev, - struct 
net_device *dev, - struct cfg80211_connect_params *connect) +int __cfg80211_connect(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *connect) { - int err; struct wireless_dev *wdev = dev->ieee80211_ptr; + int err; + + ASSERT_WDEV_LOCK(wdev); if (wdev->sme_state != CFG80211_SME_IDLE) return -EALREADY; @@ -572,12 +681,27 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, } } -int cfg80211_disconnect(struct cfg80211_registered_device *rdev, - struct net_device *dev, u16 reason, bool wextev) +int cfg80211_connect(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_connect_params *connect) +{ + int err; + + wdev_lock(dev->ieee80211_ptr); + err = __cfg80211_connect(rdev, dev, connect); + wdev_unlock(dev->ieee80211_ptr); + + return err; +} + +int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, + struct net_device *dev, u16 reason, bool wextev) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; + ASSERT_WDEV_LOCK(wdev); + if (wdev->sme_state == CFG80211_SME_IDLE) return -EINVAL; @@ -601,8 +725,9 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, } /* wdev->conn->params.bssid must be set if > SCANNING */ - err = cfg80211_mlme_deauth(rdev, dev, wdev->conn->params.bssid, - NULL, 0, reason); + err = __cfg80211_mlme_deauth(rdev, dev, + wdev->conn->params.bssid, + NULL, 0, reason); if (err) return err; } else { @@ -612,21 +737,36 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, } if (wdev->sme_state == CFG80211_SME_CONNECTED) - __cfg80211_disconnected(dev, GFP_KERNEL, NULL, 0, 0, false); + __cfg80211_disconnected(dev, NULL, 0, 0, false); else if (wdev->sme_state == CFG80211_SME_CONNECTING) __cfg80211_connect_result(dev, NULL, NULL, 0, NULL, 0, WLAN_STATUS_UNSPECIFIED_FAILURE, - wextev, GFP_KERNEL); + wextev); return 0; } +int cfg80211_disconnect(struct cfg80211_registered_device *rdev, + struct net_device *dev, + u16 reason, bool wextev) +{ + int err; + + wdev_lock(dev->ieee80211_ptr); + err = __cfg80211_disconnect(rdev, dev, reason, wextev); + wdev_unlock(dev->ieee80211_ptr); + + return err; +} + void cfg80211_sme_disassoc(struct net_device *dev, int idx) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); u8 bssid[ETH_ALEN]; + ASSERT_WDEV_LOCK(wdev); + if (!wdev->conn) return; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index fe1987acb891..6f75aaa7f795 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -15,6 +15,9 @@ static int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, { int err; + ASSERT_RDEV_LOCK(rdev); + ASSERT_WDEV_LOCK(wdev); + if (!netif_running(wdev->netdev)) return 0; @@ -24,8 +27,8 @@ static int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, err = 0; if (wdev->wext.connect.ssid_len != 0) - err = cfg80211_connect(rdev, wdev->netdev, - &wdev->wext.connect); + err = __cfg80211_connect(rdev, wdev->netdev, + &wdev->wext.connect); return err; } @@ -50,33 +53,43 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, if (chan && (chan->flags & IEEE80211_CHAN_DISABLED)) return -EINVAL; - if (wdev->wext.connect.channel == chan) - return 0; + cfg80211_lock_rdev(rdev); + wdev_lock(wdev); + + if (wdev->wext.connect.channel == chan) { + err = 0; + goto out; + } if (wdev->sme_state != CFG80211_SME_IDLE) { bool event = true; /* if SSID set, we'll try right again, avoid event */ if 
(wdev->wext.connect.ssid_len) event = false; - err = cfg80211_disconnect(wiphy_to_dev(wdev->wiphy), - dev, WLAN_REASON_DEAUTH_LEAVING, - event); + err = __cfg80211_disconnect(wiphy_to_dev(wdev->wiphy), + dev, WLAN_REASON_DEAUTH_LEAVING, + event); if (err) - return err; + goto out; } + wdev->wext.connect.channel = chan; /* SSID is not set, we just want to switch channel */ if (wdev->wext.connect.ssid_len && chan) { - if (!rdev->ops->set_channel) - return -EOPNOTSUPP; - - return rdev->ops->set_channel(wdev->wiphy, chan, - NL80211_CHAN_NO_HT); + err = -EOPNOTSUPP; + if (rdev->ops->set_channel) + err = rdev->ops->set_channel(wdev->wiphy, chan, + NL80211_CHAN_NO_HT); + goto out; } - return cfg80211_mgd_wext_connect(wiphy_to_dev(wdev->wiphy), wdev); + err = cfg80211_mgd_wext_connect(wiphy_to_dev(wdev->wiphy), wdev); + out: + wdev_unlock(wdev); + cfg80211_unlock_rdev(rdev); + return err; } /* temporary symbol - mark GPL - in the future the handler won't be */ EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_siwfreq); @@ -92,10 +105,12 @@ int cfg80211_mgd_wext_giwfreq(struct net_device *dev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; + wdev_lock(wdev); if (wdev->current_bss) chan = wdev->current_bss->pub.channel; else if (wdev->wext.connect.channel) chan = wdev->wext.connect.channel; + wdev_unlock(wdev); if (chan) { freq->m = chan->center_freq; @@ -128,21 +143,26 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; + cfg80211_lock_rdev(wiphy_to_dev(wdev->wiphy)); + wdev_lock(wdev); + + err = 0; + if (wdev->wext.connect.ssid && len && len == wdev->wext.connect.ssid_len && memcmp(wdev->wext.connect.ssid, ssid, len)) - return 0; + goto out; if (wdev->sme_state != CFG80211_SME_IDLE) { bool event = true; /* if SSID set now, we'll try to connect, avoid event */ if (len) event = false; - err = cfg80211_disconnect(wiphy_to_dev(wdev->wiphy), - dev, WLAN_REASON_DEAUTH_LEAVING, - event); + err = __cfg80211_disconnect(wiphy_to_dev(wdev->wiphy), + dev, WLAN_REASON_DEAUTH_LEAVING, + event); if (err) - return err; + goto out; } wdev->wext.connect.ssid = wdev->wext.ssid; @@ -151,7 +171,11 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, wdev->wext.connect.crypto.control_port = false; - return cfg80211_mgd_wext_connect(wiphy_to_dev(wdev->wiphy), wdev); + err = cfg80211_mgd_wext_connect(wiphy_to_dev(wdev->wiphy), wdev); + out: + wdev_unlock(wdev); + cfg80211_unlock_rdev(wiphy_to_dev(wdev->wiphy)); + return err; } /* temporary symbol - mark GPL - in the future the handler won't be */ EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_siwessid); @@ -168,6 +192,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, data->flags = 0; + wdev_lock(wdev); if (wdev->ssid_len) { data->flags = 1; data->length = wdev->ssid_len; @@ -178,6 +203,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, memcpy(ssid, wdev->wext.connect.ssid, data->length); } else data->flags = 0; + wdev_unlock(wdev); return 0; } @@ -203,21 +229,25 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) bssid = NULL; + cfg80211_lock_rdev(wiphy_to_dev(wdev->wiphy)); + wdev_lock(wdev); + + err = 0; /* both automatic */ if (!bssid && !wdev->wext.connect.bssid) - return 0; + goto out; /* fixed already - and no change */ if (wdev->wext.connect.bssid && bssid && compare_ether_addr(bssid, wdev->wext.connect.bssid) == 0) - return 0; + goto out; if (wdev->sme_state != CFG80211_SME_IDLE) { - err = 
cfg80211_disconnect(wiphy_to_dev(wdev->wiphy), - dev, WLAN_REASON_DEAUTH_LEAVING, - false); + err = __cfg80211_disconnect(wiphy_to_dev(wdev->wiphy), + dev, WLAN_REASON_DEAUTH_LEAVING, + false); if (err) - return err; + goto out; } if (bssid) { @@ -226,7 +256,11 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, } else wdev->wext.connect.bssid = NULL; - return cfg80211_mgd_wext_connect(wiphy_to_dev(wdev->wiphy), wdev); + err = cfg80211_mgd_wext_connect(wiphy_to_dev(wdev->wiphy), wdev); + out: + wdev_unlock(wdev); + cfg80211_unlock_rdev(wiphy_to_dev(wdev->wiphy)); + return err; } /* temporary symbol - mark GPL - in the future the handler won't be */ EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_siwap); @@ -243,12 +277,14 @@ int cfg80211_mgd_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; + wdev_lock(wdev); if (wdev->current_bss) memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN); else if (wdev->wext.connect.bssid) memcpy(ap_addr->sa_data, wdev->wext.connect.bssid, ETH_ALEN); else memset(ap_addr->sa_data, 0, ETH_ALEN); + wdev_unlock(wdev); return 0; } @@ -270,15 +306,20 @@ int cfg80211_wext_siwgenie(struct net_device *dev, if (!ie_len) ie = NULL; + wdev_lock(wdev); + /* no change */ + err = 0; if (wdev->wext.ie_len == ie_len && memcmp(wdev->wext.ie, ie, ie_len) == 0) - return 0; + goto out; if (ie_len) { ie = kmemdup(extra, ie_len, GFP_KERNEL); - if (!ie) - return -ENOMEM; + if (!ie) { + err = -ENOMEM; + goto out; + } } else ie = NULL; @@ -287,14 +328,17 @@ int cfg80211_wext_siwgenie(struct net_device *dev, wdev->wext.ie_len = ie_len; if (wdev->sme_state != CFG80211_SME_IDLE) { - err = cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, false); + err = __cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, false); if (err) - return err; + goto out; } /* userspace better not think we'll reconnect */ - return 0; + err = 0; + out: + wdev_unlock(wdev); + return err; } EXPORT_SYMBOL_GPL(cfg80211_wext_siwgenie); @@ -305,6 +349,7 @@ int cfg80211_wext_siwmlme(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct iw_mlme *mlme = (struct iw_mlme *)extra; struct cfg80211_registered_device *rdev; + int err; if (!wdev) return -EOPNOTSUPP; @@ -317,13 +362,19 @@ int cfg80211_wext_siwmlme(struct net_device *dev, if (mlme->addr.sa_family != ARPHRD_ETHER) return -EINVAL; + wdev_lock(wdev); switch (mlme->cmd) { case IW_MLME_DEAUTH: case IW_MLME_DISASSOC: - return cfg80211_disconnect(rdev, dev, mlme->reason_code, - true); + err = __cfg80211_disconnect(rdev, dev, mlme->reason_code, + true); + break; default: - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + break; } + wdev_unlock(wdev); + + return err; } EXPORT_SYMBOL_GPL(cfg80211_wext_siwmlme); -- cgit v1.2.3 From c99e6efe1ba04561e7d93a81f0be07e37427e835 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 10 Jul 2009 14:57:56 +0200 Subject: sched: INIT_PREEMPT_COUNT Pull the initial preempt_count value into a single definition site. Maintainers for: alpha, ia64 and m68k, please have a look, your arch code is funny. The header magic is a bit odd, but similar to the KERNEL_DS one, CPP waits with expanding these macros until the INIT_THREAD_INFO macro itself is expanded, which is in arch/*/kernel/init_task.c where we've already included sched.h so we're good. 
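As a minimal, self-contained sketch of the preprocessor point made above (standalone userspace code, not taken from the patch; the struct and macro names only mimic the kernel ones): a function-like macro may reference a name such as INIT_PREEMPT_COUNT before that name is defined, as long as the definition is visible at the place where the macro is finally expanded -- which for the real code is arch/*/kernel/init_task.c after sched.h has been included.

	#include <stdio.h>

	/* Defined first; refers to a name that does not exist yet. */
	#define INIT_THREAD_INFO(x)	{ (x), INIT_PREEMPT_COUNT }

	/* Defined afterwards, playing the role of sched.h. */
	#define INIT_PREEMPT_COUNT	(1)

	struct thread_info {
		int task;
		int preempt_count;
	};

	int main(void)
	{
		/* Expansion happens here, where both macros are visible. */
		struct thread_info ti = INIT_THREAD_INFO(42);

		printf("preempt_count = %d\n", ti.preempt_count);
		return 0;
	}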
Cc: tony.luck@intel.com Cc: rth@twiddle.net Cc: geert@linux-m68k.org Signed-off-by: Peter Zijlstra Acked-by: Matt Mackall Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/thread_info.h | 1 + arch/arm/include/asm/thread_info.h | 2 +- arch/avr32/include/asm/thread_info.h | 2 +- arch/blackfin/include/asm/thread_info.h | 2 +- arch/cris/include/asm/thread_info.h | 4 +--- arch/frv/include/asm/thread_info.h | 4 +--- arch/h8300/include/asm/thread_info.h | 2 +- arch/ia64/include/asm/thread_info.h | 2 +- arch/m32r/include/asm/thread_info.h | 4 +--- arch/m68k/include/asm/thread_info_mm.h | 1 + arch/m68k/include/asm/thread_info_no.h | 1 + arch/microblaze/include/asm/thread_info.h | 4 +--- arch/mips/include/asm/thread_info.h | 4 +--- arch/mn10300/include/asm/thread_info.h | 4 +--- arch/parisc/include/asm/thread_info.h | 2 +- arch/powerpc/include/asm/thread_info.h | 4 +--- arch/s390/include/asm/thread_info.h | 2 +- arch/sh/include/asm/thread_info.h | 2 +- arch/sparc/include/asm/thread_info_32.h | 4 +--- arch/sparc/include/asm/thread_info_64.h | 4 +--- arch/um/include/asm/thread_info.h | 2 +- arch/x86/include/asm/thread_info.h | 2 +- arch/xtensa/include/asm/thread_info.h | 4 +--- include/linux/sched.h | 6 ++++++ 24 files changed, 29 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h index d069526bd767..60c83abfde70 100644 --- a/arch/alpha/include/asm/thread_info.h +++ b/arch/alpha/include/asm/thread_info.h @@ -37,6 +37,7 @@ struct thread_info { .task = &tsk, \ .exec_domain = &default_exec_domain, \ .addr_limit = KERNEL_DS, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 4f8848260ee2..73394e50cbca 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -73,7 +73,7 @@ struct thread_info { .task = &tsk, \ .exec_domain = &default_exec_domain, \ .flags = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .cpu_domain = domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h index 4442f8d2d423..fc42de5ca209 100644 --- a/arch/avr32/include/asm/thread_info.h +++ b/arch/avr32/include/asm/thread_info.h @@ -40,7 +40,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall \ } \ diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h index 2920087516f2..2bbfdd950afc 100644 --- a/arch/blackfin/include/asm/thread_info.h +++ b/arch/blackfin/include/asm/thread_info.h @@ -77,7 +77,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h index bc5b2935ca53..c3aade36c330 100644 --- a/arch/cris/include/asm/thread_info.h +++ b/arch/cris/include/asm/thread_info.h @@ -50,8 +50,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. 
*/ #ifndef __ASSEMBLY__ #define INIT_THREAD_INFO(tsk) \ @@ -60,7 +58,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h index e8a5ed7be021..e608e056bb53 100644 --- a/arch/frv/include/asm/thread_info.h +++ b/arch/frv/include/asm/thread_info.h @@ -56,8 +56,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ @@ -67,7 +65,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index 700014d2155f..8bbc8b0ee45d 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -36,7 +36,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index ae6922626bf4..8ce2e388e37c 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -48,7 +48,7 @@ struct thread_info { .flags = 0, \ .cpu = 0, \ .addr_limit = KERNEL_DS, \ - .preempt_count = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h index 8589d462df27..07bb5bd00e2a 100644 --- a/arch/m32r/include/asm/thread_info.h +++ b/arch/m32r/include/asm/thread_info.h @@ -57,8 +57,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. 
*/ #ifndef __ASSEMBLY__ @@ -68,7 +66,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/m68k/include/asm/thread_info_mm.h b/arch/m68k/include/asm/thread_info_mm.h index af0fda46e94b..6ea5c33b3c56 100644 --- a/arch/m68k/include/asm/thread_info_mm.h +++ b/arch/m68k/include/asm/thread_info_mm.h @@ -19,6 +19,7 @@ struct thread_info { { \ .task = &tsk, \ .exec_domain = &default_exec_domain, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/m68k/include/asm/thread_info_no.h b/arch/m68k/include/asm/thread_info_no.h index 82529f424ea3..c2bde5e24b0b 100644 --- a/arch/m68k/include/asm/thread_info_no.h +++ b/arch/m68k/include/asm/thread_info_no.h @@ -49,6 +49,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h index 7fac44498445..6e92885d381a 100644 --- a/arch/microblaze/include/asm/thread_info.h +++ b/arch/microblaze/include/asm/thread_info.h @@ -75,8 +75,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. */ #define INIT_THREAD_INFO(tsk) \ { \ @@ -84,7 +82,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 143a48136a4b..f9df720d2e40 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -39,8 +39,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. */ #define INIT_THREAD_INFO(tsk) \ { \ @@ -48,7 +46,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = _TIF_FIXADE, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/mn10300/include/asm/thread_info.h b/arch/mn10300/include/asm/thread_info.h index 78a3881f3c12..58d64f8b2cc3 100644 --- a/arch/mn10300/include/asm/thread_info.h +++ b/arch/mn10300/include/asm/thread_info.h @@ -65,8 +65,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. 
*/ #ifndef __ASSEMBLY__ @@ -76,7 +74,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h index 0407959da489..4ce0edfbe969 100644 --- a/arch/parisc/include/asm/thread_info.h +++ b/arch/parisc/include/asm/thread_info.h @@ -23,7 +23,7 @@ struct thread_info { .flags = 0, \ .cpu = 0, \ .addr_limit = KERNEL_DS, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall \ } \ diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 9aba5a38a7c4..c8b329255678 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -46,15 +46,13 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. */ #define INIT_THREAD_INFO(tsk) \ { \ .task = &tsk, \ .exec_domain = &default_exec_domain, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index 925bcc649035..ba1cab9fc1f9 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -61,7 +61,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h index f09ac4806294..d570ac2e5cb9 100644 --- a/arch/sh/include/asm/thread_info.h +++ b/arch/sh/include/asm/thread_info.h @@ -51,7 +51,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h index 0f7b0e5fb1c7..844d73a0340c 100644 --- a/arch/sparc/include/asm/thread_info_32.h +++ b/arch/sparc/include/asm/thread_info_32.h @@ -54,8 +54,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. */ #define INIT_THREAD_INFO(tsk) \ { \ @@ -64,7 +62,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 65865726b283..1b45a7bbe407 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -125,8 +125,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. 
*/ #ifndef __ASSEMBLY__ @@ -135,7 +133,7 @@ struct thread_info { .task = &tsk, \ .flags = ((unsigned long)ASI_P) << TI_FLAG_CURRENT_DS_SHIFT, \ .exec_domain = &default_exec_domain, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .restart_block = { \ .fn = do_no_restart_syscall, \ }, \ diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h index 62274ab9471f..fd911f855367 100644 --- a/arch/um/include/asm/thread_info.h +++ b/arch/um/include/asm/thread_info.h @@ -32,7 +32,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index b0783520988b..fad7d40b75f8 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -49,7 +49,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h index 0f4fe1faf9ba..13165641cc51 100644 --- a/arch/xtensa/include/asm/thread_info.h +++ b/arch/xtensa/include/asm/thread_info.h @@ -80,8 +80,6 @@ struct thread_info { /* * macros/functions for gaining access to the thread information structure - * - * preempt_count needs to be 1 initially, until the scheduler is functional. */ #ifndef __ASSEMBLY__ @@ -92,7 +90,7 @@ struct thread_info { .exec_domain = &default_exec_domain, \ .flags = 0, \ .cpu = 0, \ - .preempt_count = 1, \ + .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ .restart_block = { \ .fn = do_no_restart_syscall, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 0085d758d645..2a99f1c15cf8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -498,6 +498,12 @@ struct task_cputime { .sum_exec_runtime = 0, \ } +/* + * Disable preemption until the scheduler is running. + * Reset by start_kernel()->sched_init()->init_idle(). + */ +#define INIT_PREEMPT_COUNT (1) + /** * struct thread_group_cputimer - thread group interval timer counts * @cputime: thread group interval timers. -- cgit v1.2.3 From d86ee4809d0329d4aa0d0f2c76c2295a16862799 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 10 Jul 2009 14:57:57 +0200 Subject: sched: optimize cond_resched() Optimize cond_resched() by removing one conditional. Currently cond_resched() checks system_state == SYSTEM_RUNNING in order to avoid scheduling before the scheduler is running. We can however, as per suggestion of Matt, use PREEMPT_ACTIVE to accomplish that very same. Suggested-by: Matt Mackall Signed-off-by: Peter Zijlstra Acked-by: Matt Mackall Signed-off-by: Linus Torvalds --- include/linux/sched.h | 5 ++++- kernel/sched.c | 14 +++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2a99f1c15cf8..16a982e389fb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -501,8 +501,11 @@ struct task_cputime { /* * Disable preemption until the scheduler is running. * Reset by start_kernel()->sched_init()->init_idle(). + * + * We include PREEMPT_ACTIVE to avoid cond_resched() from working + * before the scheduler is active -- see should_resched(). 
*/ -#define INIT_PREEMPT_COUNT (1) +#define INIT_PREEMPT_COUNT (1 + PREEMPT_ACTIVE) /** * struct thread_group_cputimer - thread group interval timer counts diff --git a/kernel/sched.c b/kernel/sched.c index 7c9098d186e6..01f55ada3598 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6541,6 +6541,11 @@ SYSCALL_DEFINE0(sched_yield) return 0; } +static inline int should_resched(void) +{ + return need_resched() && !(preempt_count() & PREEMPT_ACTIVE); +} + static void __cond_resched(void) { #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP @@ -6560,8 +6565,7 @@ static void __cond_resched(void) int __sched _cond_resched(void) { - if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) && - system_state == SYSTEM_RUNNING) { + if (should_resched()) { __cond_resched(); return 1; } @@ -6579,12 +6583,12 @@ EXPORT_SYMBOL(_cond_resched); */ int cond_resched_lock(spinlock_t *lock) { - int resched = need_resched() && system_state == SYSTEM_RUNNING; + int resched = should_resched(); int ret = 0; if (spin_needbreak(lock) || resched) { spin_unlock(lock); - if (resched && need_resched()) + if (resched) __cond_resched(); else cpu_relax(); @@ -6599,7 +6603,7 @@ int __sched cond_resched_softirq(void) { BUG_ON(!in_softirq()); - if (need_resched() && system_state == SYSTEM_RUNNING) { + if (should_resched()) { local_bh_enable(); __cond_resched(); local_bh_disable(); -- cgit v1.2.3 From 5a421ce3c062a87db0a9e7f2a0a7ee0a5b869aab Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 10 Jul 2009 12:37:40 +0300 Subject: nfsd41: gather and report statistics also for v4.1 ops Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- include/linux/nfs4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index bd2eba530667..aff924a24abb 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -234,7 +234,7 @@ enum nfs_opnum4 { Needs to be updated if more operations are defined in future.*/ #define FIRST_NFS4_OP OP_ACCESS -#define LAST_NFS4_OP OP_RELEASE_LOCKOWNER +#define LAST_NFS4_OP OP_RECLAIM_COMPLETE enum nfsstat4 { NFS4_OK = 0, -- cgit v1.2.3 From 24a15a62dcb1968bf4ffdae55c88fa934d971180 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 9 Jul 2009 13:36:22 +0100 Subject: tty: Fix USB kref leak The sysrq code acquired a kref leak. 
Fix it by passing the tty separately from the caller (thus effectively using the callers kref which all the callers hold anyway) Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/usb/serial/ftdi_sio.c | 2 +- drivers/usb/serial/generic.c | 7 ++++--- drivers/usb/serial/pl2303.c | 2 +- include/linux/usb/serial.h | 3 ++- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 3dc3768ca71c..5f08702f672f 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -2121,7 +2121,7 @@ static void ftdi_process_read(struct work_struct *work) /* Note that the error flag is duplicated for every character received since we don't know which character it applied to */ - if (!usb_serial_handle_sysrq_char(port, + if (!usb_serial_handle_sysrq_char(tty, port, data[packet_offset + i])) tty_insert_flip_char(tty, data[packet_offset + i], diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 3d8dc5671bea..ce57f6a32bdf 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -432,7 +432,7 @@ static void flush_and_resubmit_read_urb(struct usb_serial_port *port) else { /* Push data to tty */ for (i = 0; i < urb->actual_length; i++, ch++) { - if (!usb_serial_handle_sysrq_char(port, *ch)) + if (!usb_serial_handle_sysrq_char(tty, port, *ch)) tty_insert_flip_char(tty, *ch, TTY_NORMAL); } } @@ -534,11 +534,12 @@ void usb_serial_generic_unthrottle(struct tty_struct *tty) } } -int usb_serial_handle_sysrq_char(struct usb_serial_port *port, unsigned int ch) +int usb_serial_handle_sysrq_char(struct tty_struct *tty, + struct usb_serial_port *port, unsigned int ch) { if (port->sysrq && port->console) { if (ch && time_before(jiffies, port->sysrq)) { - handle_sysrq(ch, tty_port_tty_get(&port->port)); + handle_sysrq(ch, tty); port->sysrq = 0; return 1; } diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index ec6c132a25b5..8835802067f7 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -1038,7 +1038,7 @@ static void pl2303_read_bulk_callback(struct urb *urb) if (line_status & UART_OVERRUN_ERROR) tty_insert_flip_char(tty, 0, TTY_OVERRUN); for (i = 0; i < urb->actual_length; ++i) - if (!usb_serial_handle_sysrq_char(port, data[i])) + if (!usb_serial_handle_sysrq_char(tty, port, data[i])) tty_insert_flip_char(tty, data[i], tty_flag); tty_flip_buffer_push(tty); } diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 44801d26a37a..0ec50ba62139 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -317,7 +317,8 @@ extern int usb_serial_generic_register(int debug); extern void usb_serial_generic_deregister(void); extern void usb_serial_generic_resubmit_read_urb(struct usb_serial_port *port, gfp_t mem_flags); -extern int usb_serial_handle_sysrq_char(struct usb_serial_port *port, +extern int usb_serial_handle_sysrq_char(struct tty_struct *tty, + struct usb_serial_port *port, unsigned int ch); extern int usb_serial_handle_break(struct usb_serial_port *port); -- cgit v1.2.3 From 99d27e1c59e34869605de625b033c52163f5bfa7 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 9 Jul 2009 20:30:57 +0800 Subject: crypto: shash - Export/import hash state only This patch replaces the full descriptor export with an export of the partial hash state. This allows the use of a consistent export format across all implementations of a given algorithm. 
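A hedged sketch of how a caller might use the state-based interface introduced here; the helper hash_two_chunks() is illustrative only and not part of the patch. It checkpoints the partial hash after the first chunk and resumes from it later, sizing the saved blob with the new statesize field so the format is the same for every implementation of the algorithm.

	#include <crypto/hash.h>
	#include <linux/errno.h>
	#include <linux/slab.h>

	/* Illustrative only: hash two discontiguous chunks, saving and
	 * restoring the partial hash state in between. */
	static int hash_two_chunks(struct shash_desc *desc,
				   const u8 *chunk1, unsigned int len1,
				   const u8 *chunk2, unsigned int len2,
				   u8 *digest)
	{
		void *state;
		int err;

		state = kmalloc(crypto_shash_statesize(desc->tfm), GFP_KERNEL);
		if (!state)
			return -ENOMEM;

		err = crypto_shash_init(desc);
		if (!err)
			err = crypto_shash_update(desc, chunk1, len1);
		if (!err)
			err = crypto_shash_export(desc, state);	/* save partial state */

		/* The saved state could now be handed to another descriptor of
		 * the same algorithm, e.g. a hardware implementation. */

		if (!err)
			err = crypto_shash_import(desc, state);	/* resume from it */
		if (!err)
			err = crypto_shash_update(desc, chunk2, len2);
		if (!err)
			err = crypto_shash_final(desc, digest);

		kzfree(state);
		return err;
	}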
This is useful because a number of cases require the use of the partial hash state, e.g., PadLock can use the SHA1 hash state to get around the fact that it can only hash contiguous data chunks. Signed-off-by: Herbert Xu --- crypto/shash.c | 25 ++++++++++++++----------- include/crypto/hash.h | 18 ++++++++++++++---- 2 files changed, 28 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/crypto/shash.c b/crypto/shash.c index 23e05a1e9038..14a3b707a31f 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -172,19 +172,15 @@ int crypto_shash_digest(struct shash_desc *desc, const u8 *data, } EXPORT_SYMBOL_GPL(crypto_shash_digest); -int crypto_shash_import(struct shash_desc *desc, const u8 *in) +static int shash_no_export(struct shash_desc *desc, void *out) { - struct crypto_shash *tfm = desc->tfm; - struct shash_alg *alg = crypto_shash_alg(tfm); - - memcpy(shash_desc_ctx(desc), in, crypto_shash_descsize(tfm)); - - if (alg->reinit) - return alg->reinit(desc); + return -ENOSYS; +} - return 0; +static int shash_no_import(struct shash_desc *desc, const void *in) +{ + return -ENOSYS; } -EXPORT_SYMBOL_GPL(crypto_shash_import); static int shash_async_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) @@ -484,12 +480,19 @@ static int shash_prepare_alg(struct shash_alg *alg) struct crypto_alg *base = &alg->base; if (alg->digestsize > PAGE_SIZE / 8 || - alg->descsize > PAGE_SIZE / 8) + alg->descsize > PAGE_SIZE / 8 || + alg->statesize > PAGE_SIZE / 8) return -EINVAL; base->cra_type = &crypto_shash_type; base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; base->cra_flags |= CRYPTO_ALG_TYPE_SHASH; + + if (!alg->import) + alg->import = shash_no_import; + if (!alg->export) + alg->export = shash_no_export; + return 0; } diff --git a/include/crypto/hash.h b/include/crypto/hash.h index d56bb71617c3..3c4cce6a425c 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -24,7 +24,6 @@ struct shash_desc { struct shash_alg { int (*init)(struct shash_desc *desc); - int (*reinit)(struct shash_desc *desc); int (*update)(struct shash_desc *desc, const u8 *data, unsigned int len); int (*final)(struct shash_desc *desc, u8 *out); @@ -32,11 +31,14 @@ struct shash_alg { unsigned int len, u8 *out); int (*digest)(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out); + int (*export)(struct shash_desc *desc, void *out); + int (*import)(struct shash_desc *desc, const void *in); int (*setkey)(struct crypto_shash *tfm, const u8 *key, unsigned int keylen); unsigned int descsize; unsigned int digestsize; + unsigned int statesize; struct crypto_alg base; }; @@ -251,6 +253,11 @@ static inline unsigned int crypto_shash_digestsize(struct crypto_shash *tfm) return crypto_shash_alg(tfm)->digestsize; } +static inline unsigned int crypto_shash_statesize(struct crypto_shash *tfm) +{ + return crypto_shash_alg(tfm)->statesize; +} + static inline u32 crypto_shash_get_flags(struct crypto_shash *tfm) { return crypto_tfm_get_flags(crypto_shash_tfm(tfm)); @@ -281,12 +288,15 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key, int crypto_shash_digest(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out); -static inline void crypto_shash_export(struct shash_desc *desc, u8 *out) +static inline int crypto_shash_export(struct shash_desc *desc, void *out) { - memcpy(out, shash_desc_ctx(desc), crypto_shash_descsize(desc->tfm)); + return crypto_shash_alg(desc->tfm)->export(desc, out); } -int crypto_shash_import(struct shash_desc *desc, const u8 *in); +static inline int 
crypto_shash_import(struct shash_desc *desc, const void *in) +{ + return crypto_shash_alg(desc->tfm)->import(desc, in); +} static inline int crypto_shash_init(struct shash_desc *desc) { -- cgit v1.2.3 From e2a7ce4e185a94462698cc0e5192495ee3d22a2f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 9 Jul 2009 21:27:13 +0800 Subject: crypto: sha1_generic - Add export/import support This patch adds export/import support to sha1_generic. The exported type is defined by struct sha1_state, which is basically the entire descriptor state of sha1_generic. Signed-off-by: Herbert Xu --- crypto/sha1_generic.c | 41 +++++++++++++++++++++++++---------------- include/crypto/sha.h | 8 ++++++++ 2 files changed, 33 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/crypto/sha1_generic.c b/crypto/sha1_generic.c index 9efef20454cb..0416091bf45a 100644 --- a/crypto/sha1_generic.c +++ b/crypto/sha1_generic.c @@ -25,31 +25,21 @@ #include #include -struct sha1_ctx { - u64 count; - u32 state[5]; - u8 buffer[64]; -}; - static int sha1_init(struct shash_desc *desc) { - struct sha1_ctx *sctx = shash_desc_ctx(desc); + struct sha1_state *sctx = shash_desc_ctx(desc); - static const struct sha1_ctx initstate = { - 0, - { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, - { 0, } + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, }; - *sctx = initstate; - return 0; } static int sha1_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha1_ctx *sctx = shash_desc_ctx(desc); + struct sha1_state *sctx = shash_desc_ctx(desc); unsigned int partial, done; const u8 *src; @@ -85,7 +75,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, /* Add padding and return the message digest. */ static int sha1_final(struct shash_desc *desc, u8 *out) { - struct sha1_ctx *sctx = shash_desc_ctx(desc); + struct sha1_state *sctx = shash_desc_ctx(desc); __be32 *dst = (__be32 *)out; u32 i, index, padlen; __be64 bits; @@ -111,12 +101,31 @@ static int sha1_final(struct shash_desc *desc, u8 *out) return 0; } +static int sha1_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + return 0; +} + +static int sha1_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + return 0; +} + static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_init, .update = sha1_update, .final = sha1_final, - .descsize = sizeof(struct sha1_ctx), + .export = sha1_export, + .import = sha1_import, + .descsize = sizeof(struct sha1_state), + .statesize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name= "sha1-generic", diff --git a/include/crypto/sha.h b/include/crypto/sha.h index c0ccc2b1a2d8..922a248bd04d 100644 --- a/include/crypto/sha.h +++ b/include/crypto/sha.h @@ -5,6 +5,8 @@ #ifndef _CRYPTO_SHA_H #define _CRYPTO_SHA_H +#include + #define SHA1_DIGEST_SIZE 20 #define SHA1_BLOCK_SIZE 64 @@ -62,4 +64,10 @@ #define SHA512_H6 0x1f83d9abfb41bd6bULL #define SHA512_H7 0x5be0cd19137e2179ULL +struct sha1_state { + u64 count; + u32 state[SHA1_DIGEST_SIZE / 4]; + u8 buffer[SHA1_BLOCK_SIZE]; +}; + #endif -- cgit v1.2.3 From 9b2fda7b94a769af13c24582739e50664b0e27a8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 10 Jul 2009 13:00:27 +0800 Subject: crypto: sha256_generic - Add export/import support This patch adds export/import support to sha256_generic. 
The exported type is defined by struct sha256_state, which is basically the entire descriptor state of sha256_generic. Signed-off-by: Herbert Xu --- crypto/sha256_generic.c | 37 +++++++++++++++++++++++++------------ include/crypto/sha.h | 6 ++++++ 2 files changed, 31 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c index e58c71bdf333..c48459ebf05b 100644 --- a/crypto/sha256_generic.c +++ b/crypto/sha256_generic.c @@ -25,12 +25,6 @@ #include #include -struct sha256_ctx { - u64 count; - u32 state[8]; - u8 buf[128]; -}; - static inline u32 Ch(u32 x, u32 y, u32 z) { return z ^ (x & (y ^ z)); @@ -222,7 +216,7 @@ static void sha256_transform(u32 *state, const u8 *input) static int sha224_init(struct shash_desc *desc) { - struct sha256_ctx *sctx = shash_desc_ctx(desc); + struct sha256_state *sctx = shash_desc_ctx(desc); sctx->state[0] = SHA224_H0; sctx->state[1] = SHA224_H1; sctx->state[2] = SHA224_H2; @@ -238,7 +232,7 @@ static int sha224_init(struct shash_desc *desc) static int sha256_init(struct shash_desc *desc) { - struct sha256_ctx *sctx = shash_desc_ctx(desc); + struct sha256_state *sctx = shash_desc_ctx(desc); sctx->state[0] = SHA256_H0; sctx->state[1] = SHA256_H1; sctx->state[2] = SHA256_H2; @@ -255,7 +249,7 @@ static int sha256_init(struct shash_desc *desc) static int sha256_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha256_ctx *sctx = shash_desc_ctx(desc); + struct sha256_state *sctx = shash_desc_ctx(desc); unsigned int partial, done; const u8 *src; @@ -286,7 +280,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data, static int sha256_final(struct shash_desc *desc, u8 *out) { - struct sha256_ctx *sctx = shash_desc_ctx(desc); + struct sha256_state *sctx = shash_desc_ctx(desc); __be32 *dst = (__be32 *)out; __be64 bits; unsigned int index, pad_len; @@ -326,12 +320,31 @@ static int sha224_final(struct shash_desc *desc, u8 *hash) return 0; } +static int sha256_export(struct shash_desc *desc, void *out) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + return 0; +} + +static int sha256_import(struct shash_desc *desc, const void *in) +{ + struct sha256_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + return 0; +} + static struct shash_alg sha256 = { .digestsize = SHA256_DIGEST_SIZE, .init = sha256_init, .update = sha256_update, .final = sha256_final, - .descsize = sizeof(struct sha256_ctx), + .export = sha256_export, + .import = sha256_import, + .descsize = sizeof(struct sha256_state), + .statesize = sizeof(struct sha256_state), .base = { .cra_name = "sha256", .cra_driver_name= "sha256-generic", @@ -346,7 +359,7 @@ static struct shash_alg sha224 = { .init = sha224_init, .update = sha256_update, .final = sha224_final, - .descsize = sizeof(struct sha256_ctx), + .descsize = sizeof(struct sha256_state), .base = { .cra_name = "sha224", .cra_driver_name= "sha224-generic", diff --git a/include/crypto/sha.h b/include/crypto/sha.h index 922a248bd04d..88ef5eb9514d 100644 --- a/include/crypto/sha.h +++ b/include/crypto/sha.h @@ -70,4 +70,10 @@ struct sha1_state { u8 buffer[SHA1_BLOCK_SIZE]; }; +struct sha256_state { + u64 count; + u32 state[SHA256_DIGEST_SIZE / 4]; + u8 buf[SHA256_BLOCK_SIZE]; +}; + #endif -- cgit v1.2.3 From 373c0a7ed3ea3b34efedb7c83ffb521adff7c894 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 11 Jul 2009 10:06:54 -0400 Subject: Fix compile error due to congestion_wait() changes Move the 
definition of BLK_RW_ASYNC/BLK_RW_SYNC into linux/backing-dev.h so that it is available to all callers of set/clear_bdi_congested(). This replaces commit 097041e576ee3a50d92dd643ee8ca65bf6a62e21 ("fuse: Fix build error"), which will be reverted. Signed-off-by: Trond Myklebust Acked-by: Larry Finger Cc: Jens Axboe Cc: Miklos Szeredi Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 5 +++++ include/linux/blkdev.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 3a52a63c1351..1d52425a6118 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -229,6 +229,11 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << BDI_async_congested)); } +enum { + BLK_RW_ASYNC = 0, + BLK_RW_SYNC = 1, +}; + void clear_bdi_congested(struct backing_dev_info *bdi, int sync); void set_bdi_congested(struct backing_dev_info *bdi, int sync); long congestion_wait(int sync, long timeout); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0146e0fecf1a..e7cb5dbf6c26 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -70,11 +70,6 @@ enum rq_cmd_type_bits { REQ_TYPE_ATA_PC, }; -enum { - BLK_RW_ASYNC = 0, - BLK_RW_SYNC = 1, -}; - /* * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a -- cgit v1.2.3 From aef73cfcb913eae3d0deeb60eb385f75039db40b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 11 Jul 2009 22:22:14 +0800 Subject: crypto: async - Use kzfree for requests This patch changes the kfree call to kzfree for async requests. As the request may contain sensitive data it needs to be zeroed before it can be reallocated by others. Signed-off-by: Herbert Xu --- include/crypto/hash.h | 2 +- include/linux/crypto.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 3c4cce6a425c..f74214a4b01b 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -186,7 +186,7 @@ static inline struct ahash_request *ahash_request_alloc( static inline void ahash_request_free(struct ahash_request *req) { - kfree(req); + kzfree(req); } static inline struct ahash_request *ahash_request_cast( diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ec29fa268b94..274f9c7da90c 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -770,7 +770,7 @@ static inline struct ablkcipher_request *ablkcipher_request_alloc( static inline void ablkcipher_request_free(struct ablkcipher_request *req) { - kfree(req); + kzfree(req); } static inline void ablkcipher_request_set_callback( @@ -901,7 +901,7 @@ static inline struct aead_request *aead_request_alloc(struct crypto_aead *tfm, static inline void aead_request_free(struct aead_request *req) { - kfree(req); + kzfree(req); } static inline void aead_request_set_callback(struct aead_request *req, -- cgit v1.2.3 From f9fabcb58a6d26d6efde842d1703ac7cfa9427b6 Mon Sep 17 00:00:00 2001 From: Julien Tinnes Date: Fri, 26 Jun 2009 20:27:40 +0200 Subject: personality: fix PER_CLEAR_ON_SETID We have found that the current PER_CLEAR_ON_SETID mask on Linux doesn't include neither ADDR_COMPAT_LAYOUT, nor MMAP_PAGE_ZERO. The current mask is READ_IMPLIES_EXEC|ADDR_NO_RANDOMIZE. 
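For illustration only, a small userspace sketch of the scenario discussed below (the setuid target binary is a hypothetical stand-in): a process can add MMAP_PAGE_ZERO to its personality with personality(2) and then exec a setuid binary, which is why the flag needs to be in PER_CLEAR_ON_SETID.

	#include <sys/personality.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* Add MMAP_PAGE_ZERO to the current personality; before this
		 * patch the flag also survived a setuid execve(). */
		if (personality(personality(0xffffffff) | MMAP_PAGE_ZERO) == -1)
			perror("personality");

		/* /usr/bin/passwd stands in for any setuid-root binary. */
		execl("/usr/bin/passwd", "passwd", (char *)NULL);
		perror("execl");
		return 1;
	}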
We believe it is important to add MMAP_PAGE_ZERO, because by using this personality it is possible to have the first page mapped inside a process running as setuid root. This could be used in those scenarios: - Exploiting a NULL pointer dereference issue in a setuid root binary - Bypassing the mmap_min_addr restrictions of the Linux kernel: by running a setuid binary that would drop privileges before giving us control back (for instance by loading a user-supplied library), we could get the first page mapped in a process we control. By further using mremap and mprotect on this mapping, we can then completely bypass the mmap_min_addr restrictions. Less importantly, we believe ADDR_COMPAT_LAYOUT should also be added since on x86 32bits it will in practice disable most of the address space layout randomization (only the stack will remain randomized). Signed-off-by: Julien Tinnes Signed-off-by: Tavis Ormandy Cc: stable@kernel.org Acked-by: Christoph Hellwig Acked-by: Kees Cook Acked-by: Eugene Teo [ Shortened lines and fixed whitespace as per Christophs' suggestion ] Signed-off-by: Linus Torvalds --- include/linux/personality.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/personality.h b/include/linux/personality.h index a84e9ff9b27e..126120819a0d 100644 --- a/include/linux/personality.h +++ b/include/linux/personality.h @@ -40,7 +40,10 @@ enum { * Security-relevant compatibility flags that must be * cleared upon setuid or setgid exec: */ -#define PER_CLEAR_ON_SETID (READ_IMPLIES_EXEC|ADDR_NO_RANDOMIZE) +#define PER_CLEAR_ON_SETID (READ_IMPLIES_EXEC | \ + ADDR_NO_RANDOMIZE | \ + ADDR_COMPAT_LAYOUT | \ + MMAP_PAGE_ZERO) /* * Personality types. -- cgit v1.2.3 From 405f55712dfe464b3240d7816cc4fe4174831be2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 11 Jul 2009 22:08:37 +0400 Subject: headers: smp_lock.h redux * Remove smp_lock.h from files which don't need it (including some headers!) 
* Add smp_lock.h to files which do need it * Make smp_lock.h include conditional in hardirq.h It's needed only for one kernel_locked() usage which is under CONFIG_PREEMPT This will make hardirq.h inclusion cheaper for every PREEMPT=n config (which includes allmodconfig/allyesconfig, BTW) Signed-off-by: Alexey Dobriyan Signed-off-by: Linus Torvalds --- arch/alpha/kernel/ptrace.c | 1 - arch/blackfin/kernel/ptrace.c | 1 - arch/blackfin/kernel/sys_bfin.c | 1 - arch/cris/kernel/sys_cris.c | 1 - arch/ia64/kernel/ptrace.c | 1 - arch/m32r/kernel/ptrace.c | 1 - arch/microblaze/kernel/ptrace.c | 1 - arch/microblaze/kernel/signal.c | 1 - arch/microblaze/kernel/sys_microblaze.c | 1 - arch/mips/kernel/ptrace32.c | 1 - arch/mips/mm/hugetlbpage.c | 1 - arch/mn10300/kernel/ptrace.c | 1 - arch/mn10300/kernel/signal.c | 1 - arch/mn10300/kernel/sys_mn10300.c | 1 - arch/mn10300/kernel/traps.c | 1 - arch/mn10300/mm/fault.c | 1 - arch/mn10300/mm/misalignment.c | 1 - arch/powerpc/kernel/ptrace32.c | 1 - arch/s390/kernel/dis.c | 1 - arch/s390/kernel/ptrace.c | 1 - arch/s390/mm/fault.c | 1 - arch/sh/mm/tlb-sh3.c | 1 - arch/sparc/kernel/ptrace_32.c | 1 - arch/sparc/kernel/ptrace_64.c | 1 - arch/sparc/kernel/time_64.c | 1 - arch/sparc/kernel/traps_32.c | 1 - drivers/block/DAC960.c | 1 + drivers/block/cciss.c | 1 + drivers/block/loop.c | 1 - drivers/bluetooth/hci_vhci.c | 1 - drivers/char/amiserial.c | 1 + drivers/char/cyclades.c | 1 + drivers/char/epca.c | 1 + drivers/char/isicom.c | 1 + drivers/char/istallion.c | 1 + drivers/char/moxa.c | 1 + drivers/char/mxser.c | 1 + drivers/char/n_hdlc.c | 1 + drivers/char/n_r3964.c | 1 + drivers/char/pty.c | 1 + drivers/char/rio/rio_linux.c | 1 + drivers/char/riscom8.c | 1 + drivers/char/rocket.c | 1 + drivers/char/serial167.c | 1 + drivers/char/specialix.c | 1 + drivers/char/sx.c | 1 + drivers/char/synclink.c | 1 + drivers/char/synclink_gt.c | 1 + drivers/char/synclinkmp.c | 1 + drivers/char/tpm/tpm.c | 1 - drivers/char/tty_ioctl.c | 1 - drivers/char/tty_ldisc.c | 1 - drivers/char/vt.c | 1 + drivers/char/vt_ioctl.c | 1 + drivers/gpio/vr41xx_giu.c | 1 - drivers/hid/usbhid/hid-core.c | 1 - drivers/isdn/hisax/hfc_usb.c | 1 - drivers/isdn/i4l/isdn_tty.c | 1 + drivers/isdn/mISDN/stack.c | 1 + drivers/media/dvb/bt8xx/dst_ca.c | 1 + drivers/media/dvb/dvb-core/dvbdev.h | 1 - drivers/media/dvb/ttpci/av7110.c | 1 - drivers/media/radio/radio-mr800.c | 1 + drivers/media/radio/radio-si470x.c | 1 + drivers/media/video/bt8xx/bttv-driver.c | 1 + drivers/media/video/cx23885/cx23885-417.c | 1 + drivers/media/video/cx23885/cx23885-video.c | 1 + drivers/media/video/cx88/cx88-blackbird.c | 1 + drivers/media/video/cx88/cx88-video.c | 1 + drivers/media/video/dabusb.c | 1 + drivers/media/video/pwc/pwc-if.c | 1 + drivers/media/video/pwc/pwc.h | 1 - drivers/media/video/s2255drv.c | 1 + drivers/media/video/saa5246a.c | 1 - drivers/media/video/saa5249.c | 1 - drivers/media/video/saa7134/saa7134-empress.c | 1 + drivers/media/video/se401.c | 1 + drivers/media/video/stk-webcam.c | 1 + drivers/media/video/stradis.c | 1 + drivers/media/video/stv680.c | 1 + drivers/media/video/usbvideo/vicam.c | 1 + drivers/media/video/usbvision/usbvision-video.c | 1 + drivers/media/video/v4l2-dev.c | 1 - drivers/media/video/zoran/zoran_driver.c | 1 + drivers/misc/sgi-gru/grufile.c | 1 - drivers/misc/sgi-gru/grukservices.c | 1 - drivers/net/irda/irtty-sir.c | 1 - drivers/pci/hotplug/cpci_hotplug_core.c | 1 - drivers/pci/hotplug/cpqphp_ctrl.c | 1 - drivers/pci/hotplug/cpqphp_sysfs.c | 1 + drivers/pci/hotplug/pciehp_ctrl.c | 1 - 
drivers/pci/syscall.c | 1 - drivers/s390/block/dasd_ioctl.c | 1 + drivers/scsi/qla2xxx/qla_mid.c | 1 - drivers/telephony/ixj.c | 1 + drivers/telephony/phonedev.c | 1 - drivers/usb/class/cdc-wdm.c | 1 - drivers/usb/gadget/amd5536udc.c | 1 - drivers/usb/gadget/langwell_udc.c | 1 - drivers/usb/gadget/s3c2410_udc.c | 1 - drivers/usb/host/r8a66597-hcd.c | 1 - drivers/usb/misc/iowarrior.c | 1 + drivers/usb/misc/rio500.c | 1 + drivers/usb/misc/usblcd.c | 1 + drivers/usb/musb/cppi_dma.h | 1 - drivers/usb/musb/musb_core.h | 1 - drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/mos7840.c | 1 + drivers/usb/serial/usb-serial.c | 1 + drivers/video/fbmem.c | 1 - fs/adfs/super.c | 1 + fs/afs/super.c | 1 + fs/autofs4/dev-ioctl.c | 1 - fs/bfs/dir.c | 1 - fs/bfs/file.c | 1 - fs/btrfs/compression.c | 1 - fs/btrfs/file.c | 1 - fs/btrfs/inode.c | 1 - fs/btrfs/ioctl.c | 1 - fs/btrfs/super.c | 1 - fs/char_dev.c | 1 - fs/compat.c | 1 - fs/compat_ioctl.c | 1 + fs/exofs/super.c | 1 + fs/ext2/ioctl.c | 1 - fs/ext4/ioctl.c | 1 - fs/fat/dir.c | 1 - fs/fat/namei_msdos.c | 1 - fs/fat/namei_vfat.c | 1 - fs/fcntl.c | 1 - fs/freevxfs/vxfs_super.c | 1 + fs/hfs/super.c | 1 + fs/hfsplus/super.c | 1 + fs/hpfs/dir.c | 1 + fs/hpfs/file.c | 1 + fs/hpfs/hpfs_fn.h | 1 - fs/hpfs/inode.c | 1 + fs/hpfs/namei.c | 1 + fs/jffs2/super.c | 1 + fs/lockd/clntproc.c | 1 + fs/lockd/svc4proc.c | 1 + fs/lockd/svcproc.c | 1 + fs/nfs/delegation.c | 1 + fs/nfs/dir.c | 1 - fs/nfs/file.c | 1 - fs/nfs/inode.c | 1 - fs/nfs/nfs4proc.c | 1 - fs/nfs/read.c | 1 - fs/nfsd/nfsctl.c | 1 - fs/nfsd/nfssvc.c | 1 - fs/nilfs2/dir.c | 1 - fs/ocfs2/ioctl.c | 1 - fs/reiserfs/xattr.c | 1 - fs/squashfs/super.c | 1 + fs/ubifs/ioctl.c | 1 - fs/xfs/linux-2.6/xfs_file.c | 1 - include/linux/crash_dump.h | 1 - include/linux/hardirq.h | 2 ++ include/linux/quotaops.h | 1 - include/linux/sunrpc/xdr.h | 1 - kernel/power/user.c | 1 - kernel/trace/blktrace.c | 1 + kernel/trace/trace.c | 1 + net/appletalk/ddp.c | 1 + net/ipx/af_ipx.c | 1 + net/irda/af_irda.c | 1 + net/irda/irnet/irnet.h | 1 - net/irda/irnet/irnet_ppp.c | 1 + net/sunrpc/clnt.c | 1 - net/sunrpc/sched.c | 1 - net/sunrpc/svc_xprt.c | 1 + net/wanrouter/wanmain.c | 1 + net/x25/af_x25.c | 1 + 173 files changed, 81 insertions(+), 93 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index 1e9ad52c460e..e072041d19f8 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index d76618db50df..6a387eec6b65 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/blackfin/kernel/sys_bfin.c b/arch/blackfin/kernel/sys_bfin.c index a8f1329c15a4..3da60fb13ce4 100644 --- a/arch/blackfin/kernel/sys_bfin.c +++ b/arch/blackfin/kernel/sys_bfin.c @@ -29,7 +29,6 @@ * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include #include #include #include diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c index a79fbd87021b..2ad962c7e88e 100644 --- a/arch/cris/kernel/sys_cris.c +++ b/arch/cris/kernel/sys_cris.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 92c9689b7d97..9daa87fdb018 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -15,7 
+15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index bf0abe9e1f73..98b8feb12ed8 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index b86aa623e36d..53ff39af6a5c 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index 493819c25fba..1c80e4fc40ce 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index 8c9ebac5da10..e000bce09b2b 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index c4f9ac17474a..32644b4a0714 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/arch/mips/mm/hugetlbpage.c b/arch/mips/mm/hugetlbpage.c index 471c09aa1614..8c2834f5919d 100644 --- a/arch/mips/mm/hugetlbpage.c +++ b/arch/mips/mm/hugetlbpage.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index e143339ad28e..cf847dabc1bd 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index 9f7572a0f578..feb2f2e810db 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c index bca5a84dc72c..29d196b83d25 100644 --- a/arch/mn10300/kernel/sys_mn10300.c +++ b/arch/mn10300/kernel/sys_mn10300.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c index 0dfdc5001124..91365adba4f5 100644 --- a/arch/mn10300/kernel/traps.c +++ b/arch/mn10300/kernel/traps.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index a62e1e138bc1..53bb17d0f068 100644 --- a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include /* For unblank_screen() */ diff --git a/arch/mn10300/mm/misalignment.c b/arch/mn10300/mm/misalignment.c index 94c4a4358065..30016251f658 100644 --- a/arch/mn10300/mm/misalignment.c +++ b/arch/mn10300/mm/misalignment.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index 297632cba047..8a6daf4129f6 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -21,7 +21,6 @@ #include #include #include 
-#include #include #include #include diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index d2f270c995d9..db943a7ec513 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 490b39934d65..43acd73105b7 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 74eb26bf1970..e5e119fe03b2 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sh/mm/tlb-sh3.c b/arch/sh/mm/tlb-sh3.c index 7fbfd5a11ffa..17cb7c3adf22 100644 --- a/arch/sh/mm/tlb-sh3.c +++ b/arch/sh/mm/tlb-sh3.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/arch/sparc/kernel/ptrace_32.c b/arch/sparc/kernel/ptrace_32.c index 8ce6285a06d5..7e3dfd9bb97e 100644 --- a/arch/sparc/kernel/ptrace_32.c +++ b/arch/sparc/kernel/ptrace_32.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index a941c610e7ce..4ae91dc2feb9 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 5c12e79b4bdf..da1218e8ee87 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 358283341b47..c0490c7bbde0 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 668dc234b8e2..1e6b7c14f697 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 65a0655e7fc8..a52cc7fe45ea 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 801f4ab83302..5757188cd1fb 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -61,7 +61,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 1df9dda2e377..d5cde6d86f89 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/char/amiserial.c b/drivers/char/amiserial.c index 72429b6b2fa8..6c32fbf07164 100644 --- a/drivers/char/amiserial.c +++ b/drivers/char/amiserial.c @@ -81,6 +81,7 @@ static char *serial_version = "4.30"; #include #include #include +#include #include #include diff --git a/drivers/char/cyclades.c b/drivers/char/cyclades.c index f3366d3f06cf..2dafc2da0648 100644 --- a/drivers/char/cyclades.c +++ b/drivers/char/cyclades.c @@ -633,6 +633,7 @@ #include #include #include +#include #include #include #include diff --git 
a/drivers/char/epca.c b/drivers/char/epca.c index abef1f7d84fe..ff647ca1c489 100644 --- a/drivers/char/epca.c +++ b/drivers/char/epca.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/isicom.c b/drivers/char/isicom.c index 621d1184673c..4f1f4cd670da 100644 --- a/drivers/char/isicom.c +++ b/drivers/char/isicom.c @@ -122,6 +122,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c index 0c999f5bb3db..ab2f3349c5c4 100644 --- a/drivers/char/istallion.c +++ b/drivers/char/istallion.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/drivers/char/moxa.c b/drivers/char/moxa.c index 65b6ff2442c6..dd0083bbb64a 100644 --- a/drivers/char/moxa.c +++ b/drivers/char/moxa.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/mxser.c b/drivers/char/mxser.c index 52d953eb30c3..dbf8d52f31d0 100644 --- a/drivers/char/mxser.c +++ b/drivers/char/mxser.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/n_hdlc.c b/drivers/char/n_hdlc.c index 1c43c8cdee25..c68118efad84 100644 --- a/drivers/char/n_hdlc.c +++ b/drivers/char/n_hdlc.c @@ -97,6 +97,7 @@ #include #include #include +#include #include /* used in new tty drivers */ #include /* used in new tty drivers */ #include diff --git a/drivers/char/n_r3964.c b/drivers/char/n_r3964.c index 2e99158ebb8a..6934025a1ac1 100644 --- a/drivers/char/n_r3964.c +++ b/drivers/char/n_r3964.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include /* used in new tty drivers */ diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 9d1b4f548f67..6e6942c45f5b 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/rio/rio_linux.c b/drivers/char/rio/rio_linux.c index ce81da5b2da9..d58c2eb07f07 100644 --- a/drivers/char/rio/rio_linux.c +++ b/drivers/char/rio/rio_linux.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include diff --git a/drivers/char/riscom8.c b/drivers/char/riscom8.c index 217660451237..171711acf5cd 100644 --- a/drivers/char/riscom8.c +++ b/drivers/char/riscom8.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/drivers/char/rocket.c b/drivers/char/rocket.c index 63d5b628477a..0e29a23ec4c5 100644 --- a/drivers/char/rocket.c +++ b/drivers/char/rocket.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c index f1f24f0ee26f..51e7a46787be 100644 --- a/drivers/char/serial167.c +++ b/drivers/char/serial167.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/specialix.c b/drivers/char/specialix.c index e72be4190a44..bfe4cdb2febb 100644 --- a/drivers/char/specialix.c +++ b/drivers/char/specialix.c @@ -87,6 +87,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/sx.c b/drivers/char/sx.c index 518f2a25d91e..a81ec4fcf6ff 100644 --- a/drivers/char/sx.c +++ b/drivers/char/sx.c @@ -216,6 +216,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index afded3a2379c..813552f14884 100644 --- a/drivers/char/synclink.c +++ 
b/drivers/char/synclink.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index a2e67e6df3a1..91f20a92fddf 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 6f727e3c53ad..8d4a2a8a0a70 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index ccdd828adcef..b0603b2e5684 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "tpm.h" diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index b24f6c6a1ea3..ad6ba4ed2808 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index 913aa8d3f1c5..0ef0dc97ba20 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/char/vt.c b/drivers/char/vt.c index d9113b4c76e3..7947bd1b4cf7 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c index 7539bed0f7e0..95189f288f8c 100644 --- a/drivers/char/vt_ioctl.c +++ b/drivers/char/vt_ioctl.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpio/vr41xx_giu.c b/drivers/gpio/vr41xx_giu.c index b70e06133e78..b16c9a8c03f5 100644 --- a/drivers/gpio/vr41xx_giu.c +++ b/drivers/gpio/vr41xx_giu.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 76c4bbe9dccb..3c1fcb7640ab 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c index 8df889b0c1a9..9de54202c90c 100644 --- a/drivers/isdn/hisax/hfc_usb.c +++ b/drivers/isdn/hisax/hfc_usb.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include "hisax.h" diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index b4d4522e5071..2881a66c1aa9 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -13,6 +13,7 @@ #include #include +#include #include "isdn_common.h" #include "isdn_tty.h" #ifdef CONFIG_ISDN_AUDIO diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index e2f45019ebf0..3e1532a180ff 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -17,6 +17,7 @@ #include #include +#include #include "core.h" static u_int *debug; diff --git a/drivers/media/dvb/bt8xx/dst_ca.c b/drivers/media/dvb/bt8xx/dst_ca.c index 4601b059b2b2..0e246eaad05a 100644 --- a/drivers/media/dvb/bt8xx/dst_ca.c +++ b/drivers/media/dvb/bt8xx/dst_ca.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include "dvbdev.h" diff --git a/drivers/media/dvb/dvb-core/dvbdev.h b/drivers/media/dvb/dvb-core/dvbdev.h index 79927305e84d..487919bea7ae 
100644 --- a/drivers/media/dvb/dvb-core/dvbdev.h +++ b/drivers/media/dvb/dvb-core/dvbdev.h @@ -27,7 +27,6 @@ #include #include #include -#include #define DVB_MAJOR 212 diff --git a/drivers/media/dvb/ttpci/av7110.c b/drivers/media/dvb/ttpci/av7110.c index d1d959ed37b7..8d65c652ba50 100644 --- a/drivers/media/dvb/ttpci/av7110.c +++ b/drivers/media/dvb/ttpci/av7110.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/radio/radio-mr800.c b/drivers/media/radio/radio-mr800.c index 837467f93805..575bf9d89419 100644 --- a/drivers/media/radio/radio-mr800.c +++ b/drivers/media/radio/radio-mr800.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/radio/radio-si470x.c b/drivers/media/radio/radio-si470x.c index 46d216329611..e85f318b4d2b 100644 --- a/drivers/media/radio/radio-si470x.c +++ b/drivers/media/radio/radio-si470x.c @@ -127,6 +127,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c index 5eb1464af670..d147d29bb0d3 100644 --- a/drivers/media/video/bt8xx/bttv-driver.c +++ b/drivers/media/video/bt8xx/bttv-driver.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include "bttvp.h" diff --git a/drivers/media/video/cx23885/cx23885-417.c b/drivers/media/video/cx23885/cx23885-417.c index 2943bfd32a94..428f0c45e6b7 100644 --- a/drivers/media/video/cx23885/cx23885-417.c +++ b/drivers/media/video/cx23885/cx23885-417.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/cx23885/cx23885-video.c b/drivers/media/video/cx23885/cx23885-video.c index 70836af3ab48..5d6093336300 100644 --- a/drivers/media/video/cx23885/cx23885-video.c +++ b/drivers/media/video/cx23885/cx23885-video.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/cx88/cx88-blackbird.c b/drivers/media/video/cx88/cx88-blackbird.c index 44eacfb0d0d6..356d6896da3f 100644 --- a/drivers/media/video/cx88/cx88-blackbird.c +++ b/drivers/media/video/cx88/cx88-blackbird.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/cx88/cx88-video.c b/drivers/media/video/cx88/cx88-video.c index b12770848c00..2bb54c3ef5cd 100644 --- a/drivers/media/video/cx88/cx88-video.c +++ b/drivers/media/video/cx88/cx88-video.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/dabusb.c b/drivers/media/video/dabusb.c index ec2f45dde164..0664d111085f 100644 --- a/drivers/media/video/dabusb.c +++ b/drivers/media/video/dabusb.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/pwc/pwc-if.c b/drivers/media/video/pwc/pwc-if.c index db25c3034c11..8d17cf613306 100644 --- a/drivers/media/video/pwc/pwc-if.c +++ b/drivers/media/video/pwc/pwc-if.c @@ -62,6 +62,7 @@ #include #include #include +#include #ifdef CONFIG_USB_PWC_INPUT_EVDEV #include #endif diff --git a/drivers/media/video/pwc/pwc.h b/drivers/media/video/pwc/pwc.h index 0be6f814f539..0b658dee05a4 100644 --- a/drivers/media/video/pwc/pwc.h +++ b/drivers/media/video/pwc/pwc.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c index 6be845ccc7d7..9e3262c0ba37 100644 --- 
a/drivers/media/video/s2255drv.c +++ b/drivers/media/video/s2255drv.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/saa5246a.c b/drivers/media/video/saa5246a.c index 155804b061e9..b624a4c01fdc 100644 --- a/drivers/media/video/saa5246a.c +++ b/drivers/media/video/saa5246a.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c index 271d6e931b75..12835fb82c95 100644 --- a/drivers/media/video/saa5249.c +++ b/drivers/media/video/saa5249.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c index add1757f8930..296788c3bf0e 100644 --- a/drivers/media/video/saa7134/saa7134-empress.c +++ b/drivers/media/video/saa7134/saa7134-empress.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "saa7134-reg.h" diff --git a/drivers/media/video/se401.c b/drivers/media/video/se401.c index c8f05297d0f0..85ffc2cba039 100644 --- a/drivers/media/video/se401.c +++ b/drivers/media/video/se401.c @@ -31,6 +31,7 @@ static const char version[] = "0.24"; #include #include #include +#include #include #include #include "se401.h" diff --git a/drivers/media/video/stk-webcam.c b/drivers/media/video/stk-webcam.c index 2e5937047278..4d6785e63455 100644 --- a/drivers/media/video/stk-webcam.c +++ b/drivers/media/video/stk-webcam.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/video/stradis.c b/drivers/media/video/stradis.c index 0eb313082c97..eaada39c76fd 100644 --- a/drivers/media/video/stradis.c +++ b/drivers/media/video/stradis.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/stv680.c b/drivers/media/video/stv680.c index 75f286f7a2e9..8b4e7dafce7b 100644 --- a/drivers/media/video/stv680.c +++ b/drivers/media/video/stv680.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/usbvideo/vicam.c b/drivers/media/video/usbvideo/vicam.c index 8d73979596f9..45fce39ec9ad 100644 --- a/drivers/media/video/usbvideo/vicam.c +++ b/drivers/media/video/usbvideo/vicam.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/usbvision/usbvision-video.c b/drivers/media/video/usbvision/usbvision-video.c index 90b58914f984..90d9b5c0e9a7 100644 --- a/drivers/media/video/usbvision/usbvision-video.c +++ b/drivers/media/video/usbvision/usbvision-video.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/video/v4l2-dev.c b/drivers/media/video/v4l2-dev.c index 31eac66411d7..a7f1b69a7dab 100644 --- a/drivers/media/video/v4l2-dev.c +++ b/drivers/media/video/v4l2-dev.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/video/zoran/zoran_driver.c b/drivers/media/video/zoran/zoran_driver.c index 3d7df32a3d87..bcdefb1bcb3d 100644 --- a/drivers/media/video/zoran/zoran_driver.c +++ b/drivers/media/video/zoran/zoran_driver.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index fa2d93a9fb8d..aed609832bc2 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c 
@@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/misc/sgi-gru/grukservices.c b/drivers/misc/sgi-gru/grukservices.c index eedbf9c32760..79689b10f937 100644 --- a/drivers/misc/sgi-gru/grukservices.c +++ b/drivers/misc/sgi-gru/grukservices.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index d53aa9582137..20f9bc626688 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index a5b9f6ae507b..d703e73fffa7 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c index 2fa47af992a8..0ff689afa757 100644 --- a/drivers/pci/hotplug/cpqphp_ctrl.c +++ b/drivers/pci/hotplug/cpqphp_ctrl.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c index 8450f4a6568a..e6089bdb6e5b 100644 --- a/drivers/pci/hotplug/cpqphp_sysfs.c +++ b/drivers/pci/hotplug/cpqphp_sysfs.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "cpqphp.h" diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index ff4034502d24..8aab8edf123e 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include "../pci.h" diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c index ec22284eed30..e1c1ec540893 100644 --- a/drivers/pci/syscall.c +++ b/drivers/pci/syscall.c @@ -9,7 +9,6 @@ #include #include -#include #include #include #include "pci.h" diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 4ce3f72ee1c1..df918ef27965 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 650bcef08f2a..cd78c501803a 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -9,7 +9,6 @@ #include #include -#include #include #include diff --git a/drivers/telephony/ixj.c b/drivers/telephony/ixj.c index a913efc69669..40de151f2789 100644 --- a/drivers/telephony/ixj.c +++ b/drivers/telephony/ixj.c @@ -257,6 +257,7 @@ #include /* everything... 
*/ #include /* error codes */ #include +#include #include #include #include diff --git a/drivers/telephony/phonedev.c b/drivers/telephony/phonedev.c index b52cc830c0b4..f3873f650bb4 100644 --- a/drivers/telephony/phonedev.c +++ b/drivers/telephony/phonedev.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 0fe434505ac4..ba589d4ca8bc 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c index 826f3adde5d8..77352ccc245e 100644 --- a/drivers/usb/gadget/amd5536udc.c +++ b/drivers/usb/gadget/amd5536udc.c @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/gadget/langwell_udc.c b/drivers/usb/gadget/langwell_udc.c index 6829d5961359..a3913519fd58 100644 --- a/drivers/usb/gadget/langwell_udc.c +++ b/drivers/usb/gadget/langwell_udc.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c index 9a2b8920532d..a9b452fe6221 100644 --- a/drivers/usb/gadget/s3c2410_udc.c +++ b/drivers/usb/gadget/s3c2410_udc.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 56976cc0352a..e18f74946e68 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 3c5fe5cee05a..90e1a8dedfa9 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/usb/misc/rio500.c b/drivers/usb/misc/rio500.c index deb95bb49fd1..d645f3899fe1 100644 --- a/drivers/usb/misc/rio500.c +++ b/drivers/usb/misc/rio500.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/misc/usblcd.c b/drivers/usb/misc/usblcd.c index e0ff9ccd866b..29092b8e59ce 100644 --- a/drivers/usb/misc/usblcd.c +++ b/drivers/usb/misc/usblcd.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/musb/cppi_dma.h b/drivers/usb/musb/cppi_dma.h index 8a39de3e6e47..59bf949e589b 100644 --- a/drivers/usb/musb/cppi_dma.h +++ b/drivers/usb/musb/cppi_dma.h @@ -5,7 +5,6 @@ #include #include -#include #include #include diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index f3772ca3b2cf..381d648a36b8 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 5f08702f672f..5a8ae274d258 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index c40f95c1951c..c31940a307f8 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git 
a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index a84216464ca0..0c39b55aeef4 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 53ea05645ff8..a85c818be945 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/adfs/super.c b/fs/adfs/super.c index aad92f0a1048..6910a98bd73c 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "adfs.h" #include "dir_f.h" diff --git a/fs/afs/super.c b/fs/afs/super.c index ad0514d0115f..e1ea1c240b6a 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index f3da2eb51f56..00bf8fcb245f 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 54bd07d44e68..1e41aadb1068 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include "bfs.h" diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 6a021265f018..88b9a3ff44e4 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -11,7 +11,6 @@ #include #include -#include #include "bfs.h" #undef DEBUG diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index de1e2fd32080..9d8ba4d54a37 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 7c3cd248d8d6..4b833972273a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7ffa3d34ea19..791eab19e330 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9f4db848db10..bd88f25889f7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9f179d4832d5..6d6d06cb6dfc 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/char_dev.c b/fs/char_dev.c index b7c9d5187a75..a173551e19d7 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/fs/compat.c b/fs/compat.c index fbadb947727b..94502dab972a 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 626c7483b4de..f28f070a60fc 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exofs/super.c b/fs/exofs/super.c index a343b4ea62f6..5ab10c3bbebe 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -31,6 +31,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#include 
#include #include #include diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c index 7cb4badef927..e7431309bdca 100644 --- a/fs/ext2/ioctl.c +++ b/fs/ext2/ioctl.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bb415408fdb6..24a6abb2aef5 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 38ff75a0fe22..530b4ca01510 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 82f88733b681..bbc94ae4fd77 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -9,7 +9,6 @@ #include #include #include -#include #include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 73471b7ecc8c..cb6e83557112 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include "fat.h" diff --git a/fs/fcntl.c b/fs/fcntl.c index a040b764f8e3..ae413086db97 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index cdbd1654e4cd..1e8af939b3e4 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 6f833dc8e910..f7fcbe49da72 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "hfs_fs.h" diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 9fc3af0c0dab..c0759fe0855b 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 6916c41d7017..8865c94f55f6 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -6,6 +6,7 @@ * directory VFS functions */ +#include #include "hpfs_fn.h" static int hpfs_dir_release(struct inode *inode, struct file *filp) diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 64ab52259204..3efabff00367 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -6,6 +6,7 @@ * file VFS functions */ +#include #include "hpfs_fn.h" #define BLOCKS(size) (((size) + 511) >> 9) diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index c2ea31bae313..701ca54c0867 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -13,7 +13,6 @@ #include #include #include -#include #include "hpfs.h" diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 39a1bfbea312..fe703ae46bc7 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -6,6 +6,7 @@ * inode VFS functions */ +#include #include "hpfs_fn.h" void hpfs_init_inode(struct inode *i) diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index b649232dde97..82b9c4ba9ed0 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -6,6 +6,7 @@ * adding & removing files & directories */ #include +#include #include "hpfs_fn.h" static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 07a22caf2687..0035c021395a 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/lockd/clntproc.c 
b/fs/lockd/clntproc.c index f2fdcbce143e..4336adba952a 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 1725037374c5..bd173a6ca3b1 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 3688e55901fc..e1d28ddd2169 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index af05b918cb5b..6dd48a4405b4 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 89f98e9a024b..38d42c29fb92 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 0055b813ec2c..05062329b678 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 64f87194d390..bd7938eda6a8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 92ce43517814..ff0c080db59b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 96c4ebfa46f4..73ea5e8d66ce 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -18,7 +18,6 @@ #include #include #include -#include #include diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1250fb978ac1..6d0847562d87 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d4c9884cd54b..492c79b7800b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 54100acc1102..1a4fa04cf071 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -43,7 +43,6 @@ */ #include -#include #include "nilfs.h" #include "page.h" diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 9fcd36dcc9a0..467b413bec21 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -7,7 +7,6 @@ #include #include -#include #define MLOG_MASK_PREFIX ML_INODE #include diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index f3d47d856848..6925b835a43b 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 3b52770f46ff..cb5fc57e370b 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 6db7a6be6c97..8aacd64957a2 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -25,7 +25,6 @@ /* This file implements EXT2-compatible extended attribute ioctl() calls */ #include -#include #include #include "ubifs.h" diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index f4e255441574..0542fd507649 
100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -41,7 +41,6 @@ #include "xfs_ioctl.h" #include -#include static struct vm_operations_struct xfs_file_vm_ops; diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 2dac064d8359..0026f267da20 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -3,7 +3,6 @@ #ifdef CONFIG_CRASH_DUMP #include -#include #include #include diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 45257475623c..8246c697863d 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -2,7 +2,9 @@ #define LINUX_HARDIRQ_H #include +#ifdef CONFIG_PREEMPT #include +#endif #include #include #include diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 7bc457593684..26361c4c037a 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -7,7 +7,6 @@ #ifndef _LINUX_QUOTAOPS_ #define _LINUX_QUOTAOPS_ -#include #include static inline struct quota_info *sb_dqopt(struct super_block *sb) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index d8910b68e1bd..b99c625fddfe 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -12,7 +12,6 @@ #include #include #include -#include /* * Buffer adjustment diff --git a/kernel/power/user.c b/kernel/power/user.c index ed97375daae9..bf0014d6a5f0 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 39af8af6fc30..1090b0aed9ba 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 3aa0a0dfdfa8..8bc8d8afea6a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 590b83963622..bfbe13786bb4 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -54,6 +54,7 @@ #include #include #include +#include #include /* For TIOCOUTQ/INQ */ #include #include diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 417b0e309495..f1118d92a191 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index cb762c8723ea..80cf29aae096 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h index bccf4d0059f0..b001c361ad30 100644 --- a/net/irda/irnet/irnet.h +++ b/net/irda/irnet/irnet.h @@ -241,7 +241,6 @@ #include #include -#include #include #include #include diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 6d8ae03c14f5..68cbcb19cbd8 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -13,6 +13,7 @@ * 2) as a control channel (write commands, read events) */ +#include #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5bc2f45bddf0..ebfcf9b89909 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 
1102ce1251f7..8f459abe97cf 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 6f33d33cc064..27d44332f017 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 466e2d22d256..258daa80ad92 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -48,6 +48,7 @@ #include #include /* support for loadable modules */ #include /* kmalloc(), kfree() */ +#include #include #include /* inline mem*, str* functions */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 21cdc872004e..5e6c072c64d3 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 4ead0a2b6b3aa0b527871ec978c3ef12aafb6bfb Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Thu, 2 Jul 2009 23:25:44 +0200 Subject: Driver Core: remove BUS_ID_SIZE The name size limit is gone from the driver-core, this is the removal of the last left-over. Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index ed4e39f2c423..aebb81036db2 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -25,8 +25,6 @@ #include #include -#define BUS_ID_SIZE 20 - struct device; struct device_private; struct device_driver; -- cgit v1.2.3 From 11a28d373ed2539a110d56419457e2e7db221ac7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 Jul 2009 09:51:33 +0000 Subject: net: make namespace iteration possible under RCU All we need to take care of is using proper RCU list add/del primitives and inserting a synchronize_rcu() at one place to make sure the exit notifiers are run after everybody has stopped iterating the list. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/net_namespace.h | 3 +++ net/core/net_namespace.c | 14 +++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index ded434b032a4..b34a6ee73754 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -208,6 +208,9 @@ static inline struct net *read_pnet(struct net * const *pnet) #define for_each_net(VAR) \ list_for_each_entry(VAR, &net_namespace_list, list) +#define for_each_net_rcu(VAR) \ + list_for_each_entry_rcu(VAR, &net_namespace_list, list) + #ifdef CONFIG_NET_NS #define __net_init #define __net_exit diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b7292a2719dc..5cd0b22e649d 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -127,7 +128,7 @@ static struct net *net_create(void) rv = setup_net(net); if (rv == 0) { rtnl_lock(); - list_add_tail(&net->list, &net_namespace_list); + list_add_tail_rcu(&net->list, &net_namespace_list); rtnl_unlock(); } mutex_unlock(&net_mutex); @@ -156,9 +157,16 @@ static void cleanup_net(struct work_struct *work) /* Don't let anyone else find us. */ rtnl_lock(); - list_del(&net->list); + list_del_rcu(&net->list); rtnl_unlock(); + /* + * Another CPU might be rcu-iterating the list, wait for it. 
+ * This needs to be before calling the exit() notifiers, so + * the rcu_barrier() below isn't sufficient alone. + */ + synchronize_rcu(); + /* Run all of the network namespace exit methods */ list_for_each_entry_reverse(ops, &pernet_list, list) { if (ops->exit) @@ -219,7 +227,7 @@ static int __init net_ns_init(void) panic("Could not setup the initial network namespace"); rtnl_lock(); - list_add_tail(&init_net.list, &net_namespace_list); + list_add_tail_rcu(&init_net.list, &net_namespace_list); rtnl_unlock(); mutex_unlock(&net_mutex); -- cgit v1.2.3 From 134e63756d5f3d0f7604dfcca847b09d1b14fd66 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 Jul 2009 09:51:34 +0000 Subject: genetlink: make netns aware This makes generic netlink network namespace aware. No generic netlink families except for the controller family are made namespace aware, they need to be checked one by one and then set the family->netnsok member to true. A new function genlmsg_multicast_netns() is introduced to allow sending a multicast message in a given namespace, for example when it applies to an object that lives in that namespace, a new function genlmsg_multicast_allns() to send a message to all network namespaces (for objects that do not have an associated netns). The function genlmsg_multicast() is changed to multicast the message in just init_net, which is currently correct for all generic netlink families since they only work in init_net right now. Some will later want to work in all net namespaces because they do not care about the netns at all -- those will have to be converted to use one of the new functions genlmsg_multicast_allns() or genlmsg_multicast_netns() whenever they are made netns aware in some way. After this patch families can easily decide whether or not they should be available in all net namespaces. Many genl families us it for objects not related to networking and should therefore be available in all namespaces, but that will have to be done on a per family basis. Note that this doesn't touch on the checkpoint/restart problem where network namespaces could be used, genl families and multicast groups are numbered globally and I see no easy way of changing that, especially since it must be possible to multicast to all network namespaces for those families that do not care about netns. Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- fs/dlm/netlink.c | 2 +- include/net/genetlink.h | 66 +++++++++++++-- include/net/net_namespace.h | 2 + kernel/taskstats.c | 10 +-- net/irda/irnetlink.c | 2 +- net/netfilter/ipvs/ip_vs_ctl.c | 2 +- net/netlink/genetlink.c | 186 ++++++++++++++++++++++++++++++++--------- net/tipc/netlink.c | 2 +- net/wireless/nl80211.c | 14 ++-- 9 files changed, 223 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index ccc9d62c462d..55ea369f43a9 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -63,7 +63,7 @@ static int send_data(struct sk_buff *skb) return rv; } - return genlmsg_unicast(skb, listener_nlpid); + return genlmsg_unicast(&init_net, skb, listener_nlpid); } static int user_cmd(struct sk_buff *skb, struct genl_info *info) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 1b0e3ee4ddd8..2a1c06874c42 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -3,6 +3,7 @@ #include #include +#include /** * struct genl_multicast_group - generic netlink multicast group @@ -27,6 +28,8 @@ struct genl_multicast_group * @name: name of family * @version: protocol version * @maxattr: maximum number of attributes supported + * @netnsok: set to true if the family can handle network + * namespaces and should be presented in all of them * @attrbuf: buffer to store parsed attributes * @ops_list: list of all assigned operations * @family_list: family list @@ -39,6 +42,7 @@ struct genl_family char name[GENL_NAMSIZ]; unsigned int version; unsigned int maxattr; + bool netnsok; struct nlattr ** attrbuf; /* private */ struct list_head ops_list; /* private */ struct list_head family_list; /* private */ @@ -62,8 +66,32 @@ struct genl_info struct genlmsghdr * genlhdr; void * userhdr; struct nlattr ** attrs; +#ifdef CONFIG_NET_NS + struct net * _net; +#endif }; +#ifdef CONFIG_NET_NS +static inline struct net *genl_info_net(struct genl_info *info) +{ + return info->_net; +} + +static inline void genl_info_net_set(struct genl_info *info, struct net *net) +{ + info->_net = net; +} +#else +static inline struct net *genl_info_net(struct genl_info *info) +{ + return &init_net; +} + +static inline void genl_info_net_set(struct genl_info *info, struct net *net) +{ +} +#endif + /** * struct genl_ops - generic netlink operations * @cmd: command identifier @@ -98,8 +126,6 @@ extern int genl_register_mc_group(struct genl_family *family, extern void genl_unregister_mc_group(struct genl_family *family, struct genl_multicast_group *grp); -extern struct sock *genl_sock; - /** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message @@ -170,7 +196,21 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) } /** - * genlmsg_multicast - multicast a netlink message + * genlmsg_multicast_netns - multicast a netlink message to a specific netns + * @net: the net namespace + * @skb: netlink message as socket buffer + * @pid: own netlink pid to avoid sending to yourself + * @group: multicast group id + * @flags: allocation flags + */ +static inline int genlmsg_multicast_netns(struct net *net, struct sk_buff *skb, + u32 pid, unsigned int group, gfp_t flags) +{ + return nlmsg_multicast(net->genl_sock, skb, pid, group, flags); +} + +/** + * genlmsg_multicast - multicast a netlink message to the default netns * @skb: netlink message as socket buffer * @pid: own netlink pid to avoid sending to yourself * @group: multicast group id @@ -179,17 +219,29 @@ static inline void genlmsg_cancel(struct 
sk_buff *skb, void *hdr) static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid, unsigned int group, gfp_t flags) { - return nlmsg_multicast(genl_sock, skb, pid, group, flags); + return genlmsg_multicast_netns(&init_net, skb, pid, group, flags); } +/** + * genlmsg_multicast_allns - multicast a netlink message to all net namespaces + * @skb: netlink message as socket buffer + * @pid: own netlink pid to avoid sending to yourself + * @group: multicast group id + * @flags: allocation flags + * + * This function must hold the RTNL or rcu_read_lock(). + */ +int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, + unsigned int group, gfp_t flags); + /** * genlmsg_unicast - unicast a netlink message * @skb: netlink message as socket buffer * @pid: netlink pid of the destination socket */ -static inline int genlmsg_unicast(struct sk_buff *skb, u32 pid) +static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 pid) { - return nlmsg_unicast(genl_sock, skb, pid); + return nlmsg_unicast(net->genl_sock, skb, pid); } /** @@ -199,7 +251,7 @@ static inline int genlmsg_unicast(struct sk_buff *skb, u32 pid) */ static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) { - return genlmsg_unicast(skb, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), skb, info->snd_pid); } /** diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index b34a6ee73754..3173d12d946b 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -26,6 +26,7 @@ struct net_device; struct sock; struct ctl_table_header; struct net_generic; +struct sock; struct net { atomic_t count; /* To decided when the network @@ -57,6 +58,7 @@ struct net { spinlock_t rules_mod_lock; struct sock *rtnl; /* rtnetlink socket */ + struct sock *genl_sock; struct netns_core core; struct netns_mib mib; diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 888adbcca30c..ea8384d3caa7 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -108,7 +108,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, /* * Send taskstats data in @skb to listener with nl_pid @pid */ -static int send_reply(struct sk_buff *skb, pid_t pid) +static int send_reply(struct sk_buff *skb, struct genl_info *info) { struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); void *reply = genlmsg_data(genlhdr); @@ -120,7 +120,7 @@ static int send_reply(struct sk_buff *skb, pid_t pid) return rc; } - return genlmsg_unicast(skb, pid); + return genlmsg_reply(skb, info); } /* @@ -150,7 +150,7 @@ static void send_cpu_listeners(struct sk_buff *skb, if (!skb_next) break; } - rc = genlmsg_unicast(skb_cur, s->pid); + rc = genlmsg_unicast(&init_net, skb_cur, s->pid); if (rc == -ECONNREFUSED) { s->valid = 0; delcount++; @@ -418,7 +418,7 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) goto err; } - rc = send_reply(rep_skb, info->snd_pid); + rc = send_reply(rep_skb, info); err: fput_light(file, fput_needed); @@ -487,7 +487,7 @@ free_return_rc: } else goto err; - return send_reply(rep_skb, info->snd_pid); + return send_reply(rep_skb, info); err: nlmsg_free(rep_skb); return rc; diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 8dd7ed7e7c1f..476b307bd801 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -115,7 +115,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) genlmsg_end(msg, hdr); - return genlmsg_unicast(msg, info->snd_pid); + return genlmsg_reply(msg, info); err_out: 
nlmsg_free(msg); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7c1333c67ff3..2d24d81474ce 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3231,7 +3231,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) } genlmsg_end(msg, reply); - ret = genlmsg_unicast(msg, info->snd_pid); + ret = genlmsg_reply(msg, info); goto out; nla_put_failure: diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index eed4c6a8afc0..575c64341508 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -18,8 +18,6 @@ #include #include -struct sock *genl_sock = NULL; - static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */ static inline void genl_lock(void) @@ -175,10 +173,31 @@ int genl_register_mc_group(struct genl_family *family, mc_groups_longs++; } - err = netlink_change_ngroups(genl_sock, - mc_groups_longs * BITS_PER_LONG); - if (err) - goto out; + if (family->netnsok) { + struct net *net; + + rcu_read_lock(); + for_each_net_rcu(net) { + err = netlink_change_ngroups(net->genl_sock, + mc_groups_longs * BITS_PER_LONG); + if (err) { + /* + * No need to roll back, can only fail if + * memory allocation fails and then the + * number of _possible_ groups has been + * increased on some sockets which is ok. + */ + rcu_read_unlock(); + goto out; + } + } + rcu_read_unlock(); + } else { + err = netlink_change_ngroups(init_net.genl_sock, + mc_groups_longs * BITS_PER_LONG); + if (err) + goto out; + } grp->id = id; set_bit(id, mc_groups); @@ -195,8 +214,14 @@ EXPORT_SYMBOL(genl_register_mc_group); static void __genl_unregister_mc_group(struct genl_family *family, struct genl_multicast_group *grp) { + struct net *net; BUG_ON(grp->family != family); - netlink_clear_multicast_users(genl_sock, grp->id); + + rcu_read_lock(); + for_each_net_rcu(net) + netlink_clear_multicast_users(net->genl_sock, grp->id); + rcu_read_unlock(); + clear_bit(grp->id, mc_groups); list_del(&grp->list); genl_ctrl_event(CTRL_CMD_DELMCAST_GRP, grp); @@ -467,6 +492,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { struct genl_ops *ops; struct genl_family *family; + struct net *net = sock_net(skb->sk); struct genl_info info; struct genlmsghdr *hdr = nlmsg_data(nlh); int hdrlen, err; @@ -475,6 +501,10 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (family == NULL) return -ENOENT; + /* this family doesn't exist in this netns */ + if (!family->netnsok && !net_eq(net, &init_net)) + return -ENOENT; + hdrlen = GENL_HDRLEN + family->hdrsize; if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; @@ -492,7 +522,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EOPNOTSUPP; genl_unlock(); - err = netlink_dump_start(genl_sock, skb, nlh, + err = netlink_dump_start(net->genl_sock, skb, nlh, ops->dumpit, ops->done); genl_lock(); return err; @@ -514,6 +544,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; info.attrs = family->attrbuf; + genl_info_net_set(&info, net); return ops->doit(skb, &info); } @@ -534,6 +565,7 @@ static struct genl_family genl_ctrl = { .name = "nlctrl", .version = 0x2, .maxattr = CTRL_ATTR_MAX, + .netnsok = true, }; static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, @@ -650,6 +682,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) int i, n = 0; struct genl_family *rt; + struct 
net *net = sock_net(skb->sk); int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; @@ -658,6 +691,8 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) continue; n = 0; list_for_each_entry(rt, genl_family_chain(i), family_list) { + if (!rt->netnsok && !net_eq(net, &init_net)) + continue; if (++n < fams_to_skip) continue; if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid, @@ -729,6 +764,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) if (info->attrs[CTRL_ATTR_FAMILY_ID]) { u16 id = nla_get_u16(info->attrs[CTRL_ATTR_FAMILY_ID]); res = genl_family_find_byid(id); + err = -ENOENT; } if (info->attrs[CTRL_ATTR_FAMILY_NAME]) { @@ -736,49 +772,61 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]); res = genl_family_find_byname(name); + err = -ENOENT; } - if (res == NULL) { - err = -ENOENT; - goto errout; + if (res == NULL) + return err; + + if (!res->netnsok && !net_eq(genl_info_net(info), &init_net)) { + /* family doesn't exist here */ + return -ENOENT; } msg = ctrl_build_family_msg(res, info->snd_pid, info->snd_seq, CTRL_CMD_NEWFAMILY); - if (IS_ERR(msg)) { - err = PTR_ERR(msg); - goto errout; - } + if (IS_ERR(msg)) + return PTR_ERR(msg); - err = genlmsg_reply(msg, info); -errout: - return err; + return genlmsg_reply(msg, info); } static int genl_ctrl_event(int event, void *data) { struct sk_buff *msg; + struct genl_family *family; + struct genl_multicast_group *grp; - if (genl_sock == NULL) + /* genl is still initialising */ + if (!init_net.genl_sock) return 0; switch (event) { case CTRL_CMD_NEWFAMILY: case CTRL_CMD_DELFAMILY: - msg = ctrl_build_family_msg(data, 0, 0, event); - if (IS_ERR(msg)) - return PTR_ERR(msg); - - genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL); + family = data; + msg = ctrl_build_family_msg(family, 0, 0, event); break; case CTRL_CMD_NEWMCAST_GRP: case CTRL_CMD_DELMCAST_GRP: + grp = data; + family = grp->family; msg = ctrl_build_mcgrp_msg(data, 0, 0, event); - if (IS_ERR(msg)) - return PTR_ERR(msg); - - genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL); break; + default: + return -EINVAL; + } + + if (IS_ERR(msg)) + return PTR_ERR(msg); + + if (!family->netnsok) { + genlmsg_multicast_netns(&init_net, msg, 0, + GENL_ID_CTRL, GFP_KERNEL); + } else { + rcu_read_lock(); + genlmsg_multicast_allns(msg, 0, GENL_ID_CTRL, GFP_ATOMIC); + rcu_read_unlock(); } return 0; @@ -795,6 +843,33 @@ static struct genl_multicast_group notify_grp = { .name = "notify", }; +static int __net_init genl_pernet_init(struct net *net) +{ + /* we'll bump the group number right afterwards */ + net->genl_sock = netlink_kernel_create(net, NETLINK_GENERIC, 0, + genl_rcv, &genl_mutex, + THIS_MODULE); + + if (!net->genl_sock && net_eq(net, &init_net)) + panic("GENL: Cannot initialize generic netlink\n"); + + if (!net->genl_sock) + return -ENOMEM; + + return 0; +} + +static void __net_exit genl_pernet_exit(struct net *net) +{ + netlink_kernel_release(net->genl_sock); + net->genl_sock = NULL; +} + +static struct pernet_operations genl_pernet_ops = { + .init = genl_pernet_init, + .exit = genl_pernet_exit, +}; + static int __init genl_init(void) { int i, err; @@ -804,36 +879,67 @@ static int __init genl_init(void) err = genl_register_family(&genl_ctrl); if (err < 0) - goto errout; + goto problem; err = genl_register_ops(&genl_ctrl, &genl_ctrl_ops); if (err < 0) - goto errout_register; + goto problem; netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV); - /* we'll bump 
the group number right afterwards */ - genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC, 0, - genl_rcv, &genl_mutex, THIS_MODULE); - if (genl_sock == NULL) - panic("GENL: Cannot initialize generic netlink\n"); + err = register_pernet_subsys(&genl_pernet_ops); + if (err) + goto problem; err = genl_register_mc_group(&genl_ctrl, ¬ify_grp); if (err < 0) - goto errout_register; + goto problem; return 0; -errout_register: - genl_unregister_family(&genl_ctrl); -errout: +problem: panic("GENL: Cannot register controller: %d\n", err); } subsys_initcall(genl_init); -EXPORT_SYMBOL(genl_sock); EXPORT_SYMBOL(genl_register_ops); EXPORT_SYMBOL(genl_unregister_ops); EXPORT_SYMBOL(genl_register_family); EXPORT_SYMBOL(genl_unregister_family); + +static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, + gfp_t flags) +{ + struct sk_buff *tmp; + struct net *net, *prev = NULL; + int err; + + for_each_net_rcu(net) { + if (prev) { + tmp = skb_clone(skb, flags); + if (!tmp) { + err = -ENOMEM; + goto error; + } + err = nlmsg_multicast(prev->genl_sock, tmp, + pid, group, flags); + if (err) + goto error; + } + + prev = net; + } + + return nlmsg_multicast(prev->genl_sock, skb, pid, group, flags); + error: + kfree_skb(skb); + return err; +} + +int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, unsigned int group, + gfp_t flags) +{ + return genlmsg_mcast(skb, pid, group, flags); +} +EXPORT_SYMBOL(genlmsg_multicast_allns); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 3c57005e44d1..7bda8e3d1398 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -62,7 +62,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) rep_nlh = nlmsg_hdr(rep_buf); memcpy(rep_nlh, req_nlh, hdr_space); rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(rep_buf, NETLINK_CB(skb).pid); + genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).pid); } return 0; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9deb12f73c44..2a04beba4369 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -413,7 +413,7 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) cfg80211_unlock_rdev(dev); - return genlmsg_unicast(msg, info->snd_pid); + return genlmsg_reply(msg, info); out_free: nlmsg_free(msg); @@ -739,7 +739,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) dev_put(netdev); cfg80211_unlock_rdev(dev); - return genlmsg_unicast(msg, info->snd_pid); + return genlmsg_reply(msg, info); out_free: nlmsg_free(msg); @@ -1030,7 +1030,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) goto nla_put_failure; genlmsg_end(msg, hdr); - err = genlmsg_unicast(msg, info->snd_pid); + err = genlmsg_reply(msg, info); goto out; nla_put_failure: @@ -1618,7 +1618,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) dev, mac_addr, &sinfo) < 0) goto out_free; - err = genlmsg_unicast(msg, info->snd_pid); + err = genlmsg_reply(msg, info); goto out; out_free: @@ -2087,7 +2087,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) dev, dst, next_hop, &pinfo) < 0) goto out_free; - err = genlmsg_unicast(msg, info->snd_pid); + err = genlmsg_reply(msg, info); goto out; out_free: @@ -2436,7 +2436,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, cur_params.dot11MeshHWMPnetDiameterTraversalTime); nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); - err = genlmsg_unicast(msg, info->snd_pid); + err = genlmsg_reply(msg, info); goto out; 
nla_put_failure: @@ -2624,7 +2624,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nla_nest_end(msg, nl_reg_rules); genlmsg_end(msg, hdr); - err = genlmsg_unicast(msg, info->snd_pid); + err = genlmsg_reply(msg, info); goto out; nla_put_failure: -- cgit v1.2.3 From 30ffee8480c13fbcf8ab6c28e31f79dfff683117 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 Jul 2009 09:51:35 +0000 Subject: net: move and export get_net_ns_by_pid The function get_net_ns_by_pid(), to get a network namespace from a pid_t, will be required in cfg80211 as well. Therefore, let's move it to net_namespace.c and export it. We can't make it a static inline in the !NETNS case because it needs to verify that the given pid even exists (and return -ESRCH). Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/net_namespace.h | 2 ++ net/core/net_namespace.c | 21 +++++++++++++++++++++ net/core/rtnetlink.c | 21 +-------------------- 3 files changed, 24 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 3173d12d946b..3882db1e263a 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -108,6 +108,8 @@ static inline struct net *copy_net_ns(unsigned long flags, struct net *net_ns) extern struct list_head net_namespace_list; +extern struct net *get_net_ns_by_pid(pid_t pid); + #ifdef CONFIG_NET_NS extern void __put_net(struct net *net); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 5cd0b22e649d..ddd2cd2b1775 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -201,6 +202,26 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) } #endif +struct net *get_net_ns_by_pid(pid_t pid) +{ + struct task_struct *tsk; + struct net *net; + + /* Lookup the network namespace */ + net = ERR_PTR(-ESRCH); + rcu_read_lock(); + tsk = find_task_by_vpid(pid); + if (tsk) { + struct nsproxy *nsproxy; + nsproxy = task_nsproxy(tsk); + if (nsproxy) + net = get_net(nsproxy->net_ns); + } + rcu_read_unlock(); + return net; +} +EXPORT_SYMBOL_GPL(get_net_ns_by_pid); + static int __init net_ns_init(void) { struct net_generic *ng; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index d78030f88bd0..b44775f9f2bf 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -52,6 +51,7 @@ #include #include #include +#include struct rtnl_link { @@ -725,25 +725,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_DATA] = { .type = NLA_NESTED }, }; -static struct net *get_net_ns_by_pid(pid_t pid) -{ - struct task_struct *tsk; - struct net *net; - - /* Lookup the network namespace */ - net = ERR_PTR(-ESRCH); - rcu_read_lock(); - tsk = find_task_by_vpid(pid); - if (tsk) { - struct nsproxy *nsproxy; - nsproxy = task_nsproxy(tsk); - if (nsproxy) - net = get_net(nsproxy->net_ns); - } - rcu_read_unlock(); - return net; -} - static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) { if (dev) { -- cgit v1.2.3 From d7ca4cc01fd154f2da30ae6dae160fa5800af758 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 9 Jul 2009 08:09:47 +0000 Subject: udpv4: Handle large incoming UDP/IPv4 packets and support software UFO. - validate and forward GSO UDP/IPv4 packets from untrusted sources. - do software UFO if the outgoing device doesn't support UFO. 
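The second bullet is the heart of the patch: once a UFO super-packet is split into IP fragments, only the first fragment carries the UDP header, so no NIC can fill in the checksum afterwards and the stack has to complete it in software before segmenting. A condensed, illustrative sketch of that fallback follows (the function name is invented; the real code, including the untrusted-source validation path, is in the net/ipv4/udp.c hunk below):

/*
 * Illustrative condensation of the software-UFO fallback, not the patch
 * itself: finish the UDP checksum in software, then let the generic
 * segmentation code chop the skb up.  inet_gso_segment() later rewrites
 * the per-fragment IP headers (id, frag_off, MF bit).
 */
static struct sk_buff *ufo_soft_fragment_sketch(struct sk_buff *skb,
						int features)
{
	/* csum_start/csum_offset locate the UDP header and its check field */
	int offset = skb->csum_start - skb_headroom(skb);
	__wsum csum = skb_checksum(skb, offset, skb->len - offset, 0);

	*(__sum16 *)(skb->data + offset + skb->csum_offset) = csum_fold(csum);
	skb->ip_summed = CHECKSUM_NONE;		/* checksum is now complete */

	return skb_segment(skb, features);
}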
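As a usage note on the get_net_ns_by_pid() export in the previous patch: the helper returns either an ERR_PTR (-ESRCH when no such task exists) or a namespace reference taken with get_net(), so every successful lookup must be balanced by put_net(). A minimal, hypothetical caller sketch, with all names invented for illustration (the commit only states that cfg80211 will need the helper):

/* Hypothetical consumer: resolve a user-supplied pid attribute to a netns. */
static int example_switch_netns(struct nlattr *pid_attr)
{
	struct net *net = get_net_ns_by_pid(nla_get_u32(pid_attr));

	if (IS_ERR(net))
		return PTR_ERR(net);	/* e.g. -ESRCH */

	/* ... re-home a device or other object into 'net' here ... */

	put_net(net);			/* drop the reference from get_net() */
	return 0;
}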
Signed-off-by: Sridhar Samudrala Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/udp.h | 3 +++ net/ipv4/af_inet.c | 12 ++++++++++- net/ipv4/udp.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/udp.h b/include/net/udp.h index 90e6ce56be65..5fb029f817a3 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -207,4 +207,7 @@ extern void udp4_proc_exit(void); #endif extern void udp_init(void); + +extern int udp4_ufo_send_check(struct sk_buff *skb); +extern struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features); #endif /* _UDP_H */ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 566ea6c4321d..197d024b2536 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1187,6 +1187,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) int proto; int ihl; int id; + unsigned int offset = 0; if (!(features & NETIF_F_V4_CSUM)) features &= ~NETIF_F_SG; @@ -1229,7 +1230,14 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features) skb = segs; do { iph = ip_hdr(skb); - iph->id = htons(id++); + if (proto == IPPROTO_UDP) { + iph->id = htons(id); + iph->frag_off = htons(offset >> 3); + if (skb->next != NULL) + iph->frag_off |= htons(IP_MF); + offset += (skb->len - skb->mac_len - iph->ihl * 4); + } else + iph->id = htons(id++); iph->tot_len = htons(skb->len - skb->mac_len); iph->check = 0; iph->check = ip_fast_csum(skb_network_header(skb), iph->ihl); @@ -1425,6 +1433,8 @@ static struct net_protocol tcp_protocol = { static struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, + .gso_send_check = udp4_ufo_send_check, + .gso_segment = udp4_ufo_fragment, .no_policy = 1, .netns_ok = 1, }; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 80e3812837ad..7bc2d082a49e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1816,6 +1816,67 @@ void __init udp_init(void) sysctl_udp_wmem_min = SK_MEM_QUANTUM; } +int udp4_ufo_send_check(struct sk_buff *skb) +{ + const struct iphdr *iph; + struct udphdr *uh; + + if (!pskb_may_pull(skb, sizeof(*uh))) + return -EINVAL; + + iph = ip_hdr(skb); + uh = udp_hdr(skb); + + uh->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + IPPROTO_UDP, 0); + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + skb->ip_summed = CHECKSUM_PARTIAL; + return 0; +} + +struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, int features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int mss; + int offset; + __wsum csum; + + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { + /* Packet is from an untrusted source, reset gso_segs. */ + int type = skb_shinfo(skb)->gso_type; + + if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY) || + !(type & (SKB_GSO_UDP)))) + goto out; + + skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); + + segs = NULL; + goto out; + } + + /* Do software UFO. Complete and fill in the UDP checksum as HW cannot + * do checksum of UDP packets sent as multiple IP fragments. + */ + offset = skb->csum_start - skb_headroom(skb); + csum = skb_checksum(skb, offset, skb->len- offset, 0); + offset += skb->csum_offset; + *(__sum16 *)(skb->data + offset) = csum_fold(csum); + skb->ip_summed = CHECKSUM_NONE; + + /* Fragment the skb. 
IP headers of the fragments are updated in + * inet_gso_segment() + */ + segs = skb_segment(skb, features); +out: + return segs; +} + EXPORT_SYMBOL(udp_disconnect); EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_prot); -- cgit v1.2.3 From 7ea2f2c5a66e4e9a8d96296ac47ad895c467ee1d Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 9 Jul 2009 08:10:01 +0000 Subject: udpv6: Remove unused skb argument of ipv6_select_ident() - move ipv6_select_ident() inline function to ipv6.h and remove the unused skb argument Signed-off-by: Sridhar Samudrala Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/ipv6.h | 12 ++++++++++++ net/ipv6/ip6_output.c | 18 +++--------------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index f27fd83d67d8..ad9a51130254 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -441,6 +441,18 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } +static __inline__ void ipv6_select_ident(struct frag_hdr *fhdr) +{ + static u32 ipv6_fragmentation_id = 1; + static DEFINE_SPINLOCK(ip6_id_lock); + + spin_lock_bh(&ip6_id_lock); + fhdr->identification = htonl(ipv6_fragmentation_id); + if (++ipv6_fragmentation_id == 0) + ipv6_fragmentation_id = 1; + spin_unlock_bh(&ip6_id_lock); +} + /* * Prototypes exported by ipv6 */ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1c6f0fc43690..dd1a980b8ac9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -57,18 +57,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); -static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr) -{ - static u32 ipv6_fragmentation_id = 1; - static DEFINE_SPINLOCK(ip6_id_lock); - - spin_lock_bh(&ip6_id_lock); - fhdr->identification = htonl(ipv6_fragmentation_id); - if (++ipv6_fragmentation_id == 0) - ipv6_fragmentation_id = 1; - spin_unlock_bh(&ip6_id_lock); -} - int __ip6_local_out(struct sk_buff *skb) { int len; @@ -706,7 +694,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); - ipv6_select_ident(skb, fh); + ipv6_select_ident(fh); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); @@ -844,7 +832,7 @@ slow_path: fh->nexthdr = nexthdr; fh->reserved = 0; if (!frag_id) { - ipv6_select_ident(skb, fh); + ipv6_select_ident(fh); frag_id = fh->identification; } else fh->identification = frag_id; @@ -1093,7 +1081,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - sizeof(struct frag_hdr)) & ~7; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(skb, &fhdr); + ipv6_select_ident(&fhdr); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); -- cgit v1.2.3 From b3a633c8527ef155b1a4e22e8f5abc58f7af54c9 Mon Sep 17 00:00:00 2001 From: Julien Tinnes Date: Fri, 10 Jul 2009 10:46:30 -0700 Subject: personality handling: fix PER_CLEAR_ON_SETID for security reasons We have found that the current PER_CLEAR_ON_SETID mask on Linux doesn't include neither ADDR_COMPAT_LAYOUT, nor MMAP_PAGE_ZERO. The current mask is READ_IMPLIES_EXEC|ADDR_NO_RANDOMIZE. We believe it is important to add MMAP_PAGE_ZERO, because by using this personality it is possible to have the first page mapped inside a process running as setuid root. 
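(For context on where the mask bites: the exec path clears exactly these personality bits when the new image gains privileges, so any flag left out of PER_CLEAR_ON_SETID survives into the setuid image. A rough sketch of the mechanism, with the call site and condition simplified and assumed rather than taken from this patch:

	/* Assumed sketch of the setid-exec path: drop security-relevant
	 * personality bits before the privileged image starts running. */
	if (new_euid != old_euid || new_egid != old_egid)
		current->personality &= ~PER_CLEAR_ON_SETID;

Bits missing from the mask, as MMAP_PAGE_ZERO is today, simply survive such an exec.)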
This could be used in those scenarios: - Exploiting a NULL pointer dereference issue in a setuid root binary - Bypassing the mmap_min_addr restrictions of the Linux kernel: by running a setuid binary that would drop privileges before giving us control back (for instance by loading a user-supplied library), we could get the first page mapped in a process we control. By further using mremap and mprotect on this mapping, we can then completely bypass the mmap_min_addr restrictions. Less importantly, we believe ADDR_COMPAT_LAYOUT should also be added since on x86 32bits it will in practice disable most of the address space layout randomization (only the stack will remain randomized). Signed-off-by: Julien Tinnes Signed-off-by: Tavis Ormandy Acked-by: Christoph Hellwig Acked-by: Kees Cook Signed-off-by: James Morris --- include/linux/personality.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/personality.h b/include/linux/personality.h index a84e9ff9b27e..b7f578dac544 100644 --- a/include/linux/personality.h +++ b/include/linux/personality.h @@ -40,7 +40,11 @@ enum { * Security-relevant compatibility flags that must be * cleared upon setuid or setgid exec: */ -#define PER_CLEAR_ON_SETID (READ_IMPLIES_EXEC|ADDR_NO_RANDOMIZE) +#define PER_CLEAR_ON_SETID \ + (READ_IMPLIES_EXEC | \ + ADDR_NO_RANDOMIZE | \ + ADDR_COMPAT_LAYOUT | \ + MMAP_PAGE_ZERO) /* * Personality types. -- cgit v1.2.3 From 1a74826fa1cd6c2e382f927403b4440675f0f55a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 23 Jun 2009 15:58:48 -0700 Subject: Revert "USB: Add Intel Langwell USB OTG Transceiver Drive" This reverts commit 453f77558810ffa669ed5a510a7173ec49def396. The driver should not have been accepted as the MSRT code is not in the main kernel yet, which this depends on. Cc: Alan Cox Cc: Hao Wu Signed-off-by: Greg Kroah-Hartman --- drivers/usb/otg/Kconfig | 14 - drivers/usb/otg/Makefile | 1 - drivers/usb/otg/langwell_otg.c | 1915 -------------------------------------- include/linux/usb/langwell_otg.h | 177 ---- 4 files changed, 2107 deletions(-) delete mode 100644 drivers/usb/otg/langwell_otg.c delete mode 100644 include/linux/usb/langwell_otg.h (limited to 'include') diff --git a/drivers/usb/otg/Kconfig b/drivers/usb/otg/Kconfig index 69feeec1628c..aa884d072f0b 100644 --- a/drivers/usb/otg/Kconfig +++ b/drivers/usb/otg/Kconfig @@ -59,18 +59,4 @@ config NOP_USB_XCEIV built-in with usb ip or which are autonomous and doesn't require any phy programming such as ISP1x04 etc. -config USB_LANGWELL_OTG - tristate "Intel Langwell USB OTG dual-role support" - depends on USB && MRST - select USB_OTG - select USB_OTG_UTILS - help - Say Y here if you want to build Intel Langwell USB OTG - transciever driver in kernel. This driver implements role - switch between EHCI host driver and Langwell USB OTG - client driver. - - To compile this driver as a module, choose M here: the - module will be called langwell_otg. 
- endif # USB || OTG diff --git a/drivers/usb/otg/Makefile b/drivers/usb/otg/Makefile index 6d1abdd3c0ac..208167856529 100644 --- a/drivers/usb/otg/Makefile +++ b/drivers/usb/otg/Makefile @@ -9,7 +9,6 @@ obj-$(CONFIG_USB_OTG_UTILS) += otg.o obj-$(CONFIG_USB_GPIO_VBUS) += gpio_vbus.o obj-$(CONFIG_ISP1301_OMAP) += isp1301_omap.o obj-$(CONFIG_TWL4030_USB) += twl4030-usb.o -obj-$(CONFIG_USB_LANGWELL_OTG) += langwell_otg.o obj-$(CONFIG_NOP_USB_XCEIV) += nop-usb-xceiv.o ccflags-$(CONFIG_USB_DEBUG) += -DDEBUG diff --git a/drivers/usb/otg/langwell_otg.c b/drivers/usb/otg/langwell_otg.c deleted file mode 100644 index 6f628d0e9f39..000000000000 --- a/drivers/usb/otg/langwell_otg.c +++ /dev/null @@ -1,1915 +0,0 @@ -/* - * Intel Langwell USB OTG transceiver driver - * Copyright (C) 2008 - 2009, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - */ -/* This driver helps to switch Langwell OTG controller function between host - * and peripheral. It works with EHCI driver and Langwell client controller - * driver together. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "../core/hcd.h" - -#include - -#define DRIVER_DESC "Intel Langwell USB OTG transceiver driver" -#define DRIVER_VERSION "3.0.0.32L.0002" - -MODULE_DESCRIPTION(DRIVER_DESC); -MODULE_AUTHOR("Henry Yuan , Hao Wu "); -MODULE_VERSION(DRIVER_VERSION); -MODULE_LICENSE("GPL"); - -static const char driver_name[] = "langwell_otg"; - -static int langwell_otg_probe(struct pci_dev *pdev, - const struct pci_device_id *id); -static void langwell_otg_remove(struct pci_dev *pdev); -static int langwell_otg_suspend(struct pci_dev *pdev, pm_message_t message); -static int langwell_otg_resume(struct pci_dev *pdev); - -static int langwell_otg_set_host(struct otg_transceiver *otg, - struct usb_bus *host); -static int langwell_otg_set_peripheral(struct otg_transceiver *otg, - struct usb_gadget *gadget); -static int langwell_otg_start_srp(struct otg_transceiver *otg); - -static const struct pci_device_id pci_ids[] = {{ - .class = ((PCI_CLASS_SERIAL_USB << 8) | 0xfe), - .class_mask = ~0, - .vendor = 0x8086, - .device = 0x0811, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, -}, { /* end: all zeroes */ } -}; - -static struct pci_driver otg_pci_driver = { - .name = (char *) driver_name, - .id_table = pci_ids, - - .probe = langwell_otg_probe, - .remove = langwell_otg_remove, - - .suspend = langwell_otg_suspend, - .resume = langwell_otg_resume, -}; - -static const char *state_string(enum usb_otg_state state) -{ - switch (state) { - case OTG_STATE_A_IDLE: - return "a_idle"; - case OTG_STATE_A_WAIT_VRISE: - return "a_wait_vrise"; - case OTG_STATE_A_WAIT_BCON: - return "a_wait_bcon"; - case OTG_STATE_A_HOST: - return "a_host"; - case OTG_STATE_A_SUSPEND: - return "a_suspend"; - case OTG_STATE_A_PERIPHERAL: - 
return "a_peripheral"; - case OTG_STATE_A_WAIT_VFALL: - return "a_wait_vfall"; - case OTG_STATE_A_VBUS_ERR: - return "a_vbus_err"; - case OTG_STATE_B_IDLE: - return "b_idle"; - case OTG_STATE_B_SRP_INIT: - return "b_srp_init"; - case OTG_STATE_B_PERIPHERAL: - return "b_peripheral"; - case OTG_STATE_B_WAIT_ACON: - return "b_wait_acon"; - case OTG_STATE_B_HOST: - return "b_host"; - default: - return "UNDEFINED"; - } -} - -/* HSM timers */ -static inline struct langwell_otg_timer *otg_timer_initializer -(void (*function)(unsigned long), unsigned long expires, unsigned long data) -{ - struct langwell_otg_timer *timer; - timer = kmalloc(sizeof(struct langwell_otg_timer), GFP_KERNEL); - timer->function = function; - timer->expires = expires; - timer->data = data; - return timer; -} - -static struct langwell_otg_timer *a_wait_vrise_tmr, *a_wait_bcon_tmr, - *a_aidl_bdis_tmr, *b_ase0_brst_tmr, *b_se0_srp_tmr, *b_srp_res_tmr, - *b_bus_suspend_tmr; - -static struct list_head active_timers; - -static struct langwell_otg *the_transceiver; - -/* host/client notify transceiver when event affects HNP state */ -void langwell_update_transceiver() -{ - otg_dbg("transceiver driver is notified\n"); - queue_work(the_transceiver->qwork, &the_transceiver->work); -} -EXPORT_SYMBOL(langwell_update_transceiver); - -static int langwell_otg_set_host(struct otg_transceiver *otg, - struct usb_bus *host) -{ - otg->host = host; - - return 0; -} - -static int langwell_otg_set_peripheral(struct otg_transceiver *otg, - struct usb_gadget *gadget) -{ - otg->gadget = gadget; - - return 0; -} - -static int langwell_otg_set_power(struct otg_transceiver *otg, - unsigned mA) -{ - return 0; -} - -/* A-device drives vbus, controlled through PMIC CHRGCNTL register*/ -static void langwell_otg_drv_vbus(int on) -{ - struct ipc_pmic_reg_data pmic_data = {0}; - struct ipc_pmic_reg_data battery_data; - - /* Check if battery is attached or not */ - battery_data.pmic_reg_data[0].register_address = 0xd2; - battery_data.ioc = 0; - battery_data.num_entries = 1; - if (ipc_pmic_register_read(&battery_data)) { - otg_dbg("Failed to read PMIC register 0xd2.\n"); - return; - } - - if ((battery_data.pmic_reg_data[0].value & 0x20) == 0) { - otg_dbg("no battery attached\n"); - return; - } - - /* Workaround for battery attachment issue */ - if (battery_data.pmic_reg_data[0].value == 0x34) { - otg_dbg("battery \n"); - return; - } - - otg_dbg("battery attached\n"); - - pmic_data.ioc = 0; - pmic_data.pmic_reg_data[0].register_address = 0xD4; - pmic_data.num_entries = 1; - if (on) - pmic_data.pmic_reg_data[0].value = 0x20; - else - pmic_data.pmic_reg_data[0].value = 0xc0; - - if (ipc_pmic_register_write(&pmic_data, TRUE)) - otg_dbg("Failed to write PMIC.\n"); - -} - -/* charge vbus or discharge vbus through a resistor to ground */ -static void langwell_otg_chrg_vbus(int on) -{ - - u32 val; - - val = readl(the_transceiver->regs + CI_OTGSC); - - if (on) - writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_VC, - the_transceiver->regs + CI_OTGSC); - else - writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_VD, - the_transceiver->regs + CI_OTGSC); - -} - -/* Start SRP */ -static int langwell_otg_start_srp(struct otg_transceiver *otg) -{ - u32 val; - - otg_dbg("Start SRP ->\n"); - - val = readl(the_transceiver->regs + CI_OTGSC); - - writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HADP, - the_transceiver->regs + CI_OTGSC); - - /* Check if the data plus is finished or not */ - msleep(8); - val = readl(the_transceiver->regs + CI_OTGSC); - if (val & (OTGSC_HADP | OTGSC_DP)) - 
otg_dbg("DataLine SRP Error\n"); - - /* FIXME: VBus SRP */ - - return 0; -} - - -/* stop SOF via bus_suspend */ -static void langwell_otg_loc_sof(int on) -{ - struct usb_hcd *hcd; - int err; - - otg_dbg("loc_sof -> %d\n", on); - - hcd = bus_to_hcd(the_transceiver->otg.host); - if (on) - err = hcd->driver->bus_resume(hcd); - else - err = hcd->driver->bus_suspend(hcd); - - if (err) - otg_dbg("Failed to resume/suspend bus - %d\n", err); -} - -static void langwell_otg_phy_low_power(int on) -{ - u32 val; - - otg_dbg("phy low power mode-> %d\n", on); - - val = readl(the_transceiver->regs + CI_HOSTPC1); - if (on) - writel(val | HOSTPC1_PHCD, the_transceiver->regs + CI_HOSTPC1); - else - writel(val & ~HOSTPC1_PHCD, the_transceiver->regs + CI_HOSTPC1); -} - -/* Enable/Disable OTG interrupt */ -static void langwell_otg_intr(int on) -{ - u32 val; - - otg_dbg("interrupt -> %d\n", on); - - val = readl(the_transceiver->regs + CI_OTGSC); - if (on) { - val = val | (OTGSC_INTEN_MASK | OTGSC_IDPU); - writel(val, the_transceiver->regs + CI_OTGSC); - } else { - val = val & ~(OTGSC_INTEN_MASK | OTGSC_IDPU); - writel(val, the_transceiver->regs + CI_OTGSC); - } -} - -/* set HAAR: Hardware Assist Auto-Reset */ -static void langwell_otg_HAAR(int on) -{ - u32 val; - - otg_dbg("HAAR -> %d\n", on); - - val = readl(the_transceiver->regs + CI_OTGSC); - if (on) - writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HAAR, - the_transceiver->regs + CI_OTGSC); - else - writel((val & ~OTGSC_INTSTS_MASK) & ~OTGSC_HAAR, - the_transceiver->regs + CI_OTGSC); -} - -/* set HABA: Hardware Assist B-Disconnect to A-Connect */ -static void langwell_otg_HABA(int on) -{ - u32 val; - - otg_dbg("HABA -> %d\n", on); - - val = readl(the_transceiver->regs + CI_OTGSC); - if (on) - writel((val & ~OTGSC_INTSTS_MASK) | OTGSC_HABA, - the_transceiver->regs + CI_OTGSC); - else - writel((val & ~OTGSC_INTSTS_MASK) & ~OTGSC_HABA, - the_transceiver->regs + CI_OTGSC); -} - -static int langwell_otg_check_se0_srp(int on) -{ - u32 val; - - int delay_time = TB_SE0_SRP * 10; /* step is 100us */ - - otg_dbg("check_se0_srp -> \n"); - - do { - udelay(100); - if (!delay_time--) - break; - val = readl(the_transceiver->regs + CI_PORTSC1); - val &= PORTSC_LS; - } while (!val); - - otg_dbg("check_se0_srp <- \n"); - return val; -} - -/* The timeout callback function to set time out bit */ -static void set_tmout(unsigned long indicator) -{ - *(int *)indicator = 1; -} - -void langwell_otg_nsf_msg(unsigned long indicator) -{ - switch (indicator) { - case 2: - case 4: - case 6: - case 7: - printk(KERN_ERR "OTG:NSF-%lu - deivce not responding\n", - indicator); - break; - case 3: - printk(KERN_ERR "OTG:NSF-%lu - deivce not supported\n", - indicator); - break; - default: - printk(KERN_ERR "Do not have this kind of NSF\n"); - break; - } -} - -/* Initialize timers */ -static void langwell_otg_init_timers(struct otg_hsm *hsm) -{ - /* HSM used timers */ - a_wait_vrise_tmr = otg_timer_initializer(&set_tmout, TA_WAIT_VRISE, - (unsigned long)&hsm->a_wait_vrise_tmout); - a_wait_bcon_tmr = otg_timer_initializer(&set_tmout, TA_WAIT_BCON, - (unsigned long)&hsm->a_wait_bcon_tmout); - a_aidl_bdis_tmr = otg_timer_initializer(&set_tmout, TA_AIDL_BDIS, - (unsigned long)&hsm->a_aidl_bdis_tmout); - b_ase0_brst_tmr = otg_timer_initializer(&set_tmout, TB_ASE0_BRST, - (unsigned long)&hsm->b_ase0_brst_tmout); - b_se0_srp_tmr = otg_timer_initializer(&set_tmout, TB_SE0_SRP, - (unsigned long)&hsm->b_se0_srp); - b_srp_res_tmr = otg_timer_initializer(&set_tmout, TB_SRP_RES, - (unsigned 
long)&hsm->b_srp_res_tmout); - b_bus_suspend_tmr = otg_timer_initializer(&set_tmout, TB_BUS_SUSPEND, - (unsigned long)&hsm->b_bus_suspend_tmout); -} - -/* Free timers */ -static void langwell_otg_free_timers(void) -{ - kfree(a_wait_vrise_tmr); - kfree(a_wait_bcon_tmr); - kfree(a_aidl_bdis_tmr); - kfree(b_ase0_brst_tmr); - kfree(b_se0_srp_tmr); - kfree(b_srp_res_tmr); - kfree(b_bus_suspend_tmr); -} - -/* Add timer to timer list */ -static void langwell_otg_add_timer(void *gtimer) -{ - struct langwell_otg_timer *timer = (struct langwell_otg_timer *)gtimer; - struct langwell_otg_timer *tmp_timer; - u32 val32; - - /* Check if the timer is already in the active list, - * if so update timer count - */ - list_for_each_entry(tmp_timer, &active_timers, list) - if (tmp_timer == timer) { - timer->count = timer->expires; - return; - } - timer->count = timer->expires; - - if (list_empty(&active_timers)) { - val32 = readl(the_transceiver->regs + CI_OTGSC); - writel(val32 | OTGSC_1MSE, the_transceiver->regs + CI_OTGSC); - } - - list_add_tail(&timer->list, &active_timers); -} - -/* Remove timer from the timer list; clear timeout status */ -static void langwell_otg_del_timer(void *gtimer) -{ - struct langwell_otg_timer *timer = (struct langwell_otg_timer *)gtimer; - struct langwell_otg_timer *tmp_timer, *del_tmp; - u32 val32; - - list_for_each_entry_safe(tmp_timer, del_tmp, &active_timers, list) - if (tmp_timer == timer) - list_del(&timer->list); - - if (list_empty(&active_timers)) { - val32 = readl(the_transceiver->regs + CI_OTGSC); - writel(val32 & ~OTGSC_1MSE, the_transceiver->regs + CI_OTGSC); - } -} - -/* Reduce timer count by 1, and find timeout conditions.*/ -static int langwell_otg_tick_timer(u32 *int_sts) -{ - struct langwell_otg_timer *tmp_timer, *del_tmp; - int expired = 0; - - list_for_each_entry_safe(tmp_timer, del_tmp, &active_timers, list) { - tmp_timer->count--; - /* check if timer expires */ - if (!tmp_timer->count) { - list_del(&tmp_timer->list); - tmp_timer->function(tmp_timer->data); - expired = 1; - } - } - - if (list_empty(&active_timers)) { - otg_dbg("tick timer: disable 1ms int\n"); - *int_sts = *int_sts & ~OTGSC_1MSE; - } - return expired; -} - -static void reset_otg(void) -{ - u32 val; - int delay_time = 1000; - - otg_dbg("reseting OTG controller ...\n"); - val = readl(the_transceiver->regs + CI_USBCMD); - writel(val | USBCMD_RST, the_transceiver->regs + CI_USBCMD); - do { - udelay(100); - if (!delay_time--) - otg_dbg("reset timeout\n"); - val = readl(the_transceiver->regs + CI_USBCMD); - val &= USBCMD_RST; - } while (val != 0); - otg_dbg("reset done.\n"); -} - -static void set_host_mode(void) -{ - u32 val; - - reset_otg(); - val = readl(the_transceiver->regs + CI_USBMODE); - val = (val & (~USBMODE_CM)) | USBMODE_HOST; - writel(val, the_transceiver->regs + CI_USBMODE); -} - -static void set_client_mode(void) -{ - u32 val; - - reset_otg(); - val = readl(the_transceiver->regs + CI_USBMODE); - val = (val & (~USBMODE_CM)) | USBMODE_DEVICE; - writel(val, the_transceiver->regs + CI_USBMODE); -} - -static void init_hsm(void) -{ - struct langwell_otg *langwell = the_transceiver; - u32 val32; - - /* read OTGSC after reset */ - val32 = readl(langwell->regs + CI_OTGSC); - otg_dbg("%s: OTGSC init value = 0x%x\n", __func__, val32); - - /* set init state */ - if (val32 & OTGSC_ID) { - langwell->hsm.id = 1; - langwell->otg.default_a = 0; - set_client_mode(); - langwell->otg.state = OTG_STATE_B_IDLE; - langwell_otg_drv_vbus(0); - } else { - langwell->hsm.id = 0; - langwell->otg.default_a = 1; - 
set_host_mode(); - langwell->otg.state = OTG_STATE_A_IDLE; - } - - /* set session indicator */ - if (val32 & OTGSC_BSE) - langwell->hsm.b_sess_end = 1; - if (val32 & OTGSC_BSV) - langwell->hsm.b_sess_vld = 1; - if (val32 & OTGSC_ASV) - langwell->hsm.a_sess_vld = 1; - if (val32 & OTGSC_AVV) - langwell->hsm.a_vbus_vld = 1; - - /* defautly power the bus */ - langwell->hsm.a_bus_req = 1; - langwell->hsm.a_bus_drop = 0; - /* defautly don't request bus as B device */ - langwell->hsm.b_bus_req = 0; - /* no system error */ - langwell->hsm.a_clr_err = 0; -} - -static irqreturn_t otg_dummy_irq(int irq, void *_dev) -{ - void __iomem *reg_base = _dev; - u32 val; - u32 int_mask = 0; - - val = readl(reg_base + CI_USBMODE); - if ((val & USBMODE_CM) != USBMODE_DEVICE) - return IRQ_NONE; - - val = readl(reg_base + CI_USBSTS); - int_mask = val & INTR_DUMMY_MASK; - - if (int_mask == 0) - return IRQ_NONE; - - /* clear hsm.b_conn here since host driver can't detect it - * otg_dummy_irq called means B-disconnect happened. - */ - if (the_transceiver->hsm.b_conn) { - the_transceiver->hsm.b_conn = 0; - if (spin_trylock(&the_transceiver->wq_lock)) { - queue_work(the_transceiver->qwork, - &the_transceiver->work); - spin_unlock(&the_transceiver->wq_lock); - } - } - /* Clear interrupts */ - writel(int_mask, reg_base + CI_USBSTS); - return IRQ_HANDLED; -} - -static irqreturn_t otg_irq(int irq, void *_dev) -{ - struct langwell_otg *langwell = _dev; - u32 int_sts, int_en; - u32 int_mask = 0; - int flag = 0; - - int_sts = readl(langwell->regs + CI_OTGSC); - int_en = (int_sts & OTGSC_INTEN_MASK) >> 8; - int_mask = int_sts & int_en; - if (int_mask == 0) - return IRQ_NONE; - - if (int_mask & OTGSC_IDIS) { - otg_dbg("%s: id change int\n", __func__); - langwell->hsm.id = (int_sts & OTGSC_ID) ? 1 : 0; - flag = 1; - } - if (int_mask & OTGSC_DPIS) { - otg_dbg("%s: data pulse int\n", __func__); - langwell->hsm.a_srp_det = (int_sts & OTGSC_DPS) ? 1 : 0; - flag = 1; - } - if (int_mask & OTGSC_BSEIS) { - otg_dbg("%s: b session end int\n", __func__); - langwell->hsm.b_sess_end = (int_sts & OTGSC_BSE) ? 1 : 0; - flag = 1; - } - if (int_mask & OTGSC_BSVIS) { - otg_dbg("%s: b session valid int\n", __func__); - langwell->hsm.b_sess_vld = (int_sts & OTGSC_BSV) ? 1 : 0; - flag = 1; - } - if (int_mask & OTGSC_ASVIS) { - otg_dbg("%s: a session valid int\n", __func__); - langwell->hsm.a_sess_vld = (int_sts & OTGSC_ASV) ? 1 : 0; - flag = 1; - } - if (int_mask & OTGSC_AVVIS) { - otg_dbg("%s: a vbus valid int\n", __func__); - langwell->hsm.a_vbus_vld = (int_sts & OTGSC_AVV) ? 
1 : 0; - flag = 1; - } - - if (int_mask & OTGSC_1MSS) { - /* need to schedule otg_work if any timer is expired */ - if (langwell_otg_tick_timer(&int_sts)) - flag = 1; - } - - writel((int_sts & ~OTGSC_INTSTS_MASK) | int_mask, - langwell->regs + CI_OTGSC); - if (flag) - queue_work(langwell->qwork, &langwell->work); - - return IRQ_HANDLED; -} - -static void langwell_otg_work(struct work_struct *work) -{ - struct langwell_otg *langwell = container_of(work, - struct langwell_otg, work); - int retval; - - otg_dbg("%s: old state = %s\n", __func__, - state_string(langwell->otg.state)); - - switch (langwell->otg.state) { - case OTG_STATE_UNDEFINED: - case OTG_STATE_B_IDLE: - if (!langwell->hsm.id) { - langwell_otg_del_timer(b_srp_res_tmr); - langwell->otg.default_a = 1; - langwell->hsm.a_srp_det = 0; - - langwell_otg_chrg_vbus(0); - langwell_otg_drv_vbus(0); - - set_host_mode(); - langwell->otg.state = OTG_STATE_A_IDLE; - queue_work(langwell->qwork, &langwell->work); - } else if (langwell->hsm.b_srp_res_tmout) { - langwell->hsm.b_srp_res_tmout = 0; - langwell->hsm.b_bus_req = 0; - langwell_otg_nsf_msg(6); - } else if (langwell->hsm.b_sess_vld) { - langwell_otg_del_timer(b_srp_res_tmr); - langwell->hsm.b_sess_end = 0; - langwell->hsm.a_bus_suspend = 0; - - langwell_otg_chrg_vbus(0); - if (langwell->client_ops) { - langwell->client_ops->resume(langwell->pdev); - langwell->otg.state = OTG_STATE_B_PERIPHERAL; - } else - otg_dbg("client driver not loaded.\n"); - - } else if (langwell->hsm.b_bus_req && - (langwell->hsm.b_sess_end)) { - /* workaround for b_se0_srp detection */ - retval = langwell_otg_check_se0_srp(0); - if (retval) { - langwell->hsm.b_bus_req = 0; - otg_dbg("LS is not SE0, try again later\n"); - } else { - /* Start SRP */ - langwell_otg_start_srp(&langwell->otg); - langwell_otg_add_timer(b_srp_res_tmr); - } - } - break; - case OTG_STATE_B_SRP_INIT: - if (!langwell->hsm.id) { - langwell->otg.default_a = 1; - langwell->hsm.a_srp_det = 0; - - langwell_otg_drv_vbus(0); - langwell_otg_chrg_vbus(0); - - langwell->otg.state = OTG_STATE_A_IDLE; - queue_work(langwell->qwork, &langwell->work); - } else if (langwell->hsm.b_sess_vld) { - langwell_otg_chrg_vbus(0); - if (langwell->client_ops) { - langwell->client_ops->resume(langwell->pdev); - langwell->otg.state = OTG_STATE_B_PERIPHERAL; - } else - otg_dbg("client driver not loaded.\n"); - } - break; - case OTG_STATE_B_PERIPHERAL: - if (!langwell->hsm.id) { - langwell->otg.default_a = 1; - langwell->hsm.a_srp_det = 0; - - langwell_otg_drv_vbus(0); - langwell_otg_chrg_vbus(0); - set_host_mode(); - - if (langwell->client_ops) { - langwell->client_ops->suspend(langwell->pdev, - PMSG_FREEZE); - } else - otg_dbg("client driver has been removed.\n"); - - langwell->otg.state = OTG_STATE_A_IDLE; - queue_work(langwell->qwork, &langwell->work); - } else if (!langwell->hsm.b_sess_vld) { - langwell->hsm.b_hnp_enable = 0; - - if (langwell->client_ops) { - langwell->client_ops->suspend(langwell->pdev, - PMSG_FREEZE); - } else - otg_dbg("client driver has been removed.\n"); - - langwell->otg.state = OTG_STATE_B_IDLE; - } else if (langwell->hsm.b_bus_req && langwell->hsm.b_hnp_enable - && langwell->hsm.a_bus_suspend) { - - if (langwell->client_ops) { - langwell->client_ops->suspend(langwell->pdev, - PMSG_FREEZE); - } else - otg_dbg("client driver has been removed.\n"); - - langwell_otg_HAAR(1); - langwell->hsm.a_conn = 0; - - if (langwell->host_ops) { - langwell->host_ops->probe(langwell->pdev, - langwell->host_ops->id_table); - langwell->otg.state = 
OTG_STATE_B_WAIT_ACON; - } else - otg_dbg("host driver not loaded.\n"); - - langwell->hsm.a_bus_resume = 0; - langwell->hsm.b_ase0_brst_tmout = 0; - langwell_otg_add_timer(b_ase0_brst_tmr); - } - break; - - case OTG_STATE_B_WAIT_ACON: - if (!langwell->hsm.id) { - langwell_otg_del_timer(b_ase0_brst_tmr); - langwell->otg.default_a = 1; - langwell->hsm.a_srp_det = 0; - - langwell_otg_drv_vbus(0); - langwell_otg_chrg_vbus(0); - set_host_mode(); - - langwell_otg_HAAR(0); - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell->otg.state = OTG_STATE_A_IDLE; - queue_work(langwell->qwork, &langwell->work); - } else if (!langwell->hsm.b_sess_vld) { - langwell_otg_del_timer(b_ase0_brst_tmr); - langwell->hsm.b_hnp_enable = 0; - langwell->hsm.b_bus_req = 0; - langwell_otg_chrg_vbus(0); - langwell_otg_HAAR(0); - - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell->otg.state = OTG_STATE_B_IDLE; - } else if (langwell->hsm.a_conn) { - langwell_otg_del_timer(b_ase0_brst_tmr); - langwell_otg_HAAR(0); - langwell->otg.state = OTG_STATE_B_HOST; - queue_work(langwell->qwork, &langwell->work); - } else if (langwell->hsm.a_bus_resume || - langwell->hsm.b_ase0_brst_tmout) { - langwell_otg_del_timer(b_ase0_brst_tmr); - langwell_otg_HAAR(0); - langwell_otg_nsf_msg(7); - - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - - langwell->hsm.a_bus_suspend = 0; - langwell->hsm.b_bus_req = 0; - - if (langwell->client_ops) - langwell->client_ops->resume(langwell->pdev); - else - otg_dbg("client driver not loaded.\n"); - - langwell->otg.state = OTG_STATE_B_PERIPHERAL; - } - break; - - case OTG_STATE_B_HOST: - if (!langwell->hsm.id) { - langwell->otg.default_a = 1; - langwell->hsm.a_srp_det = 0; - - langwell_otg_drv_vbus(0); - langwell_otg_chrg_vbus(0); - set_host_mode(); - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell->otg.state = OTG_STATE_A_IDLE; - queue_work(langwell->qwork, &langwell->work); - } else if (!langwell->hsm.b_sess_vld) { - langwell->hsm.b_hnp_enable = 0; - langwell->hsm.b_bus_req = 0; - langwell_otg_chrg_vbus(0); - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell->otg.state = OTG_STATE_B_IDLE; - } else if ((!langwell->hsm.b_bus_req) || - (!langwell->hsm.a_conn)) { - langwell->hsm.b_bus_req = 0; - langwell_otg_loc_sof(0); - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - - langwell->hsm.a_bus_suspend = 0; - - if (langwell->client_ops) - langwell->client_ops->resume(langwell->pdev); - else - otg_dbg("client driver not loaded.\n"); - - langwell->otg.state = OTG_STATE_B_PERIPHERAL; - } - break; - - case OTG_STATE_A_IDLE: - langwell->otg.default_a = 1; - if (langwell->hsm.id) { - langwell->otg.default_a = 0; - langwell->hsm.b_bus_req = 0; - langwell_otg_drv_vbus(0); - langwell_otg_chrg_vbus(0); - - langwell->otg.state = OTG_STATE_B_IDLE; - queue_work(langwell->qwork, &langwell->work); - } else if (langwell->hsm.a_sess_vld) { - langwell_otg_drv_vbus(1); - langwell->hsm.a_srp_det = 1; - langwell->hsm.a_wait_vrise_tmout = 0; - langwell_otg_add_timer(a_wait_vrise_tmr); - langwell->otg.state = OTG_STATE_A_WAIT_VRISE; - 
queue_work(langwell->qwork, &langwell->work); - } else if (!langwell->hsm.a_bus_drop && - (langwell->hsm.a_srp_det || langwell->hsm.a_bus_req)) { - langwell_otg_drv_vbus(1); - langwell->hsm.a_wait_vrise_tmout = 0; - langwell_otg_add_timer(a_wait_vrise_tmr); - langwell->otg.state = OTG_STATE_A_WAIT_VRISE; - queue_work(langwell->qwork, &langwell->work); - } - break; - case OTG_STATE_A_WAIT_VRISE: - if (langwell->hsm.id) { - langwell_otg_del_timer(a_wait_vrise_tmr); - langwell->hsm.b_bus_req = 0; - langwell->otg.default_a = 0; - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_B_IDLE; - } else if (langwell->hsm.a_vbus_vld) { - langwell_otg_del_timer(a_wait_vrise_tmr); - if (langwell->host_ops) - langwell->host_ops->probe(langwell->pdev, - langwell->host_ops->id_table); - else - otg_dbg("host driver not loaded.\n"); - langwell->hsm.b_conn = 0; - langwell->hsm.a_set_b_hnp_en = 0; - langwell->hsm.a_wait_bcon_tmout = 0; - langwell_otg_add_timer(a_wait_bcon_tmr); - langwell->otg.state = OTG_STATE_A_WAIT_BCON; - } else if (langwell->hsm.a_wait_vrise_tmout) { - if (langwell->hsm.a_vbus_vld) { - if (langwell->host_ops) - langwell->host_ops->probe( - langwell->pdev, - langwell->host_ops->id_table); - else - otg_dbg("host driver not loaded.\n"); - langwell->hsm.b_conn = 0; - langwell->hsm.a_set_b_hnp_en = 0; - langwell->hsm.a_wait_bcon_tmout = 0; - langwell_otg_add_timer(a_wait_bcon_tmr); - langwell->otg.state = OTG_STATE_A_WAIT_BCON; - } else { - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_A_VBUS_ERR; - } - } - break; - case OTG_STATE_A_WAIT_BCON: - if (langwell->hsm.id) { - langwell_otg_del_timer(a_wait_bcon_tmr); - - langwell->otg.default_a = 0; - langwell->hsm.b_bus_req = 0; - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_B_IDLE; - queue_work(langwell->qwork, &langwell->work); - } else if (!langwell->hsm.a_vbus_vld) { - langwell_otg_del_timer(a_wait_bcon_tmr); - - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_A_VBUS_ERR; - } else if (langwell->hsm.a_bus_drop || - (langwell->hsm.a_wait_bcon_tmout && - !langwell->hsm.a_bus_req)) { - langwell_otg_del_timer(a_wait_bcon_tmr); - - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_A_WAIT_VFALL; - } else if (langwell->hsm.b_conn) { - langwell_otg_del_timer(a_wait_bcon_tmr); - - langwell->hsm.a_suspend_req = 0; - langwell->otg.state = OTG_STATE_A_HOST; - if (!langwell->hsm.a_bus_req && - langwell->hsm.a_set_b_hnp_en) { - /* It is not safe enough to do a fast - * transistion from A_WAIT_BCON to - * A_SUSPEND */ - msleep(10000); - if (langwell->hsm.a_bus_req) - break; - - if (request_irq(langwell->pdev->irq, - otg_dummy_irq, IRQF_SHARED, - driver_name, langwell->regs) != 0) { - otg_dbg("request interrupt %d fail\n", - langwell->pdev->irq); - } - - langwell_otg_HABA(1); - langwell->hsm.b_bus_resume = 0; - langwell->hsm.a_aidl_bdis_tmout = 0; - langwell_otg_add_timer(a_aidl_bdis_tmr); - - langwell_otg_loc_sof(0); - langwell->otg.state = OTG_STATE_A_SUSPEND; - } else if (!langwell->hsm.a_bus_req && - !langwell->hsm.a_set_b_hnp_en) { - struct pci_dev *pdev = langwell->pdev; - if (langwell->host_ops) - 
langwell->host_ops->remove(pdev); - else - otg_dbg("host driver removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_A_WAIT_VFALL; - } - } - break; - case OTG_STATE_A_HOST: - if (langwell->hsm.id) { - langwell->otg.default_a = 0; - langwell->hsm.b_bus_req = 0; - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_B_IDLE; - queue_work(langwell->qwork, &langwell->work); - } else if (langwell->hsm.a_bus_drop || - (!langwell->hsm.a_set_b_hnp_en && !langwell->hsm.a_bus_req)) { - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_A_WAIT_VFALL; - } else if (!langwell->hsm.a_vbus_vld) { - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_A_VBUS_ERR; - } else if (langwell->hsm.a_set_b_hnp_en - && !langwell->hsm.a_bus_req) { - /* Set HABA to enable hardware assistance to signal - * A-connect after receiver B-disconnect. Hardware - * will then set client mode and enable URE, SLE and - * PCE after the assistance. otg_dummy_irq is used to - * clean these ints when client driver is not resumed. - */ - if (request_irq(langwell->pdev->irq, - otg_dummy_irq, IRQF_SHARED, driver_name, - langwell->regs) != 0) { - otg_dbg("request interrupt %d failed\n", - langwell->pdev->irq); - } - - /* set HABA */ - langwell_otg_HABA(1); - langwell->hsm.b_bus_resume = 0; - langwell->hsm.a_aidl_bdis_tmout = 0; - langwell_otg_add_timer(a_aidl_bdis_tmr); - langwell_otg_loc_sof(0); - langwell->otg.state = OTG_STATE_A_SUSPEND; - } else if (!langwell->hsm.b_conn || !langwell->hsm.a_bus_req) { - langwell->hsm.a_wait_bcon_tmout = 0; - langwell->hsm.a_set_b_hnp_en = 0; - langwell_otg_add_timer(a_wait_bcon_tmr); - langwell->otg.state = OTG_STATE_A_WAIT_BCON; - } - break; - case OTG_STATE_A_SUSPEND: - if (langwell->hsm.id) { - langwell_otg_del_timer(a_aidl_bdis_tmr); - langwell_otg_HABA(0); - free_irq(langwell->pdev->irq, langwell->regs); - langwell->otg.default_a = 0; - langwell->hsm.b_bus_req = 0; - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_B_IDLE; - queue_work(langwell->qwork, &langwell->work); - } else if (langwell->hsm.a_bus_req || - langwell->hsm.b_bus_resume) { - langwell_otg_del_timer(a_aidl_bdis_tmr); - langwell_otg_HABA(0); - free_irq(langwell->pdev->irq, langwell->regs); - langwell->hsm.a_suspend_req = 0; - langwell_otg_loc_sof(1); - langwell->otg.state = OTG_STATE_A_HOST; - } else if (langwell->hsm.a_aidl_bdis_tmout || - langwell->hsm.a_bus_drop) { - langwell_otg_del_timer(a_aidl_bdis_tmr); - langwell_otg_HABA(0); - free_irq(langwell->pdev->irq, langwell->regs); - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_A_WAIT_VFALL; - } else if (!langwell->hsm.b_conn && - langwell->hsm.a_set_b_hnp_en) { - langwell_otg_del_timer(a_aidl_bdis_tmr); - langwell_otg_HABA(0); - free_irq(langwell->pdev->irq, langwell->regs); - - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been 
removed.\n"); - - langwell->hsm.b_bus_suspend = 0; - langwell->hsm.b_bus_suspend_vld = 0; - langwell->hsm.b_bus_suspend_tmout = 0; - - /* msleep(200); */ - if (langwell->client_ops) - langwell->client_ops->resume(langwell->pdev); - else - otg_dbg("client driver not loaded.\n"); - - langwell_otg_add_timer(b_bus_suspend_tmr); - langwell->otg.state = OTG_STATE_A_PERIPHERAL; - break; - } else if (!langwell->hsm.a_vbus_vld) { - langwell_otg_del_timer(a_aidl_bdis_tmr); - langwell_otg_HABA(0); - free_irq(langwell->pdev->irq, langwell->regs); - if (langwell->host_ops) - langwell->host_ops->remove(langwell->pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_A_VBUS_ERR; - } - break; - case OTG_STATE_A_PERIPHERAL: - if (langwell->hsm.id) { - langwell_otg_del_timer(b_bus_suspend_tmr); - langwell->otg.default_a = 0; - langwell->hsm.b_bus_req = 0; - if (langwell->client_ops) - langwell->client_ops->suspend(langwell->pdev, - PMSG_FREEZE); - else - otg_dbg("client driver has been removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_B_IDLE; - queue_work(langwell->qwork, &langwell->work); - } else if (!langwell->hsm.a_vbus_vld) { - langwell_otg_del_timer(b_bus_suspend_tmr); - if (langwell->client_ops) - langwell->client_ops->suspend(langwell->pdev, - PMSG_FREEZE); - else - otg_dbg("client driver has been removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_A_VBUS_ERR; - } else if (langwell->hsm.a_bus_drop) { - langwell_otg_del_timer(b_bus_suspend_tmr); - if (langwell->client_ops) - langwell->client_ops->suspend(langwell->pdev, - PMSG_FREEZE); - else - otg_dbg("client driver has been removed.\n"); - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_A_WAIT_VFALL; - } else if (langwell->hsm.b_bus_suspend) { - langwell_otg_del_timer(b_bus_suspend_tmr); - if (langwell->client_ops) - langwell->client_ops->suspend(langwell->pdev, - PMSG_FREEZE); - else - otg_dbg("client driver has been removed.\n"); - - if (langwell->host_ops) - langwell->host_ops->probe(langwell->pdev, - langwell->host_ops->id_table); - else - otg_dbg("host driver not loaded.\n"); - langwell->hsm.a_set_b_hnp_en = 0; - langwell->hsm.a_wait_bcon_tmout = 0; - langwell_otg_add_timer(a_wait_bcon_tmr); - langwell->otg.state = OTG_STATE_A_WAIT_BCON; - } else if (langwell->hsm.b_bus_suspend_tmout) { - u32 val; - val = readl(langwell->regs + CI_PORTSC1); - if (!(val & PORTSC_SUSP)) - break; - if (langwell->client_ops) - langwell->client_ops->suspend(langwell->pdev, - PMSG_FREEZE); - else - otg_dbg("client driver has been removed.\n"); - if (langwell->host_ops) - langwell->host_ops->probe(langwell->pdev, - langwell->host_ops->id_table); - else - otg_dbg("host driver not loaded.\n"); - langwell->hsm.a_set_b_hnp_en = 0; - langwell->hsm.a_wait_bcon_tmout = 0; - langwell_otg_add_timer(a_wait_bcon_tmr); - langwell->otg.state = OTG_STATE_A_WAIT_BCON; - } - break; - case OTG_STATE_A_VBUS_ERR: - if (langwell->hsm.id) { - langwell->otg.default_a = 0; - langwell->hsm.a_clr_err = 0; - langwell->hsm.a_srp_det = 0; - langwell->otg.state = OTG_STATE_B_IDLE; - queue_work(langwell->qwork, &langwell->work); - } else if (langwell->hsm.a_clr_err) { - langwell->hsm.a_clr_err = 0; - langwell->hsm.a_srp_det = 0; - reset_otg(); - init_hsm(); - if (langwell->otg.state == OTG_STATE_A_IDLE) - queue_work(langwell->qwork, &langwell->work); - } - break; - case OTG_STATE_A_WAIT_VFALL: - if (langwell->hsm.id) { - langwell->otg.default_a = 0; - 
langwell->otg.state = OTG_STATE_B_IDLE; - queue_work(langwell->qwork, &langwell->work); - } else if (langwell->hsm.a_bus_req) { - langwell_otg_drv_vbus(1); - langwell->hsm.a_wait_vrise_tmout = 0; - langwell_otg_add_timer(a_wait_vrise_tmr); - langwell->otg.state = OTG_STATE_A_WAIT_VRISE; - } else if (!langwell->hsm.a_sess_vld) { - langwell->hsm.a_srp_det = 0; - langwell_otg_drv_vbus(0); - set_host_mode(); - langwell->otg.state = OTG_STATE_A_IDLE; - } - break; - default: - ; - } - - otg_dbg("%s: new state = %s\n", __func__, - state_string(langwell->otg.state)); -} - - static ssize_t -show_registers(struct device *_dev, struct device_attribute *attr, char *buf) -{ - struct langwell_otg *langwell; - char *next; - unsigned size; - unsigned t; - - langwell = the_transceiver; - next = buf; - size = PAGE_SIZE; - - t = scnprintf(next, size, - "\n" - "USBCMD = 0x%08x \n" - "USBSTS = 0x%08x \n" - "USBINTR = 0x%08x \n" - "ASYNCLISTADDR = 0x%08x \n" - "PORTSC1 = 0x%08x \n" - "HOSTPC1 = 0x%08x \n" - "OTGSC = 0x%08x \n" - "USBMODE = 0x%08x \n", - readl(langwell->regs + 0x30), - readl(langwell->regs + 0x34), - readl(langwell->regs + 0x38), - readl(langwell->regs + 0x48), - readl(langwell->regs + 0x74), - readl(langwell->regs + 0xb4), - readl(langwell->regs + 0xf4), - readl(langwell->regs + 0xf8) - ); - size -= t; - next += t; - - return PAGE_SIZE - size; -} -static DEVICE_ATTR(registers, S_IRUGO, show_registers, NULL); - -static ssize_t -show_hsm(struct device *_dev, struct device_attribute *attr, char *buf) -{ - struct langwell_otg *langwell; - char *next; - unsigned size; - unsigned t; - - langwell = the_transceiver; - next = buf; - size = PAGE_SIZE; - - t = scnprintf(next, size, - "\n" - "current state = %s\n" - "a_bus_resume = \t%d\n" - "a_bus_suspend = \t%d\n" - "a_conn = \t%d\n" - "a_sess_vld = \t%d\n" - "a_srp_det = \t%d\n" - "a_vbus_vld = \t%d\n" - "b_bus_resume = \t%d\n" - "b_bus_suspend = \t%d\n" - "b_conn = \t%d\n" - "b_se0_srp = \t%d\n" - "b_sess_end = \t%d\n" - "b_sess_vld = \t%d\n" - "id = \t%d\n" - "a_set_b_hnp_en = \t%d\n" - "b_srp_done = \t%d\n" - "b_hnp_enable = \t%d\n" - "a_wait_vrise_tmout = \t%d\n" - "a_wait_bcon_tmout = \t%d\n" - "a_aidl_bdis_tmout = \t%d\n" - "b_ase0_brst_tmout = \t%d\n" - "a_bus_drop = \t%d\n" - "a_bus_req = \t%d\n" - "a_clr_err = \t%d\n" - "a_suspend_req = \t%d\n" - "b_bus_req = \t%d\n" - "b_bus_suspend_tmout = \t%d\n" - "b_bus_suspend_vld = \t%d\n", - state_string(langwell->otg.state), - langwell->hsm.a_bus_resume, - langwell->hsm.a_bus_suspend, - langwell->hsm.a_conn, - langwell->hsm.a_sess_vld, - langwell->hsm.a_srp_det, - langwell->hsm.a_vbus_vld, - langwell->hsm.b_bus_resume, - langwell->hsm.b_bus_suspend, - langwell->hsm.b_conn, - langwell->hsm.b_se0_srp, - langwell->hsm.b_sess_end, - langwell->hsm.b_sess_vld, - langwell->hsm.id, - langwell->hsm.a_set_b_hnp_en, - langwell->hsm.b_srp_done, - langwell->hsm.b_hnp_enable, - langwell->hsm.a_wait_vrise_tmout, - langwell->hsm.a_wait_bcon_tmout, - langwell->hsm.a_aidl_bdis_tmout, - langwell->hsm.b_ase0_brst_tmout, - langwell->hsm.a_bus_drop, - langwell->hsm.a_bus_req, - langwell->hsm.a_clr_err, - langwell->hsm.a_suspend_req, - langwell->hsm.b_bus_req, - langwell->hsm.b_bus_suspend_tmout, - langwell->hsm.b_bus_suspend_vld - ); - size -= t; - next += t; - - return PAGE_SIZE - size; -} -static DEVICE_ATTR(hsm, S_IRUGO, show_hsm, NULL); - -static ssize_t -get_a_bus_req(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct langwell_otg *langwell; - char *next; - unsigned size; - unsigned t; - - 
langwell = the_transceiver; - next = buf; - size = PAGE_SIZE; - - t = scnprintf(next, size, "%d", langwell->hsm.a_bus_req); - size -= t; - next += t; - - return PAGE_SIZE - size; -} - -static ssize_t -set_a_bus_req(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct langwell_otg *langwell; - langwell = the_transceiver; - if (!langwell->otg.default_a) - return -1; - if (count > 2) - return -1; - - if (buf[0] == '0') { - langwell->hsm.a_bus_req = 0; - otg_dbg("a_bus_req = 0\n"); - } else if (buf[0] == '1') { - /* If a_bus_drop is TRUE, a_bus_req can't be set */ - if (langwell->hsm.a_bus_drop) - return -1; - langwell->hsm.a_bus_req = 1; - otg_dbg("a_bus_req = 1\n"); - } - if (spin_trylock(&langwell->wq_lock)) { - queue_work(langwell->qwork, &langwell->work); - spin_unlock(&langwell->wq_lock); - } - return count; -} -static DEVICE_ATTR(a_bus_req, S_IRUGO | S_IWUGO, get_a_bus_req, set_a_bus_req); - -static ssize_t -get_a_bus_drop(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct langwell_otg *langwell; - char *next; - unsigned size; - unsigned t; - - langwell = the_transceiver; - next = buf; - size = PAGE_SIZE; - - t = scnprintf(next, size, "%d", langwell->hsm.a_bus_drop); - size -= t; - next += t; - - return PAGE_SIZE - size; -} - -static ssize_t -set_a_bus_drop(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct langwell_otg *langwell; - langwell = the_transceiver; - if (!langwell->otg.default_a) - return -1; - if (count > 2) - return -1; - - if (buf[0] == '0') { - langwell->hsm.a_bus_drop = 0; - otg_dbg("a_bus_drop = 0\n"); - } else if (buf[0] == '1') { - langwell->hsm.a_bus_drop = 1; - langwell->hsm.a_bus_req = 0; - otg_dbg("a_bus_drop = 1, then a_bus_req = 0\n"); - } - if (spin_trylock(&langwell->wq_lock)) { - queue_work(langwell->qwork, &langwell->work); - spin_unlock(&langwell->wq_lock); - } - return count; -} -static DEVICE_ATTR(a_bus_drop, S_IRUGO | S_IWUGO, - get_a_bus_drop, set_a_bus_drop); - -static ssize_t -get_b_bus_req(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct langwell_otg *langwell; - char *next; - unsigned size; - unsigned t; - - langwell = the_transceiver; - next = buf; - size = PAGE_SIZE; - - t = scnprintf(next, size, "%d", langwell->hsm.b_bus_req); - size -= t; - next += t; - - return PAGE_SIZE - size; -} - -static ssize_t -set_b_bus_req(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct langwell_otg *langwell; - langwell = the_transceiver; - - if (langwell->otg.default_a) - return -1; - - if (count > 2) - return -1; - - if (buf[0] == '0') { - langwell->hsm.b_bus_req = 0; - otg_dbg("b_bus_req = 0\n"); - } else if (buf[0] == '1') { - langwell->hsm.b_bus_req = 1; - otg_dbg("b_bus_req = 1\n"); - } - if (spin_trylock(&langwell->wq_lock)) { - queue_work(langwell->qwork, &langwell->work); - spin_unlock(&langwell->wq_lock); - } - return count; -} -static DEVICE_ATTR(b_bus_req, S_IRUGO | S_IWUGO, get_b_bus_req, set_b_bus_req); - -static ssize_t -set_a_clr_err(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct langwell_otg *langwell; - langwell = the_transceiver; - - if (!langwell->otg.default_a) - return -1; - if (count > 2) - return -1; - - if (buf[0] == '1') { - langwell->hsm.a_clr_err = 1; - otg_dbg("a_clr_err = 1\n"); - } - if (spin_trylock(&langwell->wq_lock)) { - queue_work(langwell->qwork, &langwell->work); - spin_unlock(&langwell->wq_lock); - } 
- return count; -} -static DEVICE_ATTR(a_clr_err, S_IWUGO, NULL, set_a_clr_err); - -static struct attribute *inputs_attrs[] = { - &dev_attr_a_bus_req.attr, - &dev_attr_a_bus_drop.attr, - &dev_attr_b_bus_req.attr, - &dev_attr_a_clr_err.attr, - NULL, -}; - -static struct attribute_group debug_dev_attr_group = { - .name = "inputs", - .attrs = inputs_attrs, -}; - -int langwell_register_host(struct pci_driver *host_driver) -{ - int ret = 0; - - the_transceiver->host_ops = host_driver; - queue_work(the_transceiver->qwork, &the_transceiver->work); - otg_dbg("host controller driver is registered\n"); - - return ret; -} -EXPORT_SYMBOL(langwell_register_host); - -void langwell_unregister_host(struct pci_driver *host_driver) -{ - if (the_transceiver->host_ops) - the_transceiver->host_ops->remove(the_transceiver->pdev); - the_transceiver->host_ops = NULL; - the_transceiver->hsm.a_bus_drop = 1; - queue_work(the_transceiver->qwork, &the_transceiver->work); - otg_dbg("host controller driver is unregistered\n"); -} -EXPORT_SYMBOL(langwell_unregister_host); - -int langwell_register_peripheral(struct pci_driver *client_driver) -{ - int ret = 0; - - if (client_driver) - ret = client_driver->probe(the_transceiver->pdev, - client_driver->id_table); - if (!ret) { - the_transceiver->client_ops = client_driver; - queue_work(the_transceiver->qwork, &the_transceiver->work); - otg_dbg("client controller driver is registered\n"); - } - - return ret; -} -EXPORT_SYMBOL(langwell_register_peripheral); - -void langwell_unregister_peripheral(struct pci_driver *client_driver) -{ - if (the_transceiver->client_ops) - the_transceiver->client_ops->remove(the_transceiver->pdev); - the_transceiver->client_ops = NULL; - the_transceiver->hsm.b_bus_req = 0; - queue_work(the_transceiver->qwork, &the_transceiver->work); - otg_dbg("client controller driver is unregistered\n"); -} -EXPORT_SYMBOL(langwell_unregister_peripheral); - -static int langwell_otg_probe(struct pci_dev *pdev, - const struct pci_device_id *id) -{ - unsigned long resource, len; - void __iomem *base = NULL; - int retval; - u32 val32; - struct langwell_otg *langwell; - char qname[] = "langwell_otg_queue"; - - retval = 0; - otg_dbg("\notg controller is detected.\n"); - if (pci_enable_device(pdev) < 0) { - retval = -ENODEV; - goto done; - } - - langwell = kzalloc(sizeof *langwell, GFP_KERNEL); - if (langwell == NULL) { - retval = -ENOMEM; - goto done; - } - the_transceiver = langwell; - - /* control register: BAR 0 */ - resource = pci_resource_start(pdev, 0); - len = pci_resource_len(pdev, 0); - if (!request_mem_region(resource, len, driver_name)) { - retval = -EBUSY; - goto err; - } - langwell->region = 1; - - base = ioremap_nocache(resource, len); - if (base == NULL) { - retval = -EFAULT; - goto err; - } - langwell->regs = base; - - if (!pdev->irq) { - otg_dbg("No IRQ.\n"); - retval = -ENODEV; - goto err; - } - - langwell->qwork = create_workqueue(qname); - if (!langwell->qwork) { - otg_dbg("cannot create workqueue %s\n", qname); - retval = -ENOMEM; - goto err; - } - INIT_WORK(&langwell->work, langwell_otg_work); - - /* OTG common part */ - langwell->pdev = pdev; - langwell->otg.dev = &pdev->dev; - langwell->otg.label = driver_name; - langwell->otg.set_host = langwell_otg_set_host; - langwell->otg.set_peripheral = langwell_otg_set_peripheral; - langwell->otg.set_power = langwell_otg_set_power; - langwell->otg.start_srp = langwell_otg_start_srp; - langwell->otg.state = OTG_STATE_UNDEFINED; - if (otg_set_transceiver(&langwell->otg)) { - otg_dbg("can't set 
transceiver\n"); - retval = -EBUSY; - goto err; - } - - reset_otg(); - init_hsm(); - - spin_lock_init(&langwell->lock); - spin_lock_init(&langwell->wq_lock); - INIT_LIST_HEAD(&active_timers); - langwell_otg_init_timers(&langwell->hsm); - - if (request_irq(pdev->irq, otg_irq, IRQF_SHARED, - driver_name, langwell) != 0) { - otg_dbg("request interrupt %d failed\n", pdev->irq); - retval = -EBUSY; - goto err; - } - - /* enable OTGSC int */ - val32 = OTGSC_DPIE | OTGSC_BSEIE | OTGSC_BSVIE | - OTGSC_ASVIE | OTGSC_AVVIE | OTGSC_IDIE | OTGSC_IDPU; - writel(val32, langwell->regs + CI_OTGSC); - - retval = device_create_file(&pdev->dev, &dev_attr_registers); - if (retval < 0) { - otg_dbg("Can't register sysfs attribute: %d\n", retval); - goto err; - } - - retval = device_create_file(&pdev->dev, &dev_attr_hsm); - if (retval < 0) { - otg_dbg("Can't hsm sysfs attribute: %d\n", retval); - goto err; - } - - retval = sysfs_create_group(&pdev->dev.kobj, &debug_dev_attr_group); - if (retval < 0) { - otg_dbg("Can't register sysfs attr group: %d\n", retval); - goto err; - } - - if (langwell->otg.state == OTG_STATE_A_IDLE) - queue_work(langwell->qwork, &langwell->work); - - return 0; - -err: - if (the_transceiver) - langwell_otg_remove(pdev); -done: - return retval; -} - -static void langwell_otg_remove(struct pci_dev *pdev) -{ - struct langwell_otg *langwell; - - langwell = the_transceiver; - - if (langwell->qwork) { - flush_workqueue(langwell->qwork); - destroy_workqueue(langwell->qwork); - } - langwell_otg_free_timers(); - - /* disable OTGSC interrupt as OTGSC doesn't change in reset */ - writel(0, langwell->regs + CI_OTGSC); - - if (pdev->irq) - free_irq(pdev->irq, langwell); - if (langwell->regs) - iounmap(langwell->regs); - if (langwell->region) - release_mem_region(pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); - - otg_set_transceiver(NULL); - pci_disable_device(pdev); - sysfs_remove_group(&pdev->dev.kobj, &debug_dev_attr_group); - device_remove_file(&pdev->dev, &dev_attr_hsm); - device_remove_file(&pdev->dev, &dev_attr_registers); - kfree(langwell); - langwell = NULL; -} - -static void transceiver_suspend(struct pci_dev *pdev) -{ - pci_save_state(pdev); - pci_set_power_state(pdev, PCI_D3hot); - langwell_otg_phy_low_power(1); -} - -static int langwell_otg_suspend(struct pci_dev *pdev, pm_message_t message) -{ - int ret = 0; - struct langwell_otg *langwell; - - langwell = the_transceiver; - - /* Disbale OTG interrupts */ - langwell_otg_intr(0); - - if (pdev->irq) - free_irq(pdev->irq, langwell); - - /* Prevent more otg_work */ - flush_workqueue(langwell->qwork); - spin_lock(&langwell->wq_lock); - - /* start actions */ - switch (langwell->otg.state) { - case OTG_STATE_A_IDLE: - case OTG_STATE_B_IDLE: - case OTG_STATE_A_WAIT_VFALL: - case OTG_STATE_A_VBUS_ERR: - transceiver_suspend(pdev); - break; - case OTG_STATE_A_WAIT_VRISE: - langwell_otg_del_timer(a_wait_vrise_tmr); - langwell->hsm.a_srp_det = 0; - langwell_otg_drv_vbus(0); - langwell->otg.state = OTG_STATE_A_IDLE; - transceiver_suspend(pdev); - break; - case OTG_STATE_A_WAIT_BCON: - langwell_otg_del_timer(a_wait_bcon_tmr); - if (langwell->host_ops) - ret = langwell->host_ops->suspend(pdev, message); - langwell_otg_drv_vbus(0); - break; - case OTG_STATE_A_HOST: - if (langwell->host_ops) - ret = langwell->host_ops->suspend(pdev, message); - langwell_otg_drv_vbus(0); - langwell_otg_phy_low_power(1); - break; - case OTG_STATE_A_SUSPEND: - langwell_otg_del_timer(a_aidl_bdis_tmr); - langwell_otg_HABA(0); - if (langwell->host_ops) - 
langwell->host_ops->remove(pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell_otg_drv_vbus(0); - transceiver_suspend(pdev); - langwell->otg.state = OTG_STATE_A_WAIT_VFALL; - break; - case OTG_STATE_A_PERIPHERAL: - if (langwell->client_ops) - ret = langwell->client_ops->suspend(pdev, message); - else - otg_dbg("client driver has been removed.\n"); - langwell_otg_drv_vbus(0); - transceiver_suspend(pdev); - langwell->otg.state = OTG_STATE_A_WAIT_VFALL; - break; - case OTG_STATE_B_HOST: - if (langwell->host_ops) - langwell->host_ops->remove(pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell->hsm.b_bus_req = 0; - transceiver_suspend(pdev); - langwell->otg.state = OTG_STATE_B_IDLE; - break; - case OTG_STATE_B_PERIPHERAL: - if (langwell->client_ops) - ret = langwell->client_ops->suspend(pdev, message); - else - otg_dbg("client driver has been removed.\n"); - break; - case OTG_STATE_B_WAIT_ACON: - langwell_otg_del_timer(b_ase0_brst_tmr); - langwell_otg_HAAR(0); - if (langwell->host_ops) - langwell->host_ops->remove(pdev); - else - otg_dbg("host driver has been removed.\n"); - langwell->hsm.b_bus_req = 0; - langwell->otg.state = OTG_STATE_B_IDLE; - transceiver_suspend(pdev); - break; - default: - otg_dbg("error state before suspend\n "); - break; - } - spin_unlock(&langwell->wq_lock); - - return ret; -} - -static void transceiver_resume(struct pci_dev *pdev) -{ - pci_restore_state(pdev); - pci_set_power_state(pdev, PCI_D0); - langwell_otg_phy_low_power(0); -} - -static int langwell_otg_resume(struct pci_dev *pdev) -{ - int ret = 0; - struct langwell_otg *langwell; - - langwell = the_transceiver; - - spin_lock(&langwell->wq_lock); - - switch (langwell->otg.state) { - case OTG_STATE_A_IDLE: - case OTG_STATE_B_IDLE: - case OTG_STATE_A_WAIT_VFALL: - case OTG_STATE_A_VBUS_ERR: - transceiver_resume(pdev); - break; - case OTG_STATE_A_WAIT_BCON: - langwell_otg_add_timer(a_wait_bcon_tmr); - langwell_otg_drv_vbus(1); - if (langwell->host_ops) - ret = langwell->host_ops->resume(pdev); - break; - case OTG_STATE_A_HOST: - langwell_otg_drv_vbus(1); - langwell_otg_phy_low_power(0); - if (langwell->host_ops) - ret = langwell->host_ops->resume(pdev); - break; - case OTG_STATE_B_PERIPHERAL: - if (langwell->client_ops) - ret = langwell->client_ops->resume(pdev); - else - otg_dbg("client driver not loaded.\n"); - break; - default: - otg_dbg("error state before suspend\n "); - break; - } - - if (request_irq(pdev->irq, otg_irq, IRQF_SHARED, - driver_name, the_transceiver) != 0) { - otg_dbg("request interrupt %d failed\n", pdev->irq); - ret = -EBUSY; - } - - /* enable OTG interrupts */ - langwell_otg_intr(1); - - spin_unlock(&langwell->wq_lock); - - queue_work(langwell->qwork, &langwell->work); - - - return ret; -} - -static int __init langwell_otg_init(void) -{ - return pci_register_driver(&otg_pci_driver); -} -module_init(langwell_otg_init); - -static void __exit langwell_otg_cleanup(void) -{ - pci_unregister_driver(&otg_pci_driver); -} -module_exit(langwell_otg_cleanup); diff --git a/include/linux/usb/langwell_otg.h b/include/linux/usb/langwell_otg.h deleted file mode 100644 index e115ae6df1da..000000000000 --- a/include/linux/usb/langwell_otg.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Intel Langwell USB OTG transceiver driver - * Copyright (C) 2008, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - */ - -#ifndef __LANGWELL_OTG_H__ -#define __LANGWELL_OTG_H__ - -/* notify transceiver driver about OTG events */ -extern void langwell_update_transceiver(void); -/* HCD register bus driver */ -extern int langwell_register_host(struct pci_driver *host_driver); -/* HCD unregister bus driver */ -extern void langwell_unregister_host(struct pci_driver *host_driver); -/* DCD register bus driver */ -extern int langwell_register_peripheral(struct pci_driver *client_driver); -/* DCD unregister bus driver */ -extern void langwell_unregister_peripheral(struct pci_driver *client_driver); -/* No silent failure, output warning message */ -extern void langwell_otg_nsf_msg(unsigned long message); - -#define CI_USBCMD 0x30 -# define USBCMD_RST BIT(1) -# define USBCMD_RS BIT(0) -#define CI_USBSTS 0x34 -# define USBSTS_SLI BIT(8) -# define USBSTS_URI BIT(6) -# define USBSTS_PCI BIT(2) -#define CI_PORTSC1 0x74 -# define PORTSC_PP BIT(12) -# define PORTSC_LS (BIT(11) | BIT(10)) -# define PORTSC_SUSP BIT(7) -# define PORTSC_CCS BIT(0) -#define CI_HOSTPC1 0xb4 -# define HOSTPC1_PHCD BIT(22) -#define CI_OTGSC 0xf4 -# define OTGSC_DPIE BIT(30) -# define OTGSC_1MSE BIT(29) -# define OTGSC_BSEIE BIT(28) -# define OTGSC_BSVIE BIT(27) -# define OTGSC_ASVIE BIT(26) -# define OTGSC_AVVIE BIT(25) -# define OTGSC_IDIE BIT(24) -# define OTGSC_DPIS BIT(22) -# define OTGSC_1MSS BIT(21) -# define OTGSC_BSEIS BIT(20) -# define OTGSC_BSVIS BIT(19) -# define OTGSC_ASVIS BIT(18) -# define OTGSC_AVVIS BIT(17) -# define OTGSC_IDIS BIT(16) -# define OTGSC_DPS BIT(14) -# define OTGSC_1MST BIT(13) -# define OTGSC_BSE BIT(12) -# define OTGSC_BSV BIT(11) -# define OTGSC_ASV BIT(10) -# define OTGSC_AVV BIT(9) -# define OTGSC_ID BIT(8) -# define OTGSC_HABA BIT(7) -# define OTGSC_HADP BIT(6) -# define OTGSC_IDPU BIT(5) -# define OTGSC_DP BIT(4) -# define OTGSC_OT BIT(3) -# define OTGSC_HAAR BIT(2) -# define OTGSC_VC BIT(1) -# define OTGSC_VD BIT(0) -# define OTGSC_INTEN_MASK (0x7f << 24) -# define OTGSC_INTSTS_MASK (0x7f << 16) -#define CI_USBMODE 0xf8 -# define USBMODE_CM (BIT(1) | BIT(0)) -# define USBMODE_IDLE 0 -# define USBMODE_DEVICE 0x2 -# define USBMODE_HOST 0x3 - -#define INTR_DUMMY_MASK (USBSTS_SLI | USBSTS_URI | USBSTS_PCI) - -struct otg_hsm { - /* Input */ - int a_bus_resume; - int a_bus_suspend; - int a_conn; - int a_sess_vld; - int a_srp_det; - int a_vbus_vld; - int b_bus_resume; - int b_bus_suspend; - int b_conn; - int b_se0_srp; - int b_sess_end; - int b_sess_vld; - int id; - - /* Internal variables */ - int a_set_b_hnp_en; - int b_srp_done; - int b_hnp_enable; - - /* Timeout indicator for timers */ - int a_wait_vrise_tmout; - int a_wait_bcon_tmout; - int a_aidl_bdis_tmout; - int b_ase0_brst_tmout; - int b_bus_suspend_tmout; - int b_srp_res_tmout; - - /* Informative variables */ - int a_bus_drop; - int a_bus_req; - int a_clr_err; - int a_suspend_req; - int b_bus_req; - - /* Output */ - int drv_vbus; - int loc_conn; - int loc_sof; - - /* Others */ - int b_bus_suspend_vld; -}; - -#define TA_WAIT_VRISE 100 -#define TA_WAIT_BCON 30000 -#define TA_AIDL_BDIS 15000 
-#define TB_ASE0_BRST 5000 -#define TB_SE0_SRP 2 -#define TB_SRP_RES 100 -#define TB_BUS_SUSPEND 500 - -struct langwell_otg_timer { - unsigned long expires; /* Number of count increase to timeout */ - unsigned long count; /* Tick counter */ - void (*function)(unsigned long); /* Timeout function */ - unsigned long data; /* Data passed to function */ - struct list_head list; -}; - -struct langwell_otg { - struct otg_transceiver otg; - struct otg_hsm hsm; - void __iomem *regs; - unsigned region; - struct pci_driver *host_ops; - struct pci_driver *client_ops; - struct pci_dev *pdev; - struct work_struct work; - struct workqueue_struct *qwork; - spinlock_t lock; - spinlock_t wq_lock; -}; - -static inline struct langwell_otg *otg_to_langwell(struct otg_transceiver *otg) -{ - return container_of(otg, struct langwell_otg, otg); -} - -#ifdef DEBUG -#define otg_dbg(fmt, args...) \ - printk(KERN_DEBUG fmt , ## args) -#else -#define otg_dbg(fmt, args...) \ - do { } while (0) -#endif /* DEBUG */ -#endif /* __LANGWELL_OTG_H__ */ -- cgit v1.2.3 From e376bbbb6a82cf119c93bde66937f66c72cba27b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 18 Jun 2009 10:39:11 -0700 Subject: USB: usb.h: fix kernel-doc notation Fix usb.h kernel-doc warnings: Warning(include/linux/usb.h:918): Excess struct/union/enum/typedef member 'nodename' description in 'usb_device_driver' Warning(include/linux/usb.h:939): No description found for parameter 'nodename' Warning(include/linux/usb.h:1219): No description found for parameter 'sg' Warning(include/linux/usb.h:1219): No description found for parameter 'num_sgs' Signed-off-by: Randy Dunlap Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 84929e914034..b1e3c2fbfe11 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -888,8 +888,6 @@ struct usb_driver { * struct usb_device_driver - identifies USB device driver to usbcore * @name: The driver name should be unique among USB drivers, * and should normally be the same as the module name. - * @nodename: Callback to provide a naming hint for a possible - * device node to create. * @probe: Called to see if the driver is willing to manage a particular * device. If it is, probe returns zero and uses dev_set_drvdata() * to associate driver-specific data with the device. If unwilling @@ -924,6 +922,8 @@ extern struct bus_type usb_bus_type; /** * struct usb_class_driver - identifies a USB driver that wants to use the USB major number * @name: the usb class device name for this driver. Will show up in sysfs. + * @nodename: Callback to provide a naming hint for a possible + * device node to create. * @fops: pointer to the struct file_operations of this driver. * @minor_base: the start of the minor range for this driver. * @@ -1046,6 +1046,8 @@ typedef void (*usb_complete_t)(struct urb *); * the device driver is saying that it provided this DMA address, * which the host controller driver should use in preference to the * transfer_buffer. + * @sg: scatter gather buffer list + * @num_sgs: number of entries in the sg list * @transfer_buffer_length: How big is transfer_buffer. 
The transfer may * be broken up into chunks according to the current maximum packet * size for the endpoint, which is a function of the configuration -- cgit v1.2.3 From 440c1ce178d6a6743e02d136a55b2de3f6d62a20 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Fri, 10 Jul 2009 15:33:49 +0000 Subject: dropmon: remove duplicated #include Remove duplicated #include('s) in include/linux/net_dropmon.h Signed-off-by: Huang Weiyi Signed-off-by: David S. Miller --- include/linux/net_dropmon.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/net_dropmon.h b/include/linux/net_dropmon.h index 3ceb0cc1bc78..2a739462caeb 100644 --- a/include/linux/net_dropmon.h +++ b/include/linux/net_dropmon.h @@ -3,7 +3,6 @@ #include #include -#include struct net_dm_drop_point { __u8 pc[8]; -- cgit v1.2.3 From d0b6e04a4cd8360e3c9c419f7c30a3081a0c142a Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 13 Jul 2009 10:33:21 +0800 Subject: tracing/events: Move TRACE_SYSTEM outside of include guard If TRACE_INCLUDE_FILE is defined, <TRACE_INCLUDE_FILE>.h will be included and compiled; otherwise it will be <TRACE_SYSTEM>.h. So TRACE_SYSTEM should be defined outside of #if protection, just like TRACE_INCLUDE_FILE. Imagine this scenario: #include <trace/events/foo.h> -> TRACE_SYSTEM == foo ... #include <trace/events/bar.h> -> TRACE_SYSTEM == bar ... #define CREATE_TRACE_POINTS #include <trace/events/foo.h> -> TRACE_SYSTEM == bar !!! and then bar.h will be included and compiled. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A5A9CF1.2010007@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- fs/gfs2/trace_gfs2.h | 8 +++---- include/trace/events/block.h | 6 ++--- include/trace/events/ext4.h | 6 ++--- include/trace/events/irq.h | 6 ++--- include/trace/events/jbd2.h | 6 ++--- include/trace/events/kmem.h | 6 ++--- include/trace/events/lockdep.h | 6 ++--- include/trace/events/sched.h | 6 ++--- include/trace/events/skb.h | 6 ++--- include/trace/events/workqueue.h | 6 ++--- samples/trace_events/trace-events-sample.h | 37 ++++++++++++++++-------------- 11 files changed, 51 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 98d6ef1c1dc0..148d55c14171 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -1,12 +1,11 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gfs2 + #if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_GFS2_H #include -#undef TRACE_SYSTEM -#define TRACE_SYSTEM gfs2 -#define TRACE_INCLUDE_FILE trace_gfs2 - #include #include #include @@ -403,5 +402,6 @@ TRACE_EVENT(gfs2_block_alloc, /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_gfs2 #include diff --git a/include/trace/events/block.h b/include/trace/events/block.h index d6b05f42dd44..9a74b468a229 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -1,3 +1,6 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM block + #if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_BLOCK_H @@ -5,9 +8,6 @@ #include #include -#undef TRACE_SYSTEM -#define TRACE_SYSTEM block - TRACE_EVENT(block_rq_abort, TP_PROTO(struct request_queue *q, struct request *rq), diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index acf4cc9cd36d..8eb82cc8d149 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -1,9 +1,9 @@ -#if !defined(_TRACE_EXT4_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_EXT4_H - #undef TRACE_SYSTEM #define TRACE_SYSTEM ext4 +#if !defined(_TRACE_EXT4_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EXT4_H + #include #include "../../../fs/ext4/ext4.h" #include "../../../fs/ext4/mballoc.h" diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index b0c7ede55eb1..1cb0c3aa11e6 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -1,12 +1,12 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM irq + #if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_IRQ_H #include #include -#undef TRACE_SYSTEM -#define TRACE_SYSTEM irq - #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } #define show_softirq_name(val) \ __print_symbolic(val, \ diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 845b0b4b48fd..10813fa0c8d0 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -1,12 +1,12 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM jbd2 + #if !defined(_TRACE_JBD2_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_JBD2_H #include #include -#undef TRACE_SYSTEM -#define TRACE_SYSTEM jbd2 - TRACE_EVENT(jbd2_checkpoint, TP_PROTO(journal_t *journal, int result), diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 9baba50d6512..1493c541f9c4 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -1,12 +1,12 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kmem + #if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KMEM_H #include #include -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kmem - /* * The order of these masks is important. Matching masks will be seen * first and the left over flags will end up showing by themselves. 
diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h index 0e956c9dfd7e..bcf1d209a00d 100644 --- a/include/trace/events/lockdep.h +++ b/include/trace/events/lockdep.h @@ -1,12 +1,12 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM lockdep + #if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_LOCKDEP_H #include #include -#undef TRACE_SYSTEM -#define TRACE_SYSTEM lockdep - #ifdef CONFIG_LOCKDEP TRACE_EVENT(lock_acquire, diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 24ab5bcff7b2..8949bb7eb082 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -1,12 +1,12 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM sched + #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SCHED_H #include #include -#undef TRACE_SYSTEM -#define TRACE_SYSTEM sched - /* * Tracepoint for calling kthread_stop, performed to end a kthread: */ diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 1e8fabb57c06..e499863b9669 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -1,12 +1,12 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM skb + #if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SKB_H #include #include -#undef TRACE_SYSTEM -#define TRACE_SYSTEM skb - /* * Tracepoint for free an sk_buff: */ diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h index 035f1bff288e..fcfd9a1e4b96 100644 --- a/include/trace/events/workqueue.h +++ b/include/trace/events/workqueue.h @@ -1,3 +1,6 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM workqueue + #if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_WORKQUEUE_H @@ -5,9 +8,6 @@ #include #include -#undef TRACE_SYSTEM -#define TRACE_SYSTEM workqueue - TRACE_EVENT(workqueue_insertion, TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index 9977a756fb32..f24ae370e514 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -1,20 +1,3 @@ -/* - * Notice that this file is not protected like a normal header. - * We also must allow for rereading of this file. The - * - * || defined(TRACE_HEADER_MULTI_READ) - * - * serves this purpose. - */ -#if !defined(_TRACE_EVENT_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_EVENT_SAMPLE_H - -/* - * All trace headers should include tracepoint.h, until we finally - * make it into a standard header. - */ -#include - /* * If TRACE_SYSTEM is defined, that will be the directory created * in the ftrace directory under /debugfs/tracing/events/ @@ -34,10 +17,30 @@ * #define TRACE_INCLUDE_FILE trace-events-sample * * As we do an the bottom of this file. + * + * Notice that TRACE_SYSTEM should be defined outside of #if + * protection, just like TRACE_INCLUDE_FILE. */ #undef TRACE_SYSTEM #define TRACE_SYSTEM sample +/* + * Notice that this file is not protected like a normal header. + * We also must allow for rereading of this file. The + * + * || defined(TRACE_HEADER_MULTI_READ) + * + * serves this purpose. + */ +#if !defined(_TRACE_EVENT_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EVENT_SAMPLE_H + +/* + * All trace headers should include tracepoint.h, until we finally + * make it into a standard header. + */ +#include + /* * The TRACE_EVENT macro is broken up into 5 parts. 
* -- cgit v1.2.3 From 113adefc73c291f93f875fe515a46d8f76252fff Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Jul 2009 12:50:12 +0800 Subject: crypto: shash - Make descsize a run-time attribute This patch changes descsize to a run-time attribute so that implementations can change it in their init functions. Signed-off-by: Herbert Xu --- crypto/shash.c | 39 ++++++++++++++++++++++++++++----------- include/crypto/hash.h | 3 ++- include/crypto/internal/hash.h | 2 +- 3 files changed, 31 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/crypto/shash.c b/crypto/shash.c index 131e14d2b572..d8e0f8f8d672 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -300,14 +300,16 @@ static int crypto_init_shash_ops_async(struct crypto_tfm *tfm) static int shash_compat_setkey(struct crypto_hash *tfm, const u8 *key, unsigned int keylen) { - struct shash_desc *desc = crypto_hash_ctx(tfm); + struct shash_desc **descp = crypto_hash_ctx(tfm); + struct shash_desc *desc = *descp; return crypto_shash_setkey(desc->tfm, key, keylen); } static int shash_compat_init(struct hash_desc *hdesc) { - struct shash_desc *desc = crypto_hash_ctx(hdesc->tfm); + struct shash_desc **descp = crypto_hash_ctx(hdesc->tfm); + struct shash_desc *desc = *descp; desc->flags = hdesc->flags; @@ -317,7 +319,8 @@ static int shash_compat_init(struct hash_desc *hdesc) static int shash_compat_update(struct hash_desc *hdesc, struct scatterlist *sg, unsigned int len) { - struct shash_desc *desc = crypto_hash_ctx(hdesc->tfm); + struct shash_desc **descp = crypto_hash_ctx(hdesc->tfm); + struct shash_desc *desc = *descp; struct crypto_hash_walk walk; int nbytes; @@ -330,7 +333,9 @@ static int shash_compat_update(struct hash_desc *hdesc, struct scatterlist *sg, static int shash_compat_final(struct hash_desc *hdesc, u8 *out) { - return crypto_shash_final(crypto_hash_ctx(hdesc->tfm), out); + struct shash_desc **descp = crypto_hash_ctx(hdesc->tfm); + + return crypto_shash_final(*descp, out); } static int shash_compat_digest(struct hash_desc *hdesc, struct scatterlist *sg, @@ -340,7 +345,8 @@ static int shash_compat_digest(struct hash_desc *hdesc, struct scatterlist *sg, int err; if (nbytes < min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset)) { - struct shash_desc *desc = crypto_hash_ctx(hdesc->tfm); + struct shash_desc **descp = crypto_hash_ctx(hdesc->tfm); + struct shash_desc *desc = *descp; void *data; desc->flags = hdesc->flags; @@ -368,9 +374,11 @@ out: static void crypto_exit_shash_ops_compat(struct crypto_tfm *tfm) { - struct shash_desc *desc= crypto_tfm_ctx(tfm); + struct shash_desc **descp = crypto_tfm_ctx(tfm); + struct shash_desc *desc = *descp; crypto_free_shash(desc->tfm); + kzfree(desc); } static int crypto_init_shash_ops_compat(struct crypto_tfm *tfm) @@ -378,8 +386,9 @@ static int crypto_init_shash_ops_compat(struct crypto_tfm *tfm) struct hash_tfm *crt = &tfm->crt_hash; struct crypto_alg *calg = tfm->__crt_alg; struct shash_alg *alg = __crypto_shash_alg(calg); - struct shash_desc *desc = crypto_tfm_ctx(tfm); + struct shash_desc **descp = crypto_tfm_ctx(tfm); struct crypto_shash *shash; + struct shash_desc *desc; if (!crypto_mod_get(calg)) return -EAGAIN; @@ -390,6 +399,14 @@ static int crypto_init_shash_ops_compat(struct crypto_tfm *tfm) return PTR_ERR(shash); } + desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(shash), + GFP_KERNEL); + if (!desc) { + crypto_free_shash(shash); + return -ENOMEM; + } + + *descp = desc; desc->tfm = shash; tfm->exit = crypto_exit_shash_ops_compat; @@ -419,11 +436,9 @@ static int 
crypto_init_shash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) static unsigned int crypto_shash_ctxsize(struct crypto_alg *alg, u32 type, u32 mask) { - struct shash_alg *salg = __crypto_shash_alg(alg); - switch (mask & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_HASH_MASK: - return sizeof(struct shash_desc) + salg->descsize; + return sizeof(struct shash_desc *); case CRYPTO_ALG_TYPE_AHASH_MASK: return sizeof(struct crypto_shash *); } @@ -434,6 +449,9 @@ static unsigned int crypto_shash_ctxsize(struct crypto_alg *alg, u32 type, static int crypto_shash_init_tfm(struct crypto_tfm *tfm, const struct crypto_type *frontend) { + struct crypto_shash *hash = __crypto_shash_cast(tfm); + + hash->descsize = crypto_shash_alg(hash)->descsize; return 0; } @@ -452,7 +470,6 @@ static void crypto_shash_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "type : shash\n"); seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); seq_printf(m, "digestsize : %u\n", salg->digestsize); - seq_printf(m, "descsize : %u\n", salg->descsize); } static const struct crypto_type crypto_shash_type = { diff --git a/include/crypto/hash.h b/include/crypto/hash.h index f74214a4b01b..fcc02d978231 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -48,6 +48,7 @@ struct crypto_ahash { }; struct crypto_shash { + unsigned int descsize; struct crypto_tfm base; }; @@ -275,7 +276,7 @@ static inline void crypto_shash_clear_flags(struct crypto_shash *tfm, u32 flags) static inline unsigned int crypto_shash_descsize(struct crypto_shash *tfm) { - return crypto_shash_alg(tfm)->descsize; + return tfm->descsize; } static inline void *shash_desc_ctx(struct shash_desc *desc) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 069b93e1a8ec..2d1b10f23c91 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -135,7 +135,7 @@ static inline void *crypto_shash_ctx_aligned(struct crypto_shash *tfm) static inline struct crypto_shash *__crypto_shash_cast(struct crypto_tfm *tfm) { - return (struct crypto_shash *)tfm; + return container_of(tfm, struct crypto_shash, base); } #endif /* _CRYPTO_INTERNAL_HASH_H */ -- cgit v1.2.3 From 7eddf95ec5440d60f10963f453e27f82f394044e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 12 Jul 2009 21:25:20 +0800 Subject: crypto: shash - Export async functions This patch exports the async functions so that they can be reused by cryptd when it switches over to using shash. 
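A minimal sketch of how an async user such as cryptd might call the newly exported helper. It mirrors the in-tree shash_async_digest() below and assumes the same layout, i.e. the tfm context holds a pointer to the wrapped crypto_shash and the request context holds the shash_desc; the example_ name is illustrative only.

#include <crypto/internal/hash.h>

static int example_async_digest(struct ahash_request *req)
{
	struct crypto_shash **ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
	struct shash_desc *desc = ahash_request_ctx(req);

	/* bind the per-request descriptor to the wrapped shash transform */
	desc->tfm = *ctx;
	desc->flags = req->base.flags;

	/* the exported helper walks req->src and finishes into req->result */
	return shash_ahash_digest(req, desc);
}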
Signed-off-by: Herbert Xu --- crypto/shash.c | 42 ++++++++++++++++++++++-------------------- include/crypto/internal/hash.h | 3 +++ 2 files changed, 25 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/crypto/shash.c b/crypto/shash.c index d8e0f8f8d672..3b1b06b3dcf3 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -202,9 +202,8 @@ static int shash_async_init(struct ahash_request *req) return crypto_shash_init(desc); } -static int shash_async_update(struct ahash_request *req) +int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc) { - struct shash_desc *desc = ahash_request_ctx(req); struct crypto_hash_walk walk; int nbytes; @@ -214,13 +213,19 @@ static int shash_async_update(struct ahash_request *req) return nbytes; } +EXPORT_SYMBOL_GPL(shash_ahash_update); + +static int shash_async_update(struct ahash_request *req) +{ + return shash_ahash_update(req, ahash_request_ctx(req)); +} static int shash_async_final(struct ahash_request *req) { return crypto_shash_final(ahash_request_ctx(req), req->result); } -static int shash_async_digest(struct ahash_request *req) +int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc) { struct scatterlist *sg = req->src; unsigned int offset = sg->offset; @@ -228,34 +233,31 @@ static int shash_async_digest(struct ahash_request *req) int err; if (nbytes < min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset)) { - struct crypto_shash **ctx = - crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct shash_desc *desc = ahash_request_ctx(req); void *data; - desc->tfm = *ctx; - desc->flags = req->base.flags; - data = crypto_kmap(sg_page(sg), 0); err = crypto_shash_digest(desc, data + offset, nbytes, req->result); crypto_kunmap(data, 0); crypto_yield(desc->flags); - goto out; - } + } else + err = crypto_shash_init(desc) ?: + shash_ahash_update(req, desc) ?: + crypto_shash_final(desc, req->result); - err = shash_async_init(req); - if (err) - goto out; + return err; +} +EXPORT_SYMBOL_GPL(shash_ahash_digest); - err = shash_async_update(req); - if (err) - goto out; +static int shash_async_digest(struct ahash_request *req) +{ + struct crypto_shash **ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct shash_desc *desc = ahash_request_ctx(req); - err = shash_async_final(req); + desc->tfm = *ctx; + desc->flags = req->base.flags; -out: - return err; + return shash_ahash_digest(req, desc); } static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 2d1b10f23c91..6e283495374e 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -63,6 +63,9 @@ int crypto_init_shash_spawn(struct crypto_shash_spawn *spawn, struct shash_alg *shash_attr_alg(struct rtattr *rta, u32 type, u32 mask); +int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); +int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc); + static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) { return crypto_tfm_ctx(&tfm->base); -- cgit v1.2.3 From fc00127fb67b2a7d2b66f0f4096a5367b581f045 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 12 Jul 2009 23:05:48 +0800 Subject: crypto: ahash - Add crypto_ahash_set_reqsize This patch adds the helper crypto_ahash_set_reqsize so that implementations do not directly access the crypto_ahash structure. 
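A minimal usage sketch, assuming a hypothetical driver with a made-up per-request context; the only real API used is the new helper plus __crypto_ahash_cast().

#include <crypto/internal/hash.h>

/* hypothetical per-request state of an ahash driver */
struct example_req_ctx {
	u8 partial[64];
	unsigned int count;
};

static int example_cra_init(struct crypto_tfm *tfm)
{
	/* reserve request-context space without poking at struct crypto_ahash */
	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
				 sizeof(struct example_req_ctx));
	return 0;
}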
Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 6e283495374e..5e45818f3351 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -77,6 +77,12 @@ static inline struct ahash_alg *crypto_ahash_alg( return &crypto_ahash_tfm(tfm)->__crt_alg->cra_ahash; } +static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm, + unsigned int reqsize) +{ + crypto_ahash_crt(tfm)->reqsize = reqsize; +} + static inline int ahash_enqueue_request(struct crypto_queue *queue, struct ahash_request *request) { -- cgit v1.2.3 From 2ca33da1dea3ba53d1425226a6bac073c5e8568c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 13 Jul 2009 20:46:25 +0800 Subject: crypto: api - Remove frontend argument from extsize/init_tfm As the extsize and init_tfm functions belong to the frontend the frontend argument is superfluous. Signed-off-by: Herbert Xu --- crypto/api.c | 4 ++-- crypto/pcompress.c | 6 ++---- crypto/shash.c | 6 ++---- include/crypto/algapi.h | 6 ++---- 4 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/crypto/api.c b/crypto/api.c index c8ac18f7ac1e..798526d90538 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -457,7 +457,7 @@ void *crypto_create_tfm(struct crypto_alg *alg, int err = -ENOMEM; tfmsize = frontend->tfmsize; - total = tfmsize + sizeof(*tfm) + frontend->extsize(alg, frontend); + total = tfmsize + sizeof(*tfm) + frontend->extsize(alg); mem = kzalloc(total, GFP_KERNEL); if (mem == NULL) @@ -466,7 +466,7 @@ void *crypto_create_tfm(struct crypto_alg *alg, tfm = (struct crypto_tfm *)(mem + tfmsize); tfm->__crt_alg = alg; - err = frontend->init_tfm(tfm, frontend); + err = frontend->init_tfm(tfm); if (err) goto out_free_tfm; diff --git a/crypto/pcompress.c b/crypto/pcompress.c index bcadc03726b7..f7c4a7d7412e 100644 --- a/crypto/pcompress.c +++ b/crypto/pcompress.c @@ -36,14 +36,12 @@ static int crypto_pcomp_init(struct crypto_tfm *tfm, u32 type, u32 mask) return 0; } -static unsigned int crypto_pcomp_extsize(struct crypto_alg *alg, - const struct crypto_type *frontend) +static unsigned int crypto_pcomp_extsize(struct crypto_alg *alg) { return alg->cra_ctxsize; } -static int crypto_pcomp_init_tfm(struct crypto_tfm *tfm, - const struct crypto_type *frontend) +static int crypto_pcomp_init_tfm(struct crypto_tfm *tfm) { return 0; } diff --git a/crypto/shash.c b/crypto/shash.c index 3b1b06b3dcf3..7063e1421504 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -448,8 +448,7 @@ static unsigned int crypto_shash_ctxsize(struct crypto_alg *alg, u32 type, return 0; } -static int crypto_shash_init_tfm(struct crypto_tfm *tfm, - const struct crypto_type *frontend) +static int crypto_shash_init_tfm(struct crypto_tfm *tfm) { struct crypto_shash *hash = __crypto_shash_cast(tfm); @@ -457,8 +456,7 @@ static int crypto_shash_init_tfm(struct crypto_tfm *tfm, return 0; } -static unsigned int crypto_shash_extsize(struct crypto_alg *alg, - const struct crypto_type *frontend) +static unsigned int crypto_shash_extsize(struct crypto_alg *alg) { return alg->cra_ctxsize; } diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 1d15a926041e..7635fde7b1a2 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -22,11 +22,9 @@ struct seq_file; struct crypto_type { unsigned int (*ctxsize)(struct crypto_alg *alg, u32 type, u32 mask); - unsigned int (*extsize)(struct crypto_alg *alg, 
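A minimal caller-side sketch under the new type: allocation goes through crypto_alloc_ahash() and the fast-path ops live directly in struct crypto_ahash. The "sha1" name and the synchronous-only mask are assumptions for illustration; an async backend would instead complete through the request callback.

#include <crypto/hash.h>
#include <linux/err.h>
#include <linux/scatterlist.h>

static int example_hash_sg(struct scatterlist *sg, unsigned int len, u8 *out)
{
	struct crypto_ahash *tfm;
	struct ahash_request *req;
	int err;

	/* mask out async implementations so ->digest completes inline */
	tfm = crypto_alloc_ahash("sha1", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req) {
		crypto_free_ahash(tfm);
		return -ENOMEM;
	}

	ahash_request_set_callback(req, 0, NULL, NULL);
	ahash_request_set_crypt(req, sg, out, len);

	err = crypto_ahash_digest(req);

	ahash_request_free(req);
	crypto_free_ahash(tfm);
	return err;
}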
- const struct crypto_type *frontend); + unsigned int (*extsize)(struct crypto_alg *alg); int (*init)(struct crypto_tfm *tfm, u32 type, u32 mask); - int (*init_tfm)(struct crypto_tfm *tfm, - const struct crypto_type *frontend); + int (*init_tfm)(struct crypto_tfm *tfm); void (*show)(struct seq_file *m, struct crypto_alg *alg); struct crypto_alg *(*lookup)(const char *name, u32 type, u32 mask); -- cgit v1.2.3 From 88056ec346ccf41f63dbc7080b24b5fd19d1358d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Jul 2009 12:28:26 +0800 Subject: crypto: ahash - Convert to new style algorithms This patch converts crypto_ahash to the new style. The old ahash algorithm type is retained until the existing ahash implementations are also converted. All ahash users will automatically get the new crypto_ahash type. Signed-off-by: Herbert Xu --- crypto/ahash.c | 82 +++++++++++++++++++++---------- crypto/shash.c | 8 +-- include/crypto/hash.h | 109 +++++++++++++++++++++++++++++------------ include/crypto/internal/hash.h | 11 +++-- include/linux/crypto.h | 29 ++--------- 5 files changed, 148 insertions(+), 91 deletions(-) (limited to 'include') diff --git a/crypto/ahash.c b/crypto/ahash.c index f3476374f764..838519386215 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -24,6 +24,12 @@ #include "internal.h" +static inline struct ahash_alg *crypto_ahash_alg(struct crypto_ahash *hash) +{ + return container_of(crypto_hash_alg_common(hash), struct ahash_alg, + halg); +} + static int hash_walk_next(struct crypto_hash_walk *walk) { unsigned int alignmask = walk->alignmask; @@ -169,30 +175,11 @@ static int ahash_nosetkey(struct crypto_ahash *tfm, const u8 *key, return -ENOSYS; } -int crypto_ahash_import(struct ahash_request *req, const u8 *in) -{ - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ahash_alg *alg = crypto_ahash_alg(tfm); - - memcpy(ahash_request_ctx(req), in, crypto_ahash_reqsize(tfm)); - - if (alg->reinit) - alg->reinit(req); - - return 0; -} -EXPORT_SYMBOL_GPL(crypto_ahash_import); - -static unsigned int crypto_ahash_ctxsize(struct crypto_alg *alg, u32 type, - u32 mask) -{ - return alg->cra_ctxsize; -} - static int crypto_init_ahash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) { - struct ahash_alg *alg = &tfm->__crt_alg->cra_ahash; - struct ahash_tfm *crt = &tfm->crt_ahash; + struct old_ahash_alg *alg = &tfm->__crt_alg->cra_ahash; + struct crypto_ahash *crt = __crypto_ahash_cast(tfm); + struct ahash_alg *nalg = crypto_ahash_alg(crt); if (alg->digestsize > PAGE_SIZE / 8) return -EINVAL; @@ -204,9 +191,42 @@ static int crypto_init_ahash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) crt->setkey = alg->setkey ? ahash_setkey : ahash_nosetkey; crt->digestsize = alg->digestsize; + nalg->setkey = alg->setkey; + nalg->halg.digestsize = alg->digestsize; + return 0; } +static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_ahash *hash = __crypto_ahash_cast(tfm); + struct ahash_alg *alg = crypto_ahash_alg(hash); + struct old_ahash_alg *oalg = crypto_old_ahash_alg(hash); + + if (tfm->__crt_alg->cra_type != &crypto_ahash_type) + return crypto_init_shash_ops_async(tfm); + + if (oalg->init) + return crypto_init_ahash_ops(tfm, 0, 0); + + hash->init = alg->init; + hash->update = alg->update; + hash->final = alg->final; + hash->digest = alg->digest; + hash->setkey = alg->setkey ? 
ahash_setkey : ahash_nosetkey; + hash->digestsize = alg->halg.digestsize; + + return 0; +} + +static unsigned int crypto_ahash_extsize(struct crypto_alg *alg) +{ + if (alg->cra_type == &crypto_ahash_type) + return alg->cra_ctxsize; + + return sizeof(struct crypto_shash *); +} + static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) __attribute__ ((unused)); static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) @@ -215,17 +235,29 @@ static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) seq_printf(m, "async : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ? "yes" : "no"); seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); - seq_printf(m, "digestsize : %u\n", alg->cra_ahash.digestsize); + seq_printf(m, "digestsize : %u\n", + __crypto_hash_alg_common(alg)->digestsize); } const struct crypto_type crypto_ahash_type = { - .ctxsize = crypto_ahash_ctxsize, - .init = crypto_init_ahash_ops, + .extsize = crypto_ahash_extsize, + .init_tfm = crypto_ahash_init_tfm, #ifdef CONFIG_PROC_FS .show = crypto_ahash_show, #endif + .maskclear = ~CRYPTO_ALG_TYPE_MASK, + .maskset = CRYPTO_ALG_TYPE_AHASH_MASK, + .type = CRYPTO_ALG_TYPE_AHASH, + .tfmsize = offsetof(struct crypto_ahash, base), }; EXPORT_SYMBOL_GPL(crypto_ahash_type); +struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, u32 type, + u32 mask) +{ + return crypto_alloc_tfm(alg_name, &crypto_ahash_type, type, mask); +} +EXPORT_SYMBOL_GPL(crypto_alloc_ahash); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous cryptographic hash type"); diff --git a/crypto/shash.c b/crypto/shash.c index 7063e1421504..615a5f4d9b7a 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -267,11 +267,11 @@ static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm) crypto_free_shash(*ctx); } -static int crypto_init_shash_ops_async(struct crypto_tfm *tfm) +int crypto_init_shash_ops_async(struct crypto_tfm *tfm) { struct crypto_alg *calg = tfm->__crt_alg; struct shash_alg *alg = __crypto_shash_alg(calg); - struct ahash_tfm *crt = &tfm->crt_ahash; + struct crypto_ahash *crt = __crypto_ahash_cast(tfm); struct crypto_shash **ctx = crypto_tfm_ctx(tfm); struct crypto_shash *shash; @@ -428,8 +428,6 @@ static int crypto_init_shash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) switch (mask & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_HASH_MASK: return crypto_init_shash_ops_compat(tfm); - case CRYPTO_ALG_TYPE_AHASH_MASK: - return crypto_init_shash_ops_async(tfm); } return -EINVAL; @@ -441,8 +439,6 @@ static unsigned int crypto_shash_ctxsize(struct crypto_alg *alg, u32 type, switch (mask & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_HASH_MASK: return sizeof(struct shash_desc *); - case CRYPTO_ALG_TYPE_AHASH_MASK: - return sizeof(struct crypto_shash *); } return 0; diff --git a/include/crypto/hash.h b/include/crypto/hash.h index fcc02d978231..262861d8f0cb 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -15,6 +15,39 @@ #include +struct crypto_ahash; + +struct hash_alg_common { + unsigned int digestsize; + unsigned int statesize; + + struct crypto_alg base; +}; + +struct ahash_request { + struct crypto_async_request base; + + unsigned int nbytes; + struct scatterlist *src; + u8 *result; + + void *__ctx[] CRYPTO_MINALIGN_ATTR; +}; + +struct ahash_alg { + int (*init)(struct ahash_request *req); + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*finup)(struct ahash_request *req); + int (*digest)(struct ahash_request *req); + int (*export)(struct 
ahash_request *req, void *out); + int (*import)(struct ahash_request *req, const void *in); + int (*setkey)(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen); + + struct hash_alg_common halg; +}; + struct shash_desc { struct crypto_shash *tfm; u32 flags; @@ -37,6 +70,8 @@ struct shash_alg { unsigned int keylen); unsigned int descsize; + + /* These fields must match hash_alg_common. */ unsigned int digestsize; unsigned int statesize; @@ -44,6 +79,18 @@ struct shash_alg { }; struct crypto_ahash { + int (*init)(struct ahash_request *req); + int (*update)(struct ahash_request *req); + int (*final)(struct ahash_request *req); + int (*finup)(struct ahash_request *req); + int (*digest)(struct ahash_request *req); + int (*export)(struct ahash_request *req, void *out); + int (*import)(struct ahash_request *req, const void *in); + int (*setkey)(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen); + + unsigned int digestsize; + unsigned int reqsize; struct crypto_tfm base; }; @@ -54,19 +101,11 @@ struct crypto_shash { static inline struct crypto_ahash *__crypto_ahash_cast(struct crypto_tfm *tfm) { - return (struct crypto_ahash *)tfm; + return container_of(tfm, struct crypto_ahash, base); } -static inline struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, - u32 type, u32 mask) -{ - type &= ~CRYPTO_ALG_TYPE_MASK; - mask &= ~CRYPTO_ALG_TYPE_MASK; - type |= CRYPTO_ALG_TYPE_AHASH; - mask |= CRYPTO_ALG_TYPE_AHASH_MASK; - - return __crypto_ahash_cast(crypto_alloc_base(alg_name, type, mask)); -} +struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, u32 type, + u32 mask); static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) { @@ -75,7 +114,7 @@ static inline struct crypto_tfm *crypto_ahash_tfm(struct crypto_ahash *tfm) static inline void crypto_free_ahash(struct crypto_ahash *tfm) { - crypto_free_tfm(crypto_ahash_tfm(tfm)); + crypto_destroy_tfm(tfm, crypto_ahash_tfm(tfm)); } static inline unsigned int crypto_ahash_alignmask( @@ -84,14 +123,26 @@ static inline unsigned int crypto_ahash_alignmask( return crypto_tfm_alg_alignmask(crypto_ahash_tfm(tfm)); } -static inline struct ahash_tfm *crypto_ahash_crt(struct crypto_ahash *tfm) +static inline struct hash_alg_common *__crypto_hash_alg_common( + struct crypto_alg *alg) { - return &crypto_ahash_tfm(tfm)->crt_ahash; + return container_of(alg, struct hash_alg_common, base); +} + +static inline struct hash_alg_common *crypto_hash_alg_common( + struct crypto_ahash *tfm) +{ + return __crypto_hash_alg_common(crypto_ahash_tfm(tfm)->__crt_alg); } static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm) { - return crypto_ahash_crt(tfm)->digestsize; + return tfm->digestsize; +} + +static inline unsigned int crypto_ahash_statesize(struct crypto_ahash *tfm) +{ + return crypto_hash_alg_common(tfm)->statesize; } static inline u32 crypto_ahash_get_flags(struct crypto_ahash *tfm) @@ -117,7 +168,7 @@ static inline struct crypto_ahash *crypto_ahash_reqtfm( static inline unsigned int crypto_ahash_reqsize(struct crypto_ahash *tfm) { - return crypto_ahash_crt(tfm)->reqsize; + return tfm->reqsize; } static inline void *ahash_request_ctx(struct ahash_request *req) @@ -128,41 +179,37 @@ static inline void *ahash_request_ctx(struct ahash_request *req) static inline int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) { - struct ahash_tfm *crt = crypto_ahash_crt(tfm); - - return crt->setkey(tfm, key, keylen); + return tfm->setkey(tfm, key, keylen); } static inline int 
crypto_ahash_digest(struct ahash_request *req) { - struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); - return crt->digest(req); + return crypto_ahash_reqtfm(req)->digest(req); } -static inline void crypto_ahash_export(struct ahash_request *req, u8 *out) +static inline int crypto_ahash_export(struct ahash_request *req, void *out) { - memcpy(out, ahash_request_ctx(req), - crypto_ahash_reqsize(crypto_ahash_reqtfm(req))); + return crypto_ahash_reqtfm(req)->export(req, out); } -int crypto_ahash_import(struct ahash_request *req, const u8 *in); +static inline int crypto_ahash_import(struct ahash_request *req, const void *in) +{ + return crypto_ahash_reqtfm(req)->import(req, in); +} static inline int crypto_ahash_init(struct ahash_request *req) { - struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); - return crt->init(req); + return crypto_ahash_reqtfm(req)->init(req); } static inline int crypto_ahash_update(struct ahash_request *req) { - struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); - return crt->update(req); + return crypto_ahash_reqtfm(req)->update(req); } static inline int crypto_ahash_final(struct ahash_request *req) { - struct ahash_tfm *crt = crypto_ahash_crt(crypto_ahash_reqtfm(req)); - return crt->final(req); + return crypto_ahash_reqtfm(req)->final(req); } static inline void ahash_request_set_tfm(struct ahash_request *req, diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 5e45818f3351..08bdffafefad 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -51,6 +51,9 @@ int crypto_hash_walk_first_compat(struct hash_desc *hdesc, struct crypto_hash_walk *walk, struct scatterlist *sg, unsigned int len); +int crypto_register_ahash(struct ahash_alg *alg); +int crypto_unregister_ahash(struct ahash_alg *alg); + int crypto_register_shash(struct shash_alg *alg); int crypto_unregister_shash(struct shash_alg *alg); int shash_register_instance(struct crypto_template *tmpl, @@ -66,12 +69,14 @@ struct shash_alg *shash_attr_alg(struct rtattr *rta, u32 type, u32 mask); int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc); +int crypto_init_shash_ops_async(struct crypto_tfm *tfm); + static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) { - return crypto_tfm_ctx(&tfm->base); + return crypto_tfm_ctx(crypto_ahash_tfm(tfm)); } -static inline struct ahash_alg *crypto_ahash_alg( +static inline struct old_ahash_alg *crypto_old_ahash_alg( struct crypto_ahash *tfm) { return &crypto_ahash_tfm(tfm)->__crt_alg->cra_ahash; @@ -80,7 +85,7 @@ static inline struct ahash_alg *crypto_ahash_alg( static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm, unsigned int reqsize) { - crypto_ahash_crt(tfm)->reqsize = reqsize; + tfm->reqsize = reqsize; } static inline int ahash_enqueue_request(struct crypto_queue *queue, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 274f9c7da90c..9e7e9b62a3dc 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -120,6 +120,7 @@ struct crypto_rng; struct crypto_tfm; struct crypto_type; struct aead_givcrypt_request; +struct ahash_request; struct skcipher_givcrypt_request; typedef void (*crypto_completion_t)(struct crypto_async_request *req, int err); @@ -146,16 +147,6 @@ struct ablkcipher_request { void *__ctx[] CRYPTO_MINALIGN_ATTR; }; -struct ahash_request { - struct crypto_async_request base; - - unsigned int nbytes; - struct 
scatterlist *src; - u8 *result; - - void *__ctx[] CRYPTO_MINALIGN_ATTR; -}; - /** * struct aead_request - AEAD request * @base: Common attributes for async crypto requests @@ -220,7 +211,7 @@ struct ablkcipher_alg { unsigned int ivsize; }; -struct ahash_alg { +struct old_ahash_alg { int (*init)(struct ahash_request *req); int (*reinit)(struct ahash_request *req); int (*update)(struct ahash_request *req); @@ -346,7 +337,7 @@ struct crypto_alg { struct cipher_alg cipher; struct digest_alg digest; struct hash_alg hash; - struct ahash_alg ahash; + struct old_ahash_alg ahash; struct compress_alg compress; struct rng_alg rng; } cra_u; @@ -433,18 +424,6 @@ struct hash_tfm { unsigned int digestsize; }; -struct ahash_tfm { - int (*init)(struct ahash_request *req); - int (*update)(struct ahash_request *req); - int (*final)(struct ahash_request *req); - int (*digest)(struct ahash_request *req); - int (*setkey)(struct crypto_ahash *tfm, const u8 *key, - unsigned int keylen); - - unsigned int digestsize; - unsigned int reqsize; -}; - struct compress_tfm { int (*cot_compress)(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, @@ -465,7 +444,6 @@ struct rng_tfm { #define crt_blkcipher crt_u.blkcipher #define crt_cipher crt_u.cipher #define crt_hash crt_u.hash -#define crt_ahash crt_u.ahash #define crt_compress crt_u.compress #define crt_rng crt_u.rng @@ -479,7 +457,6 @@ struct crypto_tfm { struct blkcipher_tfm blkcipher; struct cipher_tfm cipher; struct hash_tfm hash; - struct ahash_tfm ahash; struct compress_tfm compress; struct rng_tfm rng; } crt_u; -- cgit v1.2.3 From 01c2dece4316dadc0f9fad1ad0b56d493980e492 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Jul 2009 14:06:06 +0800 Subject: crypto: ahash - Add instance/spawn support This patch adds support for creating ahash instances and using ahash as spawns. 
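A minimal consumer-side sketch, assuming a hypothetical template whose instance context is a crypto_ahash_spawn that was set up earlier with crypto_init_ahash_spawn(); at tfm-init time the child ahash is grabbed through the spawn. All example_ names are made up.

#include <crypto/internal/hash.h>
#include <linux/err.h>

struct example_tfm_ctx {
	struct crypto_ahash *child;
};

static int example_init_tfm(struct crypto_tfm *tfm)
{
	struct crypto_instance *inst = crypto_tfm_alg_instance(tfm);
	struct crypto_ahash_spawn *spawn = crypto_instance_ctx(inst);
	struct example_tfm_ctx *ctx = crypto_tfm_ctx(tfm);
	struct crypto_ahash *child;

	/* instantiate the underlying ahash through the new spawn type */
	child = crypto_spawn_ahash(spawn);
	if (IS_ERR(child))
		return PTR_ERR(child);

	ctx->child = child;
	return 0;
}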
Signed-off-by: Herbert Xu --- crypto/ahash.c | 72 ++++++++++++++++++++++++++++++++++++++++++ include/crypto/internal/hash.h | 51 ++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) (limited to 'include') diff --git a/crypto/ahash.c b/crypto/ahash.c index 838519386215..7f599d26086a 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -259,5 +259,77 @@ struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, u32 type, } EXPORT_SYMBOL_GPL(crypto_alloc_ahash); +static int ahash_prepare_alg(struct ahash_alg *alg) +{ + struct crypto_alg *base = &alg->halg.base; + + if (alg->halg.digestsize > PAGE_SIZE / 8 || + alg->halg.statesize > PAGE_SIZE / 8) + return -EINVAL; + + base->cra_type = &crypto_ahash_type; + base->cra_flags &= ~CRYPTO_ALG_TYPE_MASK; + base->cra_flags |= CRYPTO_ALG_TYPE_AHASH; + + return 0; +} + +int crypto_register_ahash(struct ahash_alg *alg) +{ + struct crypto_alg *base = &alg->halg.base; + int err; + + err = ahash_prepare_alg(alg); + if (err) + return err; + + return crypto_register_alg(base); +} +EXPORT_SYMBOL_GPL(crypto_register_ahash); + +int crypto_unregister_ahash(struct ahash_alg *alg) +{ + return crypto_unregister_alg(&alg->halg.base); +} +EXPORT_SYMBOL_GPL(crypto_unregister_ahash); + +int ahash_register_instance(struct crypto_template *tmpl, + struct ahash_instance *inst) +{ + int err; + + err = ahash_prepare_alg(&inst->alg); + if (err) + return err; + + return crypto_register_instance(tmpl, ahash_crypto_instance(inst)); +} +EXPORT_SYMBOL_GPL(ahash_register_instance); + +void ahash_free_instance(struct crypto_instance *inst) +{ + crypto_drop_spawn(crypto_instance_ctx(inst)); + kfree(ahash_instance(inst)); +} +EXPORT_SYMBOL_GPL(ahash_free_instance); + +int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn, + struct hash_alg_common *alg, + struct crypto_instance *inst) +{ + return crypto_init_spawn2(&spawn->base, &alg->base, inst, + &crypto_ahash_type); +} +EXPORT_SYMBOL_GPL(crypto_init_ahash_spawn); + +struct hash_alg_common *ahash_attr_alg(struct rtattr *rta, u32 type, u32 mask) +{ + struct crypto_alg *alg; + + alg = crypto_attr_alg2(rta, &crypto_ahash_type, type, mask); + return IS_ERR(alg) ? 
ERR_CAST(alg) : __crypto_hash_alg_common(alg); +} +EXPORT_SYMBOL_GPL(ahash_attr_alg); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous cryptographic hash type"); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 08bdffafefad..34eda233ee67 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -34,10 +34,18 @@ struct crypto_hash_walk { unsigned int flags; }; +struct ahash_instance { + struct ahash_alg alg; +}; + struct shash_instance { struct shash_alg alg; }; +struct crypto_ahash_spawn { + struct crypto_spawn base; +}; + struct crypto_shash_spawn { struct crypto_spawn base; }; @@ -53,6 +61,15 @@ int crypto_hash_walk_first_compat(struct hash_desc *hdesc, int crypto_register_ahash(struct ahash_alg *alg); int crypto_unregister_ahash(struct ahash_alg *alg); +int ahash_register_instance(struct crypto_template *tmpl, + struct ahash_instance *inst); +void ahash_free_instance(struct crypto_instance *inst); + +int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn, + struct hash_alg_common *alg, + struct crypto_instance *inst); + +struct hash_alg_common *ahash_attr_alg(struct rtattr *rta, u32 type, u32 mask); int crypto_register_shash(struct shash_alg *alg); int crypto_unregister_shash(struct shash_alg *alg); @@ -88,6 +105,40 @@ static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm, tfm->reqsize = reqsize; } +static inline struct crypto_instance *ahash_crypto_instance( + struct ahash_instance *inst) +{ + return container_of(&inst->alg.halg.base, struct crypto_instance, alg); +} + +static inline struct ahash_instance *ahash_instance( + struct crypto_instance *inst) +{ + return container_of(&inst->alg, struct ahash_instance, alg.halg.base); +} + +static inline void *ahash_instance_ctx(struct ahash_instance *inst) +{ + return crypto_instance_ctx(ahash_crypto_instance(inst)); +} + +static inline unsigned int ahash_instance_headroom(void) +{ + return sizeof(struct ahash_alg) - sizeof(struct crypto_alg); +} + +static inline struct ahash_instance *ahash_alloc_instance( + const char *name, struct crypto_alg *alg) +{ + return crypto_alloc_instance2(name, alg, ahash_instance_headroom()); +} + +static inline struct crypto_ahash *crypto_spawn_ahash( + struct crypto_ahash_spawn *spawn) +{ + return crypto_spawn_tfm2(&spawn->base); +} + static inline int ahash_enqueue_request(struct crypto_queue *queue, struct ahash_request *request) { -- cgit v1.2.3 From 52861c7cd711fac97b37ae0f4842a9ad26ecae72 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Jul 2009 18:30:24 +0800 Subject: crypto: hash - Add helpers to free spawns This patch adds the helpers crypto_drop_ahash and crypto_drop_shash so that these spawns can be dropped without ugly casts. 
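[Editor's note: a minimal sketch, not part of this patch. With these helpers a template's free routine can drop a typed spawn without reaching into ->base; the function name and context layout are hypothetical.]

#include <crypto/internal/hash.h>
#include <linux/slab.h>

static void xhash_free_instance(struct crypto_instance *inst)
{
	struct crypto_ahash_spawn *spawn = crypto_instance_ctx(inst);

	/* Instead of crypto_drop_spawn(&spawn->base). */
	crypto_drop_ahash(spawn);
	kfree(inst);
}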
Signed-off-by: Herbert Xu --- include/crypto/internal/hash.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 34eda233ee67..9d30316e9f10 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -69,6 +69,11 @@ int crypto_init_ahash_spawn(struct crypto_ahash_spawn *spawn, struct hash_alg_common *alg, struct crypto_instance *inst); +static inline void crypto_drop_ahash(struct crypto_ahash_spawn *spawn) +{ + crypto_drop_spawn(&spawn->base); +} + struct hash_alg_common *ahash_attr_alg(struct rtattr *rta, u32 type, u32 mask); int crypto_register_shash(struct shash_alg *alg); @@ -81,6 +86,11 @@ int crypto_init_shash_spawn(struct crypto_shash_spawn *spawn, struct shash_alg *alg, struct crypto_instance *inst); +static inline void crypto_drop_shash(struct crypto_shash_spawn *spawn) +{ + crypto_drop_spawn(&spawn->base); +} + struct shash_alg *shash_attr_alg(struct rtattr *rta, u32 type, u32 mask); int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); -- cgit v1.2.3 From 9cd899a32f611eb6328014f1d9e0ba31977812d9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Jul 2009 18:45:45 +0800 Subject: crypto: cryptd - Switch to template create API This patch changes cryptd to use the template->create function instead of alloc in anticipation for the switch to new style ahash algorithms. Signed-off-by: Herbert Xu --- crypto/cryptd.c | 53 +++++++++++++++++++++++++++---------------------- crypto/internal.h | 3 --- include/crypto/algapi.h | 3 +++ 3 files changed, 32 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 6e6722edd0d9..ad58f513ba8b 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -287,8 +287,9 @@ out_free_inst: goto out; } -static struct crypto_instance *cryptd_alloc_blkcipher( - struct rtattr **tb, struct cryptd_queue *queue) +static int cryptd_create_blkcipher(struct crypto_template *tmpl, + struct rtattr **tb, + struct cryptd_queue *queue) { struct cryptd_instance_ctx *ctx; struct crypto_instance *inst; @@ -298,7 +299,7 @@ static struct crypto_instance *cryptd_alloc_blkcipher( alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_MASK); if (IS_ERR(alg)) - return ERR_CAST(alg); + return PTR_ERR(alg); inst = cryptd_alloc_instance(alg, sizeof(*ctx)); if (IS_ERR(inst)) @@ -330,14 +331,16 @@ static struct crypto_instance *cryptd_alloc_blkcipher( inst->alg.cra_ablkcipher.encrypt = cryptd_blkcipher_encrypt_enqueue; inst->alg.cra_ablkcipher.decrypt = cryptd_blkcipher_decrypt_enqueue; + err = crypto_register_instance(tmpl, inst); + if (err) { + crypto_drop_spawn(&ctx->spawn); +out_free_inst: + kfree(inst); + } + out_put_alg: crypto_mod_put(alg); - return inst; - -out_free_inst: - kfree(inst); - inst = ERR_PTR(err); - goto out_put_alg; + return err; } static int cryptd_hash_init_tfm(struct crypto_tfm *tfm) @@ -502,8 +505,8 @@ static int cryptd_hash_digest_enqueue(struct ahash_request *req) return cryptd_hash_enqueue(req, cryptd_hash_digest); } -static struct crypto_instance *cryptd_alloc_hash( - struct rtattr **tb, struct cryptd_queue *queue) +static int cryptd_create_hash(struct crypto_template *tmpl, struct rtattr **tb, + struct cryptd_queue *queue) { struct hashd_instance_ctx *ctx; struct crypto_instance *inst; @@ -513,7 +516,7 @@ static struct crypto_instance *cryptd_alloc_hash( salg = shash_attr_alg(tb[1], 0, 0); if (IS_ERR(salg)) - return ERR_CAST(salg); + return 
PTR_ERR(salg); alg = &salg->base; inst = cryptd_alloc_instance(alg, sizeof(*ctx)); @@ -542,34 +545,36 @@ static struct crypto_instance *cryptd_alloc_hash( inst->alg.cra_ahash.setkey = cryptd_hash_setkey; inst->alg.cra_ahash.digest = cryptd_hash_digest_enqueue; + err = crypto_register_instance(tmpl, inst); + if (err) { + crypto_drop_shash(&ctx->spawn); +out_free_inst: + kfree(inst); + } + out_put_alg: crypto_mod_put(alg); - return inst; - -out_free_inst: - kfree(inst); - inst = ERR_PTR(err); - goto out_put_alg; + return err; } static struct cryptd_queue queue; -static struct crypto_instance *cryptd_alloc(struct rtattr **tb) +static int cryptd_create(struct crypto_template *tmpl, struct rtattr **tb) { struct crypto_attr_type *algt; algt = crypto_get_attr_type(tb); if (IS_ERR(algt)) - return ERR_CAST(algt); + return PTR_ERR(algt); switch (algt->type & algt->mask & CRYPTO_ALG_TYPE_MASK) { case CRYPTO_ALG_TYPE_BLKCIPHER: - return cryptd_alloc_blkcipher(tb, &queue); + return cryptd_create_blkcipher(tmpl, tb, &queue); case CRYPTO_ALG_TYPE_DIGEST: - return cryptd_alloc_hash(tb, &queue); + return cryptd_create_hash(tmpl, tb, &queue); } - return ERR_PTR(-EINVAL); + return -EINVAL; } static void cryptd_free(struct crypto_instance *inst) @@ -582,7 +587,7 @@ static void cryptd_free(struct crypto_instance *inst) static struct crypto_template cryptd_tmpl = { .name = "cryptd", - .alloc = cryptd_alloc, + .create = cryptd_create, .free = cryptd_free, .module = THIS_MODULE, }; diff --git a/crypto/internal.h b/crypto/internal.h index 030e22921c30..2d226362e594 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -97,9 +97,6 @@ struct crypto_alg *crypto_find_alg(const char *alg_name, void *crypto_alloc_tfm(const char *alg_name, const struct crypto_type *frontend, u32 type, u32 mask); -int crypto_register_instance(struct crypto_template *tmpl, - struct crypto_instance *inst); - int crypto_register_notifier(struct notifier_block *nb); int crypto_unregister_notifier(struct notifier_block *nb); int crypto_probing_notify(unsigned long val, void *v); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 7635fde7b1a2..9de6c38f4069 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -114,6 +114,9 @@ int crypto_register_template(struct crypto_template *tmpl); void crypto_unregister_template(struct crypto_template *tmpl); struct crypto_template *crypto_lookup_template(const char *name); +int crypto_register_instance(struct crypto_template *tmpl, + struct crypto_instance *inst); + int crypto_init_spawn(struct crypto_spawn *spawn, struct crypto_alg *alg, struct crypto_instance *inst, u32 mask); int crypto_init_spawn2(struct crypto_spawn *spawn, struct crypto_alg *alg, -- cgit v1.2.3 From 4dc10c0142ce0af8c20ec44dc6928ae63ad4f73a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Jul 2009 20:21:46 +0800 Subject: crypto: crypto4xx - Switch to new style ahash This patch changes crypto4xx to use the new style ahash type. In particular, we now use ahash_alg to define ahash algorithms instead of crypto_alg. This is achieved by introducing a union that encapsulates the new type and the existing crypto_alg structure. They're told apart through a u32 field containing the type value. 
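[Editor's note: a distilled sketch, not part of this patch, of the tagged-union pattern described above; "my_alg_common" and "my_register_one" are hypothetical names, the real driver structures appear in the hunks below.]

#include <crypto/internal/hash.h>
#include <linux/crypto.h>

struct my_alg_common {
	u32 type;			/* CRYPTO_ALG_TYPE_* discriminator */
	union {
		struct crypto_alg cipher;	/* old-style entries (e.g. ablkcipher) */
		struct ahash_alg hash;		/* new-style ahash entries */
	} u;
};

static int my_register_one(struct my_alg_common *alg)
{
	switch (alg->type) {
	case CRYPTO_ALG_TYPE_AHASH:
		return crypto_register_ahash(&alg->u.hash);
	default:
		return crypto_register_alg(&alg->u.cipher);
	}
}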
Signed-off-by: Herbert Xu --- drivers/crypto/amcc/crypto4xx_core.c | 85 +++++++++++++++++++++--------------- drivers/crypto/amcc/crypto4xx_core.h | 25 +++++++++-- include/crypto/internal/hash.h | 6 +++ 3 files changed, 77 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index cb7be4283aa0..857e35efedac 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -31,8 +31,6 @@ #include #include #include -#include -#include #include #include #include "crypto4xx_reg_def.h" @@ -998,11 +996,15 @@ static int crypto4xx_alg_init(struct crypto_tfm *tfm) ctx->sa_out_dma_addr = 0; ctx->sa_len = 0; - if (alg->cra_type == &crypto_ablkcipher_type) + switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { + default: tfm->crt_ablkcipher.reqsize = sizeof(struct crypto4xx_ctx); - else if (alg->cra_type == &crypto_ahash_type) + break; + case CRYPTO_ALG_TYPE_AHASH: crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct crypto4xx_ctx)); + break; + } return 0; } @@ -1016,7 +1018,8 @@ static void crypto4xx_alg_exit(struct crypto_tfm *tfm) } int crypto4xx_register_alg(struct crypto4xx_device *sec_dev, - struct crypto_alg *crypto_alg, int array_size) + struct crypto4xx_alg_common *crypto_alg, + int array_size) { struct crypto4xx_alg *alg; int i; @@ -1028,13 +1031,18 @@ int crypto4xx_register_alg(struct crypto4xx_device *sec_dev, return -ENOMEM; alg->alg = crypto_alg[i]; - INIT_LIST_HEAD(&alg->alg.cra_list); - if (alg->alg.cra_init == NULL) - alg->alg.cra_init = crypto4xx_alg_init; - if (alg->alg.cra_exit == NULL) - alg->alg.cra_exit = crypto4xx_alg_exit; alg->dev = sec_dev; - rc = crypto_register_alg(&alg->alg); + + switch (alg->alg.type) { + case CRYPTO_ALG_TYPE_AHASH: + rc = crypto_register_ahash(&alg->alg.u.hash); + break; + + default: + rc = crypto_register_alg(&alg->alg.u.cipher); + break; + } + if (rc) { list_del(&alg->entry); kfree(alg); @@ -1052,7 +1060,14 @@ static void crypto4xx_unregister_alg(struct crypto4xx_device *sec_dev) list_for_each_entry_safe(alg, tmp, &sec_dev->alg_list, entry) { list_del(&alg->entry); - crypto_unregister_alg(&alg->alg); + switch (alg->alg.type) { + case CRYPTO_ALG_TYPE_AHASH: + crypto_unregister_ahash(&alg->alg.u.hash); + break; + + default: + crypto_unregister_alg(&alg->alg.u.cipher); + } kfree(alg); } } @@ -1105,17 +1120,18 @@ static irqreturn_t crypto4xx_ce_interrupt_handler(int irq, void *data) /** * Supported Crypto Algorithms */ -struct crypto_alg crypto4xx_alg[] = { +struct crypto4xx_alg_common crypto4xx_alg[] = { /* Crypto AES modes */ - { + { .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .u.cipher = { .cra_name = "cbc(aes)", .cra_driver_name = "cbc-aes-ppc4xx", .cra_priority = CRYPTO4XX_CRYPTO_PRIORITY, .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct crypto4xx_ctx), - .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, + .cra_init = crypto4xx_alg_init, + .cra_exit = crypto4xx_alg_exit, .cra_module = THIS_MODULE, .cra_u = { .ablkcipher = { @@ -1127,29 +1143,26 @@ struct crypto_alg crypto4xx_alg[] = { .decrypt = crypto4xx_decrypt, } } - }, + }}, /* Hash SHA1 */ - { - .cra_name = "sha1", - .cra_driver_name = "sha1-ppc4xx", - .cra_priority = CRYPTO4XX_CRYPTO_PRIORITY, - .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, - .cra_blocksize = SHA1_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto4xx_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ahash_type, - .cra_init = 
crypto4xx_sha1_alg_init, - .cra_module = THIS_MODULE, - .cra_u = { - .ahash = { - .digestsize = SHA1_DIGEST_SIZE, - .init = crypto4xx_hash_init, - .update = crypto4xx_hash_update, - .final = crypto4xx_hash_final, - .digest = crypto4xx_hash_digest, - } + { .type = CRYPTO_ALG_TYPE_AHASH, .u.hash = { + .init = crypto4xx_hash_init, + .update = crypto4xx_hash_update, + .final = crypto4xx_hash_final, + .digest = crypto4xx_hash_digest, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-ppc4xx", + .cra_priority = CRYPTO4XX_CRYPTO_PRIORITY, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto4xx_ctx), + .cra_init = crypto4xx_sha1_alg_init, + .cra_exit = crypto4xx_alg_exit, + .cra_module = THIS_MODULE, } - }, + }}, }; /** diff --git a/drivers/crypto/amcc/crypto4xx_core.h b/drivers/crypto/amcc/crypto4xx_core.h index 1ef103449364..da9cbe3b9fc3 100644 --- a/drivers/crypto/amcc/crypto4xx_core.h +++ b/drivers/crypto/amcc/crypto4xx_core.h @@ -22,6 +22,8 @@ #ifndef __CRYPTO4XX_CORE_H__ #define __CRYPTO4XX_CORE_H__ +#include + #define PPC460SX_SDR0_SRST 0x201 #define PPC405EX_SDR0_SRST 0x200 #define PPC460EX_SDR0_SRST 0x201 @@ -138,14 +140,31 @@ struct crypto4xx_req_ctx { u16 sa_len; }; +struct crypto4xx_alg_common { + u32 type; + union { + struct crypto_alg cipher; + struct ahash_alg hash; + } u; +}; + struct crypto4xx_alg { struct list_head entry; - struct crypto_alg alg; + struct crypto4xx_alg_common alg; struct crypto4xx_device *dev; }; -#define crypto_alg_to_crypto4xx_alg(x) \ - container_of(x, struct crypto4xx_alg, alg) +static inline struct crypto4xx_alg *crypto_alg_to_crypto4xx_alg( + struct crypto_alg *x) +{ + switch (x->cra_flags & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_AHASH: + return container_of(__crypto_ahash_alg(x), + struct crypto4xx_alg, alg.u.hash); + } + + return container_of(x, struct crypto4xx_alg, alg.u.cipher); +} extern int crypto4xx_alloc_sa(struct crypto4xx_ctx *ctx, u32 size); extern void crypto4xx_free_sa(struct crypto4xx_ctx *ctx); diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 9d30316e9f10..e3a82514d61e 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -103,6 +103,12 @@ static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm) return crypto_tfm_ctx(crypto_ahash_tfm(tfm)); } +static inline struct ahash_alg *__crypto_ahash_alg(struct crypto_alg *alg) +{ + return container_of(__crypto_hash_alg_common(alg), struct ahash_alg, + halg); +} + static inline struct old_ahash_alg *crypto_old_ahash_alg( struct crypto_ahash *tfm) { -- cgit v1.2.3 From 500b3e3c3dc8e4845b77ae81e5b7b085ab183ce6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 14 Jul 2009 20:29:57 +0800 Subject: crypto: ahash - Remove old_ahash_alg Now that all ahash implementations have been converted to the new ahash type, we can remove old_ahash_alg and its associated support. 
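[Editor's note: a minimal sketch, not part of this patch. The user-visible accessor is unchanged by this cleanup; only its backing storage moves from the tfm to the algorithm's common fields. The wrapper name is hypothetical.]

#include <crypto/hash.h>
#include <linux/kernel.h>

static unsigned int my_digest_size(struct crypto_ahash *tfm)
{
	/* Same call as before the cleanup ... */
	unsigned int ds = crypto_ahash_digestsize(tfm);

	/* ... now resolved through the algorithm rather than a tfm copy. */
	WARN_ON(ds != crypto_hash_alg_common(tfm)->digestsize);

	return ds;
}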
Signed-off-by: Herbert Xu --- crypto/ahash.c | 27 --------------------------- crypto/shash.c | 2 -- include/crypto/hash.h | 3 +-- include/crypto/internal/hash.h | 6 ------ include/linux/crypto.h | 16 ---------------- 5 files changed, 1 insertion(+), 53 deletions(-) (limited to 'include') diff --git a/crypto/ahash.c b/crypto/ahash.c index 7f599d26086a..cc824ef25830 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -175,46 +175,19 @@ static int ahash_nosetkey(struct crypto_ahash *tfm, const u8 *key, return -ENOSYS; } -static int crypto_init_ahash_ops(struct crypto_tfm *tfm, u32 type, u32 mask) -{ - struct old_ahash_alg *alg = &tfm->__crt_alg->cra_ahash; - struct crypto_ahash *crt = __crypto_ahash_cast(tfm); - struct ahash_alg *nalg = crypto_ahash_alg(crt); - - if (alg->digestsize > PAGE_SIZE / 8) - return -EINVAL; - - crt->init = alg->init; - crt->update = alg->update; - crt->final = alg->final; - crt->digest = alg->digest; - crt->setkey = alg->setkey ? ahash_setkey : ahash_nosetkey; - crt->digestsize = alg->digestsize; - - nalg->setkey = alg->setkey; - nalg->halg.digestsize = alg->digestsize; - - return 0; -} - static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) { struct crypto_ahash *hash = __crypto_ahash_cast(tfm); struct ahash_alg *alg = crypto_ahash_alg(hash); - struct old_ahash_alg *oalg = crypto_old_ahash_alg(hash); if (tfm->__crt_alg->cra_type != &crypto_ahash_type) return crypto_init_shash_ops_async(tfm); - if (oalg->init) - return crypto_init_ahash_ops(tfm, 0, 0); - hash->init = alg->init; hash->update = alg->update; hash->final = alg->final; hash->digest = alg->digest; hash->setkey = alg->setkey ? ahash_setkey : ahash_nosetkey; - hash->digestsize = alg->halg.digestsize; return 0; } diff --git a/crypto/shash.c b/crypto/shash.c index 615a5f4d9b7a..fd92c03b38fc 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -270,7 +270,6 @@ static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm) int crypto_init_shash_ops_async(struct crypto_tfm *tfm) { struct crypto_alg *calg = tfm->__crt_alg; - struct shash_alg *alg = __crypto_shash_alg(calg); struct crypto_ahash *crt = __crypto_ahash_cast(tfm); struct crypto_shash **ctx = crypto_tfm_ctx(tfm); struct crypto_shash *shash; @@ -293,7 +292,6 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm) crt->digest = shash_async_digest; crt->setkey = shash_async_setkey; - crt->digestsize = alg->digestsize; crt->reqsize = sizeof(struct shash_desc) + crypto_shash_descsize(shash); return 0; diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 262861d8f0cb..45c2bddfdf32 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -89,7 +89,6 @@ struct crypto_ahash { int (*setkey)(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen); - unsigned int digestsize; unsigned int reqsize; struct crypto_tfm base; }; @@ -137,7 +136,7 @@ static inline struct hash_alg_common *crypto_hash_alg_common( static inline unsigned int crypto_ahash_digestsize(struct crypto_ahash *tfm) { - return tfm->digestsize; + return crypto_hash_alg_common(tfm)->digestsize; } static inline unsigned int crypto_ahash_statesize(struct crypto_ahash *tfm) diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index e3a82514d61e..179dd8fdfa14 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -109,12 +109,6 @@ static inline struct ahash_alg *__crypto_ahash_alg(struct crypto_alg *alg) halg); } -static inline struct old_ahash_alg *crypto_old_ahash_alg( - struct crypto_ahash *tfm) -{ - return 
&crypto_ahash_tfm(tfm)->__crt_alg->cra_ahash; -} - static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm, unsigned int reqsize) { diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 9e7e9b62a3dc..fd929889e8dc 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -115,12 +115,10 @@ struct crypto_async_request; struct crypto_aead; struct crypto_blkcipher; struct crypto_hash; -struct crypto_ahash; struct crypto_rng; struct crypto_tfm; struct crypto_type; struct aead_givcrypt_request; -struct ahash_request; struct skcipher_givcrypt_request; typedef void (*crypto_completion_t)(struct crypto_async_request *req, int err); @@ -211,18 +209,6 @@ struct ablkcipher_alg { unsigned int ivsize; }; -struct old_ahash_alg { - int (*init)(struct ahash_request *req); - int (*reinit)(struct ahash_request *req); - int (*update)(struct ahash_request *req); - int (*final)(struct ahash_request *req); - int (*digest)(struct ahash_request *req); - int (*setkey)(struct crypto_ahash *tfm, const u8 *key, - unsigned int keylen); - - unsigned int digestsize; -}; - struct aead_alg { int (*setkey)(struct crypto_aead *tfm, const u8 *key, unsigned int keylen); @@ -309,7 +295,6 @@ struct rng_alg { #define cra_cipher cra_u.cipher #define cra_digest cra_u.digest #define cra_hash cra_u.hash -#define cra_ahash cra_u.ahash #define cra_compress cra_u.compress #define cra_rng cra_u.rng @@ -337,7 +322,6 @@ struct crypto_alg { struct cipher_alg cipher; struct digest_alg digest; struct hash_alg hash; - struct old_ahash_alg ahash; struct compress_alg compress; struct rng_alg rng; } cra_u; -- cgit v1.2.3 From 8660c1240ec6016522b882c88751cb4ce40bf0e8 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 13 Jul 2009 22:48:16 +0000 Subject: skbuff.h: Fix comment for NET_IP_ALIGN Use the correct function call for skb_reserve in the comment for NET_IP_ALIGN. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b47b3f039d14..f2c69a2cca17 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1342,12 +1342,12 @@ static inline int skb_network_offset(const struct sk_buff *skb) * shifting the start of the packet by 2 bytes. Drivers should do this * with: * - * skb_reserve(NET_IP_ALIGN); + * skb_reserve(skb, NET_IP_ALIGN); * * The downside to this alignment of the IP header is that the DMA is now * unaligned. On some architectures the cost of an unaligned DMA is high * and this cost outweighs the gains made by aligning the IP header. - * + * * Since this trade off varies between architectures, we allow NET_IP_ALIGN * to be overridden. */ -- cgit v1.2.3 From 4bd9b0f4afc76cf972578c702e1bc1b6f2d10ba5 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Wed, 24 Jun 2009 15:37:45 -0400 Subject: nfsd41: use globals for DRC limits The version 4.1 DRC memory limit and tracking variables are server wide and session specific. Replace struct svc_serv fields with globals. Stop using the svc_serv sv_lock. Add a spinlock to serialize access to the DRC limit management variables which change on session creation and deletion (usage counter) or (future) administrative action to adjust the total DRC memory limit. 
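[Editor's note: a minimal sketch, not part of this patch, of the release side of the accounting described above. The helper name and the page-count parameter are hypothetical; only nfsd_drc_lock and nfsd_drc_pages_used come from the patch below.]

#include <linux/spinlock.h>
#include <linux/nfsd/nfsd.h>

static void my_put_drc_pages(unsigned int np)
{
	/* Return reserved DRC pages when a session is torn down. */
	spin_lock(&nfsd_drc_lock);
	nfsd_drc_pages_used -= np;
	spin_unlock(&nfsd_drc_lock);
}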
Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy --- fs/nfsd/nfs4state.c | 10 +++++----- fs/nfsd/nfssvc.c | 19 +++++++++++++++---- include/linux/nfsd/nfsd.h | 3 +++ include/linux/sunrpc/svc.h | 2 -- 4 files changed, 23 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 980a216a48c8..2e6a44e3d2fe 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -430,11 +430,11 @@ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - spin_lock(&nfsd_serv->sv_lock); - if (np + nfsd_serv->sv_drc_pages_used > nfsd_serv->sv_drc_max_pages) - np = nfsd_serv->sv_drc_max_pages - nfsd_serv->sv_drc_pages_used; - nfsd_serv->sv_drc_pages_used += np; - spin_unlock(&nfsd_serv->sv_lock); + spin_lock(&nfsd_drc_lock); + if (np + nfsd_drc_pages_used > nfsd_drc_max_pages) + np = nfsd_drc_max_pages - nfsd_drc_pages_used; + nfsd_drc_pages_used += np; + spin_unlock(&nfsd_drc_lock); if (np <= 0) { status = nfserr_resource; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index d4c9884cd54b..78d8fcd883fb 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -67,6 +67,16 @@ struct timeval nfssvc_boot; DEFINE_MUTEX(nfsd_mutex); struct svc_serv *nfsd_serv; +/* + * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used. + * nfsd_drc_max_pages limits the total amount of memory available for + * version 4.1 DRC caches. + * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. + */ +spinlock_t nfsd_drc_lock; +unsigned int nfsd_drc_max_pages; +unsigned int nfsd_drc_pages_used; + #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; static struct svc_version * nfsd_acl_version[] = { @@ -238,11 +248,12 @@ static void set_max_drc(void) { /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ #define NFSD_DRC_SIZE_SHIFT 7 - nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() + nfsd_drc_max_pages = nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT; - nfsd_serv->sv_drc_pages_used = 0; - dprintk("%s svc_drc_max_pages %u\n", __func__, - nfsd_serv->sv_drc_max_pages); + nfsd_drc_pages_used = 0; + spin_lock_init(&nfsd_drc_lock); + dprintk("%s nfsd_drc_max_pages %u\n", __func__, + nfsd_drc_max_pages); } int nfsd_create_serv(void) diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 2b49d676d0c9..2571f856908f 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -56,6 +56,9 @@ extern struct svc_version nfsd_version2, nfsd_version3, extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; +extern spinlock_t nfsd_drc_lock; +extern unsigned int nfsd_drc_max_pages; +extern unsigned int nfsd_drc_pages_used; extern struct seq_operations nfs_exports_op; diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index ea8009695c69..52e8cb0a7569 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -94,8 +94,6 @@ struct svc_serv { struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ - unsigned int sv_drc_max_pages; /* Total pages for DRC */ - unsigned int sv_drc_pages_used;/* DRC pages used */ #if defined(CONFIG_NFS_V4_1) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same -- cgit v1.2.3 From d0cb43b35d64877b2944bd37719708be5d7bbf99 
Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 9 Jul 2009 09:27:50 +0900 Subject: libata: implement and use HORKAGE_NOSETXFER, take#2 PIONEER DVD-RW DVRTD08 times out SETXFER if no media is present. The device is SATA and simply skipping SETXFER works around the problem. Implement ATA_HORKAGE_NOSETXFER and apply it to the device. Reported by Moritz Rigler in the following thread. http://thread.gmane.org/gmane.linux.ide/36790 and by Lars in bko#9540. Updated to whine and ignore NOSETXFER if PATA component is detected as suggested by Alan Cox. Signed-off-by: Tejun Heo Reported-by: Moritz Rigler Reported-by: Lars Cc: Alan Cox Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 20 ++++++++++++++++++-- include/linux/libata.h | 1 + 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 045a486a09ea..2c6aedaef718 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3392,17 +3392,27 @@ int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel) static int ata_dev_set_mode(struct ata_device *dev) { + struct ata_port *ap = dev->link->ap; struct ata_eh_context *ehc = &dev->link->eh_context; + const bool nosetxfer = dev->horkage & ATA_HORKAGE_NOSETXFER; const char *dev_err_whine = ""; int ign_dev_err = 0; - unsigned int err_mask; + unsigned int err_mask = 0; int rc; dev->flags &= ~ATA_DFLAG_PIO; if (dev->xfer_shift == ATA_SHIFT_PIO) dev->flags |= ATA_DFLAG_PIO; - err_mask = ata_dev_set_xfermode(dev); + if (nosetxfer && ap->flags & ATA_FLAG_SATA && ata_id_is_sata(dev->id)) + dev_err_whine = " (SET_XFERMODE skipped)"; + else { + if (nosetxfer) + ata_dev_printk(dev, KERN_WARNING, + "NOSETXFER but PATA detected - can't " + "skip SETXFER, might malfunction\n"); + err_mask = ata_dev_set_xfermode(dev); + } if (err_mask & ~AC_ERR_DEV) goto fail; @@ -4297,6 +4307,12 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { /* Devices which aren't very happy with higher link speeds */ { "WD My Book", NULL, ATA_HORKAGE_1_5_GBPS, }, + /* + * Devices which choke on SETXFER. Applies only if both the + * device and controller are SATA. + */ + { "PIONEER DVD-RW DVRTD08", "1.00", ATA_HORKAGE_NOSETXFER }, + /* End Marker */ { } }; diff --git a/include/linux/libata.h b/include/linux/libata.h index 3d501db36a26..79b6d7fd4ac2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -385,6 +385,7 @@ enum { not multiple of 16 bytes */ ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firmware update warning */ ATA_HORKAGE_1_5_GBPS = (1 << 13), /* force 1.5 Gbps */ + ATA_HORKAGE_NOSETXFER = (1 << 14), /* skip SETXFER, SATA only */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 66f6ce5e52f2f209d5bf1f06167cec888f4f4c13 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 15 Jul 2009 12:40:40 +0800 Subject: crypto: ahash - Add unaligned handling and default operations This patch exports the finup operation where available and adds a default finup operation for ahash. The operations final, finup and digest also will now deal with unaligned result pointers by copying it. Finally export/import operations are will now be exported too. 
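[Editor's note: a minimal sketch, not part of this patch, of a caller using the newly exported entry points; with this change a result buffer that does not satisfy the algorithm's alignmask is handled inside the API via a bounce buffer. The wrapper name is hypothetical and async completion handling (-EINPROGRESS/-EBUSY) is omitted for brevity.]

#include <crypto/hash.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int my_hash_sg(struct crypto_ahash *tfm, struct scatterlist *sg,
		      unsigned int len, u8 *result)
{
	struct ahash_request *req;
	int err;

	req = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
	/* 'result' no longer needs to be aligned to the alignmask. */
	ahash_request_set_crypt(req, sg, result, len);

	err = crypto_ahash_digest(req);

	ahash_request_free(req);
	return err;
}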
Signed-off-by: Herbert Xu --- crypto/ahash.c | 204 ++++++++++++++++++++++++++++++++++++++++- crypto/shash.c | 52 ++++++++++- include/crypto/hash.h | 23 ++--- include/crypto/internal/hash.h | 6 ++ 4 files changed, 263 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/crypto/ahash.c b/crypto/ahash.c index a196055b73d3..ac0798d2824e 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -24,6 +24,13 @@ #include "internal.h" +struct ahash_request_priv { + crypto_completion_t complete; + void *data; + u8 *result; + void *ubuf[] CRYPTO_MINALIGN_ATTR; +}; + static inline struct ahash_alg *crypto_ahash_alg(struct crypto_ahash *hash) { return container_of(crypto_hash_alg_common(hash), struct ahash_alg, @@ -156,7 +163,7 @@ static int ahash_setkey_unaligned(struct crypto_ahash *tfm, const u8 *key, return ret; } -static int ahash_setkey(struct crypto_ahash *tfm, const u8 *key, +int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) { struct ahash_alg *ahash = crypto_ahash_alg(tfm); @@ -167,6 +174,7 @@ static int ahash_setkey(struct crypto_ahash *tfm, const u8 *key, return ahash->setkey(tfm, key, keylen); } +EXPORT_SYMBOL_GPL(crypto_ahash_setkey); static int ahash_nosetkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) @@ -174,19 +182,209 @@ static int ahash_nosetkey(struct crypto_ahash *tfm, const u8 *key, return -ENOSYS; } +static inline unsigned int ahash_align_buffer_size(unsigned len, + unsigned long mask) +{ + return len + (mask & ~(crypto_tfm_ctx_alignment() - 1)); +} + +static void ahash_op_unaligned_finish(struct ahash_request *req, int err) +{ + struct ahash_request_priv *priv = req->priv; + + if (err == -EINPROGRESS) + return; + + if (!err) + memcpy(priv->result, req->result, + crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); + + kzfree(priv); +} + +static void ahash_op_unaligned_done(struct crypto_async_request *req, int err) +{ + struct ahash_request *areq = req->data; + struct ahash_request_priv *priv = areq->priv; + crypto_completion_t complete = priv->complete; + void *data = priv->data; + + ahash_op_unaligned_finish(areq, err); + + complete(data, err); +} + +static int ahash_op_unaligned(struct ahash_request *req, + int (*op)(struct ahash_request *)) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + unsigned long alignmask = crypto_ahash_alignmask(tfm); + unsigned int ds = crypto_ahash_digestsize(tfm); + struct ahash_request_priv *priv; + int err; + + priv = kmalloc(sizeof(*priv) + ahash_align_buffer_size(ds, alignmask), + (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_ATOMIC : GFP_ATOMIC); + if (!priv) + return -ENOMEM; + + priv->result = req->result; + priv->complete = req->base.complete; + priv->data = req->base.data; + + req->result = PTR_ALIGN((u8 *)priv->ubuf, alignmask + 1); + req->base.complete = ahash_op_unaligned_done; + req->base.data = req; + req->priv = priv; + + err = op(req); + ahash_op_unaligned_finish(req, err); + + return err; +} + +static int crypto_ahash_op(struct ahash_request *req, + int (*op)(struct ahash_request *)) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + unsigned long alignmask = crypto_ahash_alignmask(tfm); + + if ((unsigned long)req->result & alignmask) + return ahash_op_unaligned(req, op); + + return op(req); +} + +int crypto_ahash_final(struct ahash_request *req) +{ + return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final); +} +EXPORT_SYMBOL_GPL(crypto_ahash_final); + +int crypto_ahash_finup(struct ahash_request *req) +{ + return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup); +} +EXPORT_SYMBOL_GPL(crypto_ahash_finup); + +int crypto_ahash_digest(struct ahash_request *req) +{ + return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->digest); +} +EXPORT_SYMBOL_GPL(crypto_ahash_digest); + +static void ahash_def_finup_finish2(struct ahash_request *req, int err) +{ + struct ahash_request_priv *priv = req->priv; + + if (err == -EINPROGRESS) + return; + + if (!err) + memcpy(priv->result, req->result, + crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); + + kzfree(priv); +} + +static void ahash_def_finup_done2(struct crypto_async_request *req, int err) +{ + struct ahash_request *areq = req->data; + struct ahash_request_priv *priv = areq->priv; + crypto_completion_t complete = priv->complete; + void *data = priv->data; + + ahash_def_finup_finish2(areq, err); + + complete(data, err); +} + +static int ahash_def_finup_finish1(struct ahash_request *req, int err) +{ + if (err) + goto out; + + req->base.complete = ahash_def_finup_done2; + req->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; + err = crypto_ahash_reqtfm(req)->final(req); + +out: + ahash_def_finup_finish2(req, err); + return err; +} + +static void ahash_def_finup_done1(struct crypto_async_request *req, int err) +{ + struct ahash_request *areq = req->data; + struct ahash_request_priv *priv = areq->priv; + crypto_completion_t complete = priv->complete; + void *data = priv->data; + + err = ahash_def_finup_finish1(areq, err); + + complete(data, err); +} + +static int ahash_def_finup(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + unsigned long alignmask = crypto_ahash_alignmask(tfm); + unsigned int ds = crypto_ahash_digestsize(tfm); + struct ahash_request_priv *priv; + + priv = kmalloc(sizeof(*priv) + ahash_align_buffer_size(ds, alignmask), + (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
+ GFP_ATOMIC : GFP_ATOMIC); + if (!priv) + return -ENOMEM; + + priv->result = req->result; + priv->complete = req->base.complete; + priv->data = req->base.data; + + req->result = PTR_ALIGN((u8 *)priv->ubuf, alignmask + 1); + req->base.complete = ahash_def_finup_done1; + req->base.data = req; + req->priv = priv; + + return ahash_def_finup_finish1(req, tfm->update(req)); +} + +static int ahash_no_export(struct ahash_request *req, void *out) +{ + return -ENOSYS; +} + +static int ahash_no_import(struct ahash_request *req, const void *in) +{ + return -ENOSYS; +} + static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) { struct crypto_ahash *hash = __crypto_ahash_cast(tfm); struct ahash_alg *alg = crypto_ahash_alg(hash); + hash->setkey = ahash_nosetkey; + hash->export = ahash_no_export; + hash->import = ahash_no_import; + if (tfm->__crt_alg->cra_type != &crypto_ahash_type) return crypto_init_shash_ops_async(tfm); hash->init = alg->init; hash->update = alg->update; - hash->final = alg->final; + hash->final = alg->final; + hash->finup = alg->finup ?: ahash_def_finup; hash->digest = alg->digest; - hash->setkey = alg->setkey ? ahash_setkey : ahash_nosetkey; + + if (alg->setkey) + hash->setkey = alg->setkey; + if (alg->export) + hash->export = alg->export; + if (alg->import) + hash->import = alg->import; return 0; } diff --git a/crypto/shash.c b/crypto/shash.c index 171c8f052f89..834d9d24cdae 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -235,6 +235,33 @@ static int shash_async_final(struct ahash_request *req) return crypto_shash_final(ahash_request_ctx(req), req->result); } +int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc) +{ + struct crypto_hash_walk walk; + int nbytes; + + for (nbytes = crypto_hash_walk_first(req, &walk); nbytes > 0; + nbytes = crypto_hash_walk_done(&walk, nbytes)) + nbytes = crypto_hash_walk_last(&walk) ? 
+ crypto_shash_finup(desc, walk.data, nbytes, + req->result) : + crypto_shash_update(desc, walk.data, nbytes); + + return nbytes; +} +EXPORT_SYMBOL_GPL(shash_ahash_finup); + +static int shash_async_finup(struct ahash_request *req) +{ + struct crypto_shash **ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct shash_desc *desc = ahash_request_ctx(req); + + desc->tfm = *ctx; + desc->flags = req->base.flags; + + return shash_ahash_finup(req, desc); +} + int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc) { struct scatterlist *sg = req->src; @@ -252,8 +279,7 @@ int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc) crypto_yield(desc->flags); } else err = crypto_shash_init(desc) ?: - shash_ahash_update(req, desc) ?: - crypto_shash_final(desc, req->result); + shash_ahash_finup(req, desc); return err; } @@ -270,6 +296,16 @@ static int shash_async_digest(struct ahash_request *req) return shash_ahash_digest(req, desc); } +static int shash_async_export(struct ahash_request *req, void *out) +{ + return crypto_shash_export(ahash_request_ctx(req), out); +} + +static int shash_async_import(struct ahash_request *req, const void *in) +{ + return crypto_shash_import(ahash_request_ctx(req), in); +} + static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm) { struct crypto_shash **ctx = crypto_tfm_ctx(tfm); @@ -280,6 +316,7 @@ static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm) int crypto_init_shash_ops_async(struct crypto_tfm *tfm) { struct crypto_alg *calg = tfm->__crt_alg; + struct shash_alg *alg = __crypto_shash_alg(calg); struct crypto_ahash *crt = __crypto_ahash_cast(tfm); struct crypto_shash **ctx = crypto_tfm_ctx(tfm); struct crypto_shash *shash; @@ -298,9 +335,16 @@ int crypto_init_shash_ops_async(struct crypto_tfm *tfm) crt->init = shash_async_init; crt->update = shash_async_update; - crt->final = shash_async_final; + crt->final = shash_async_final; + crt->finup = shash_async_finup; crt->digest = shash_async_digest; - crt->setkey = shash_async_setkey; + + if (alg->setkey) + crt->setkey = shash_async_setkey; + if (alg->export) + crt->export = shash_async_export; + if (alg->setkey) + crt->import = shash_async_import; crt->reqsize = sizeof(struct shash_desc) + crypto_shash_descsize(shash); diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 45c2bddfdf32..3e89ce16b59c 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -31,6 +31,9 @@ struct ahash_request { struct scatterlist *src; u8 *result; + /* This field may only be used by the ahash API code. 
*/ + void *priv; + void *__ctx[] CRYPTO_MINALIGN_ATTR; }; @@ -175,16 +178,11 @@ static inline void *ahash_request_ctx(struct ahash_request *req) return req->__ctx; } -static inline int crypto_ahash_setkey(struct crypto_ahash *tfm, - const u8 *key, unsigned int keylen) -{ - return tfm->setkey(tfm, key, keylen); -} - -static inline int crypto_ahash_digest(struct ahash_request *req) -{ - return crypto_ahash_reqtfm(req)->digest(req); -} +int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen); +int crypto_ahash_finup(struct ahash_request *req); +int crypto_ahash_final(struct ahash_request *req); +int crypto_ahash_digest(struct ahash_request *req); static inline int crypto_ahash_export(struct ahash_request *req, void *out) { @@ -206,11 +204,6 @@ static inline int crypto_ahash_update(struct ahash_request *req) return crypto_ahash_reqtfm(req)->update(req); } -static inline int crypto_ahash_final(struct ahash_request *req) -{ - return crypto_ahash_reqtfm(req)->final(req); -} - static inline void ahash_request_set_tfm(struct ahash_request *req, struct crypto_ahash *tfm) { diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h index 179dd8fdfa14..5bfad8c80595 100644 --- a/include/crypto/internal/hash.h +++ b/include/crypto/internal/hash.h @@ -59,6 +59,11 @@ int crypto_hash_walk_first_compat(struct hash_desc *hdesc, struct crypto_hash_walk *walk, struct scatterlist *sg, unsigned int len); +static inline int crypto_hash_walk_last(struct crypto_hash_walk *walk) +{ + return !(walk->entrylen | walk->total); +} + int crypto_register_ahash(struct ahash_alg *alg); int crypto_unregister_ahash(struct ahash_alg *alg); int ahash_register_instance(struct crypto_template *tmpl, @@ -94,6 +99,7 @@ static inline void crypto_drop_shash(struct crypto_shash_spawn *spawn) struct shash_alg *shash_attr_alg(struct rtattr *rta, u32 type, u32 mask); int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc); +int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc); int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc); int crypto_init_shash_ops_async(struct crypto_tfm *tfm); -- cgit v1.2.3 From 069a9dce384e211784ce6fdfaf1f13921327480d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 1 Jul 2009 13:03:52 -0400 Subject: drm/radeon: add some missing pci ids Also, fix ordering for a couple others Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 45c18672b093..7174818c2c13 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -43,6 +43,7 @@ {0x1002, 0x4A4F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R420|RADEON_NEW_MEMMAP}, \ {0x1002, 0x4A50, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R420|RADEON_NEW_MEMMAP}, \ {0x1002, 0x4A54, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R420|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x4B48, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R420|RADEON_NEW_MEMMAP}, \ {0x1002, 0x4B49, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R420|RADEON_NEW_MEMMAP}, \ {0x1002, 0x4B4A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R420|RADEON_NEW_MEMMAP}, \ {0x1002, 0x4B4B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R420|RADEON_NEW_MEMMAP}, \ @@ -262,6 +263,7 @@ {0x1002, 0x9440, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9441, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9442, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_RV770|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x9443, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9444, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9446, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x944A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ @@ -346,12 +348,12 @@ {0x1002, 0x9599, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV635|RADEON_NEW_MEMMAP}, \ {0x1002, 0x959B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV635|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x95C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV620|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x95C2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV620|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x95C4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV620|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x95C5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV620|RADEON_NEW_MEMMAP}, \ {0x1002, 0x95C6, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV620|RADEON_NEW_MEMMAP}, \ {0x1002, 0x95C7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV620|RADEON_NEW_MEMMAP}, \ {0x1002, 0x95C9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV620|RADEON_NEW_MEMMAP}, \ - {0x1002, 0x95C2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV620|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ - {0x1002, 0x95C4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV620|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x95CC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV620|RADEON_NEW_MEMMAP}, \ {0x1002, 0x95CD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV620|RADEON_NEW_MEMMAP}, \ {0x1002, 0x95CE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV620|RADEON_NEW_MEMMAP}, \ -- cgit v1.2.3 From d782c3f95c9263dc0b98e7115f75f1e18b9600b3 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 22 Jun 2009 13:17:08 +0800 Subject: drm/mode: add the CVT algorithm in kernel space Add the CVT algorithm in kernel space. And this function can be called to generate the required modeline. I copied it from the file of xserver/hw/xfree86/modes/xf86cvt.c. What I have done is to translate it by using integer calculation. This is to avoid the float-point calculation in kernel space. [airlied:- cleaned up some bits] Signed-off-by: Zhao Yakui Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 219 ++++++++++++++++++++++++++++++++++++++++++++ include/drm/drm_crtc.h | 3 + 2 files changed, 222 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 54f492a488a9..0dbc7e4f8643 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -8,6 +8,7 @@ * Copyright © 2007 Dave Airlie * Copyright © 2007-2008 Intel Corporation * Jesse Barnes + * Copyright 2005-2006 Luc Verhaegen * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -61,6 +62,224 @@ void drm_mode_debug_printmodeline(struct drm_display_mode *mode) } EXPORT_SYMBOL(drm_mode_debug_printmodeline); +/** + * drm_cvt_mode -create a modeline based on CVT algorithm + * @dev: DRM device + * @hdisplay: hdisplay size + * @vdisplay: vdisplay size + * @vrefresh : vrefresh rate + * @reduced : Whether the GTF calculation is simplified + * @interlaced:Whether the interlace is supported + * + * LOCKING: + * none. + * + * return the modeline based on CVT algorithm + * + * This function is called to generate the modeline based on CVT algorithm + * according to the hdisplay, vdisplay, vrefresh. 
+ * It is based from the VESA(TM) Coordinated Video Timing Generator by + * Graham Loveridge April 9, 2003 available at + * http://www.vesa.org/public/CVT/CVTd6r1.xls + * + * And it is copied from xf86CVTmode in xserver/hw/xfree86/modes/xf86cvt.c. + * What I have done is to translate it by using integer calculation. + */ +#define HV_FACTOR 1000 +struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, int hdisplay, + int vdisplay, int vrefresh, + bool reduced, bool interlaced) +{ + /* 1) top/bottom margin size (% of height) - default: 1.8, */ +#define CVT_MARGIN_PERCENTAGE 18 + /* 2) character cell horizontal granularity (pixels) - default 8 */ +#define CVT_H_GRANULARITY 8 + /* 3) Minimum vertical porch (lines) - default 3 */ +#define CVT_MIN_V_PORCH 3 + /* 4) Minimum number of vertical back porch lines - default 6 */ +#define CVT_MIN_V_BPORCH 6 + /* Pixel Clock step (kHz) */ +#define CVT_CLOCK_STEP 250 + struct drm_display_mode *drm_mode; + bool margins = false; + unsigned int vfieldrate, hperiod; + int hdisplay_rnd, hmargin, vdisplay_rnd, vmargin, vsync; + int interlace; + + /* allocate the drm_display_mode structure. If failure, we will + * return directly + */ + drm_mode = drm_mode_create(dev); + if (!drm_mode) + return NULL; + + /* the CVT default refresh rate is 60Hz */ + if (!vrefresh) + vrefresh = 60; + + /* the required field fresh rate */ + if (interlaced) + vfieldrate = vrefresh * 2; + else + vfieldrate = vrefresh; + + /* horizontal pixels */ + hdisplay_rnd = hdisplay - (hdisplay % CVT_H_GRANULARITY); + + /* determine the left&right borders */ + hmargin = 0; + if (margins) { + hmargin = hdisplay_rnd * CVT_MARGIN_PERCENTAGE / 1000; + hmargin -= hmargin % CVT_H_GRANULARITY; + } + /* find the total active pixels */ + drm_mode->hdisplay = hdisplay_rnd + 2 * hmargin; + + /* find the number of lines per field */ + if (interlaced) + vdisplay_rnd = vdisplay / 2; + else + vdisplay_rnd = vdisplay; + + /* find the top & bottom borders */ + vmargin = 0; + if (margins) + vmargin = vdisplay_rnd * CVT_MARGIN_PERCENTAGE / 1000; + + drm_mode->vdisplay = vdisplay_rnd + 2 * vmargin; + + /* Interlaced */ + if (interlaced) + interlace = 1; + else + interlace = 0; + + /* Determine VSync Width from aspect ratio */ + if (!(vdisplay % 3) && ((vdisplay * 4 / 3) == hdisplay)) + vsync = 4; + else if (!(vdisplay % 9) && ((vdisplay * 16 / 9) == hdisplay)) + vsync = 5; + else if (!(vdisplay % 10) && ((vdisplay * 16 / 10) == hdisplay)) + vsync = 6; + else if (!(vdisplay % 4) && ((vdisplay * 5 / 4) == hdisplay)) + vsync = 7; + else if (!(vdisplay % 9) && ((vdisplay * 15 / 9) == hdisplay)) + vsync = 7; + else /* custom */ + vsync = 10; + + if (!reduced) { + /* simplify the GTF calculation */ + /* 4) Minimum time of vertical sync + back porch interval (µs) + * default 550.0 + */ + int tmp1, tmp2; +#define CVT_MIN_VSYNC_BP 550 + /* 3) Nominal HSync width (% of line period) - default 8 */ +#define CVT_HSYNC_PERCENTAGE 8 + unsigned int hblank_percentage; + int vsyncandback_porch, vback_porch, hblank; + + /* estimated the horizontal period */ + tmp1 = HV_FACTOR * 1000000 - + CVT_MIN_VSYNC_BP * HV_FACTOR * vfieldrate; + tmp2 = (vdisplay_rnd + 2 * vmargin + CVT_MIN_V_PORCH) * 2 + + interlace; + hperiod = tmp1 * 2 / (tmp2 * vfieldrate); + + tmp1 = CVT_MIN_VSYNC_BP * HV_FACTOR / hperiod + 1; + /* 9. Find number of lines in sync + backporch */ + if (tmp1 < (vsync + CVT_MIN_V_PORCH)) + vsyncandback_porch = vsync + CVT_MIN_V_PORCH; + else + vsyncandback_porch = tmp1; + /* 10. 
Find number of lines in back porch */ + vback_porch = vsyncandback_porch - vsync; + drm_mode->vtotal = vdisplay_rnd + 2 * vmargin + + vsyncandback_porch + CVT_MIN_V_PORCH; + /* 5) Definition of Horizontal blanking time limitation */ + /* Gradient (%/kHz) - default 600 */ +#define CVT_M_FACTOR 600 + /* Offset (%) - default 40 */ +#define CVT_C_FACTOR 40 + /* Blanking time scaling factor - default 128 */ +#define CVT_K_FACTOR 128 + /* Scaling factor weighting - default 20 */ +#define CVT_J_FACTOR 20 +#define CVT_M_PRIME (CVT_M_FACTOR * CVT_K_FACTOR / 256) +#define CVT_C_PRIME ((CVT_C_FACTOR - CVT_J_FACTOR) * CVT_K_FACTOR / 256 + \ + CVT_J_FACTOR) + /* 12. Find ideal blanking duty cycle from formula */ + hblank_percentage = CVT_C_PRIME * HV_FACTOR - CVT_M_PRIME * + hperiod / 1000; + /* 13. Blanking time */ + if (hblank_percentage < 20 * HV_FACTOR) + hblank_percentage = 20 * HV_FACTOR; + hblank = drm_mode->hdisplay * hblank_percentage / + (100 * HV_FACTOR - hblank_percentage); + hblank -= hblank % (2 * CVT_H_GRANULARITY); + /* 14. find the total pixes per line */ + drm_mode->htotal = drm_mode->hdisplay + hblank; + drm_mode->hsync_end = drm_mode->hdisplay + hblank / 2; + drm_mode->hsync_start = drm_mode->hsync_end - + (drm_mode->htotal * CVT_HSYNC_PERCENTAGE) / 100; + drm_mode->hsync_start += CVT_H_GRANULARITY - + drm_mode->hsync_start % CVT_H_GRANULARITY; + /* fill the Vsync values */ + drm_mode->vsync_start = drm_mode->vdisplay + CVT_MIN_V_PORCH; + drm_mode->vsync_end = drm_mode->vsync_start + vsync; + } else { + /* Reduced blanking */ + /* Minimum vertical blanking interval time (µs)- default 460 */ +#define CVT_RB_MIN_VBLANK 460 + /* Fixed number of clocks for horizontal sync */ +#define CVT_RB_H_SYNC 32 + /* Fixed number of clocks for horizontal blanking */ +#define CVT_RB_H_BLANK 160 + /* Fixed number of lines for vertical front porch - default 3*/ +#define CVT_RB_VFPORCH 3 + int vbilines; + int tmp1, tmp2; + /* 8. Estimate Horizontal period. */ + tmp1 = HV_FACTOR * 1000000 - + CVT_RB_MIN_VBLANK * HV_FACTOR * vfieldrate; + tmp2 = vdisplay_rnd + 2 * vmargin; + hperiod = tmp1 / (tmp2 * vfieldrate); + /* 9. Find number of lines in vertical blanking */ + vbilines = CVT_RB_MIN_VBLANK * HV_FACTOR / hperiod + 1; + /* 10. Check if vertical blanking is sufficient */ + if (vbilines < (CVT_RB_VFPORCH + vsync + CVT_MIN_V_BPORCH)) + vbilines = CVT_RB_VFPORCH + vsync + CVT_MIN_V_BPORCH; + /* 11. Find total number of lines in vertical field */ + drm_mode->vtotal = vdisplay_rnd + 2 * vmargin + vbilines; + /* 12. Find total number of pixels in a line */ + drm_mode->htotal = drm_mode->hdisplay + CVT_RB_H_BLANK; + /* Fill in HSync values */ + drm_mode->hsync_end = drm_mode->hdisplay + CVT_RB_H_BLANK / 2; + drm_mode->hsync_start = drm_mode->hsync_end = CVT_RB_H_SYNC; + } + /* 15/13. Find pixel clock frequency (kHz for xf86) */ + drm_mode->clock = drm_mode->htotal * HV_FACTOR * 1000 / hperiod; + drm_mode->clock -= drm_mode->clock % CVT_CLOCK_STEP; + /* 18/16. 
Find actual vertical frame frequency */ + /* ignore - just set the mode flag for interlaced */ + if (interlaced) + drm_mode->vtotal *= 2; + /* Fill the mode line name */ + drm_mode_set_name(drm_mode); + if (reduced) + drm_mode->flags |= (DRM_MODE_FLAG_PHSYNC | + DRM_MODE_FLAG_NVSYNC); + else + drm_mode->flags |= (DRM_MODE_FLAG_PVSYNC | + DRM_MODE_FLAG_NHSYNC); + if (interlaced) + drm_mode->flags |= DRM_MODE_FLAG_INTERLACE; + + return drm_mode; +} +EXPORT_SYMBOL(drm_cvt_mode); + /** * drm_mode_set_name - set the name on a mode * @mode: name will be set in this mode diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 7300fb866767..820bc0977e5e 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -736,4 +736,7 @@ extern int drm_mode_gamma_get_ioctl(struct drm_device *dev, extern int drm_mode_gamma_set_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); extern bool drm_detect_hdmi_monitor(struct edid *edid); +extern struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, + int hdisplay, int vdisplay, int vrefresh, + bool reduced, bool interlaced); #endif /* __DRM_CRTC_H__ */ -- cgit v1.2.3 From 26bbdadad356ec02d33657858d91675f3e9aca94 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 22 Jun 2009 13:17:09 +0800 Subject: drm/mode: add the GTF algorithm in kernel space Add the GTF algorithm in kernel space. And this function can be called to generate the required modeline. I copied it from the file of xserver/hw/xfree86/modes/xf86gtf.c. What I have done is to translate it by using integer calculation. This is to avoid the float-point calculation in kernel space. At the same tie I also refer to the function of fb_get_mode in drivers/video/fbmon.c Signed-off-by: Zhao Yakui Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 197 ++++++++++++++++++++++++++++++++++++++++++++ include/drm/drm_crtc.h | 3 + 2 files changed, 200 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 0dbc7e4f8643..fd489d76fbbc 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -9,6 +9,7 @@ * Copyright © 2007-2008 Intel Corporation * Jesse Barnes * Copyright 2005-2006 Luc Verhaegen + * Copyright (c) 2001, Andy Ritger aritger@nvidia.com * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -280,6 +281,202 @@ struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, int hdisplay, } EXPORT_SYMBOL(drm_cvt_mode); +/** + * drm_gtf_mode - create the modeline based on GTF algorithm + * + * @dev :drm device + * @hdisplay :hdisplay size + * @vdisplay :vdisplay size + * @vrefresh :vrefresh rate. + * @interlaced :whether the interlace is supported + * @margins :whether the margin is supported + * + * LOCKING. + * none. + * + * return the modeline based on GTF algorithm + * + * This function is to create the modeline based on the GTF algorithm. + * Generalized Timing Formula is derived from: + * GTF Spreadsheet by Andy Morrish (1/5/97) + * available at http://www.vesa.org + * + * And it is copied from the file of xserver/hw/xfree86/modes/xf86gtf.c. + * What I have done is to translate it by using integer calculation. 
+ * I also refer to the function of fb_get_mode in the file of + * drivers/video/fbmon.c + */ +struct drm_display_mode *drm_gtf_mode(struct drm_device *dev, int hdisplay, + int vdisplay, int vrefresh, + bool interlaced, int margins) +{ + /* 1) top/bottom margin size (% of height) - default: 1.8, */ +#define GTF_MARGIN_PERCENTAGE 18 + /* 2) character cell horizontal granularity (pixels) - default 8 */ +#define GTF_CELL_GRAN 8 + /* 3) Minimum vertical porch (lines) - default 3 */ +#define GTF_MIN_V_PORCH 1 + /* width of vsync in lines */ +#define V_SYNC_RQD 3 + /* width of hsync as % of total line */ +#define H_SYNC_PERCENT 8 + /* min time of vsync + back porch (microsec) */ +#define MIN_VSYNC_PLUS_BP 550 + /* blanking formula gradient */ +#define GTF_M 600 + /* blanking formula offset */ +#define GTF_C 40 + /* blanking formula scaling factor */ +#define GTF_K 128 + /* blanking formula scaling factor */ +#define GTF_J 20 + /* C' and M' are part of the Blanking Duty Cycle computation */ +#define GTF_C_PRIME (((GTF_C - GTF_J) * GTF_K / 256) + GTF_J) +#define GTF_M_PRIME (GTF_K * GTF_M / 256) + struct drm_display_mode *drm_mode; + unsigned int hdisplay_rnd, vdisplay_rnd, vfieldrate_rqd; + int top_margin, bottom_margin; + int interlace; + unsigned int hfreq_est; + int vsync_plus_bp, vback_porch; + unsigned int vtotal_lines, vfieldrate_est, hperiod; + unsigned int vfield_rate, vframe_rate; + int left_margin, right_margin; + unsigned int total_active_pixels, ideal_duty_cycle; + unsigned int hblank, total_pixels, pixel_freq; + int hsync, hfront_porch, vodd_front_porch_lines; + unsigned int tmp1, tmp2; + + drm_mode = drm_mode_create(dev); + if (!drm_mode) + return NULL; + + /* 1. In order to give correct results, the number of horizontal + * pixels requested is first processed to ensure that it is divisible + * by the character size, by rounding it to the nearest character + * cell boundary: + */ + hdisplay_rnd = (hdisplay + GTF_CELL_GRAN / 2) / GTF_CELL_GRAN; + hdisplay_rnd = hdisplay_rnd * GTF_CELL_GRAN; + + /* 2. If interlace is requested, the number of vertical lines assumed + * by the calculation must be halved, as the computation calculates + * the number of vertical lines per field. + */ + if (interlaced) + vdisplay_rnd = vdisplay / 2; + else + vdisplay_rnd = vdisplay; + + /* 3. Find the frame rate required: */ + if (interlaced) + vfieldrate_rqd = vrefresh * 2; + else + vfieldrate_rqd = vrefresh; + + /* 4. Find number of lines in Top margin: */ + top_margin = 0; + if (margins) + top_margin = (vdisplay_rnd * GTF_MARGIN_PERCENTAGE + 500) / + 1000; + /* 5. Find number of lines in bottom margin: */ + bottom_margin = top_margin; + + /* 6. If interlace is required, then set variable interlace: */ + if (interlaced) + interlace = 1; + else + interlace = 0; + + /* 7. Estimate the Horizontal frequency */ + { + tmp1 = (1000000 - MIN_VSYNC_PLUS_BP * vfieldrate_rqd) / 500; + tmp2 = (vdisplay_rnd + 2 * top_margin + GTF_MIN_V_PORCH) * + 2 + interlace; + hfreq_est = (tmp2 * 1000 * vfieldrate_rqd) / tmp1; + } + + /* 8. Find the number of lines in V sync + back porch */ + /* [V SYNC+BP] = RINT(([MIN VSYNC+BP] * hfreq_est / 1000000)) */ + vsync_plus_bp = MIN_VSYNC_PLUS_BP * hfreq_est / 1000; + vsync_plus_bp = (vsync_plus_bp + 500) / 1000; + /* 9. Find the number of lines in V back porch alone: */ + vback_porch = vsync_plus_bp - V_SYNC_RQD; + /* 10. Find the total number of lines in Vertical field period: */ + vtotal_lines = vdisplay_rnd + top_margin + bottom_margin + + vsync_plus_bp + GTF_MIN_V_PORCH; + /* 11. 
Estimate the Vertical field frequency: */ + vfieldrate_est = hfreq_est / vtotal_lines; + /* 12. Find the actual horizontal period: */ + hperiod = 1000000 / (vfieldrate_rqd * vtotal_lines); + + /* 13. Find the actual Vertical field frequency: */ + vfield_rate = hfreq_est / vtotal_lines; + /* 14. Find the Vertical frame frequency: */ + if (interlaced) + vframe_rate = vfield_rate / 2; + else + vframe_rate = vfield_rate; + /* 15. Find number of pixels in left margin: */ + if (margins) + left_margin = (hdisplay_rnd * GTF_MARGIN_PERCENTAGE + 500) / + 1000; + else + left_margin = 0; + + /* 16.Find number of pixels in right margin: */ + right_margin = left_margin; + /* 17.Find total number of active pixels in image and left and right */ + total_active_pixels = hdisplay_rnd + left_margin + right_margin; + /* 18.Find the ideal blanking duty cycle from blanking duty cycle */ + ideal_duty_cycle = GTF_C_PRIME * 1000 - + (GTF_M_PRIME * 1000000 / hfreq_est); + /* 19.Find the number of pixels in the blanking time to the nearest + * double character cell: */ + hblank = total_active_pixels * ideal_duty_cycle / + (100000 - ideal_duty_cycle); + hblank = (hblank + GTF_CELL_GRAN) / (2 * GTF_CELL_GRAN); + hblank = hblank * 2 * GTF_CELL_GRAN; + /* 20.Find total number of pixels: */ + total_pixels = total_active_pixels + hblank; + /* 21.Find pixel clock frequency: */ + pixel_freq = total_pixels * hfreq_est / 1000; + /* Stage 1 computations are now complete; I should really pass + * the results to another function and do the Stage 2 computations, + * but I only need a few more values so I'll just append the + * computations here for now */ + /* 17. Find the number of pixels in the horizontal sync period: */ + hsync = H_SYNC_PERCENT * total_pixels / 100; + hsync = (hsync + GTF_CELL_GRAN / 2) / GTF_CELL_GRAN; + hsync = hsync * GTF_CELL_GRAN; + /* 18. Find the number of pixels in horizontal front porch period */ + hfront_porch = hblank / 2 - hsync; + /* 36. 
Find the number of lines in the odd front porch period: */ + vodd_front_porch_lines = GTF_MIN_V_PORCH ; + + /* finally, pack the results in the mode struct */ + drm_mode->hdisplay = hdisplay_rnd; + drm_mode->hsync_start = hdisplay_rnd + hfront_porch; + drm_mode->hsync_end = drm_mode->hsync_start + hsync; + drm_mode->htotal = total_pixels; + drm_mode->vdisplay = vdisplay_rnd; + drm_mode->vsync_start = vdisplay_rnd + vodd_front_porch_lines; + drm_mode->vsync_end = drm_mode->vsync_start + V_SYNC_RQD; + drm_mode->vtotal = vtotal_lines; + + drm_mode->clock = pixel_freq; + + drm_mode_set_name(drm_mode); + drm_mode->flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC; + + if (interlaced) { + drm_mode->vtotal *= 2; + drm_mode->flags |= DRM_MODE_FLAG_INTERLACE; + } + + return drm_mode; +} +EXPORT_SYMBOL(drm_gtf_mode); /** * drm_mode_set_name - set the name on a mode * @mode: name will be set in this mode diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 820bc0977e5e..125994d8ac0b 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -739,4 +739,7 @@ extern bool drm_detect_hdmi_monitor(struct edid *edid); extern struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, int hdisplay, int vdisplay, int vrefresh, bool reduced, bool interlaced); +extern struct drm_display_mode *drm_gtf_mode(struct drm_device *dev, + int hdisplay, int vdisplay, int vrefresh, + bool interlaced, int margins); #endif /* __DRM_CRTC_H__ */ -- cgit v1.2.3 From d1724078d6a01177c1db4ea0b75fda1ca8a73d57 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Wed, 24 Jun 2009 19:57:35 +0200 Subject: ttm: Make messages more readable. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- include/drm/ttm/ttm_module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_module.h b/include/drm/ttm/ttm_module.h index 889a4c7958ae..d1d433834e4f 100644 --- a/include/drm/ttm/ttm_module.h +++ b/include/drm/ttm/ttm_module.h @@ -33,7 +33,7 @@ #include -#define TTM_PFX "[TTM]" +#define TTM_PFX "[TTM] " enum ttm_global_types { TTM_GLOBAL_TTM_MEM = 0, -- cgit v1.2.3 From ad49f501867cba87e1e45e5ebae0b12435d68bf1 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 10 Jul 2009 22:36:26 +1000 Subject: drm/ttm/radeon: add dma32 support. This add support for using dma32 memory on gpus that really need it. Currently IGPs are left without DMA32 but we might need to change that unless we can fix rs690. 
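[ Editor's sketch, not part of the patch: the bus-type policy this change adds to radeon_device_init(), pulled out here for readability before the full diff. The helper name radeon_select_dma_mask() is invented for illustration; the patch open-codes this logic in radeon_device_init(). ]

/* Illustration only -- mirrors the hunk added to radeon_device_init() below. */
static int radeon_select_dma_mask(struct radeon_device *rdev)
{
	int dma_bits;

	/* PCIE and IGP parts can address 40 bits; AGP and plain PCI stay at 32. */
	rdev->need_dma32 = false;
	if (rdev->flags & RADEON_IS_AGP)
		rdev->need_dma32 = true;
	if (rdev->flags & RADEON_IS_PCI)
		rdev->need_dma32 = true;

	dma_bits = rdev->need_dma32 ? 32 : 40;
	return pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits));
}
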
Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_device.c | 17 +++++++++++++++-- drivers/gpu/drm/radeon/radeon_ttm.c | 3 ++- drivers/gpu/drm/ttm/ttm_bo.c | 7 ++++++- drivers/gpu/drm/ttm/ttm_tt.c | 9 +++++++-- include/drm/ttm/ttm_bo_driver.h | 5 ++++- 6 files changed, 35 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e7662ba9abfb..3060ce14071e 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -624,6 +624,7 @@ struct radeon_device { bool gpu_lockup; bool shutdown; bool suspend; + bool need_dma32; }; int radeon_device_init(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 332911267ebe..27a5ac969953 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -450,6 +450,7 @@ int radeon_device_init(struct radeon_device *rdev, uint32_t flags) { int r, ret; + int dma_bits; DRM_INFO("radeon: Initializing kernel modesetting.\n"); rdev->shutdown = false; @@ -492,8 +493,20 @@ int radeon_device_init(struct radeon_device *rdev, return r; } - /* Report DMA addressing limitation */ - r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(32)); + /* set DMA mask + need_dma32 flags. + * PCIE - can handle 40-bits. + * IGP - can handle 40-bits (in theory) + * AGP - generally dma32 is safest + * PCI - only dma32 + */ + rdev->need_dma32 = false; + if (rdev->flags & RADEON_IS_AGP) + rdev->need_dma32 = true; + if (rdev->flags & RADEON_IS_PCI) + rdev->need_dma32 = true; + + dma_bits = rdev->need_dma32 ? 32 : 40; + r = pci_set_dma_mask(rdev->pdev, DMA_BIT_MASK(dma_bits)); if (r) { printk(KERN_WARNING "radeon: No suitable DMA available.\n"); } diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 1227a97f5169..4ca9aa9203d0 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -442,7 +442,8 @@ int radeon_ttm_init(struct radeon_device *rdev) /* No others user of address space so set it to 0 */ r = ttm_bo_device_init(&rdev->mman.bdev, rdev->mman.mem_global_ref.object, - &radeon_bo_driver, DRM_FILE_PAGE_OFFSET); + &radeon_bo_driver, DRM_FILE_PAGE_OFFSET, + rdev->need_dma32); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r; diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index a753598a5e35..e55e7972c897 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -224,6 +224,9 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) TTM_ASSERT_LOCKED(&bo->mutex); bo->ttm = NULL; + if (bdev->need_dma32) + page_flags |= TTM_PAGE_FLAG_DMA32; + switch (bo->type) { case ttm_bo_type_device: if (zero_alloc) @@ -1332,7 +1335,8 @@ EXPORT_SYMBOL(ttm_bo_device_release); int ttm_bo_device_init(struct ttm_bo_device *bdev, struct ttm_mem_global *mem_glob, - struct ttm_bo_driver *driver, uint64_t file_page_offset) + struct ttm_bo_driver *driver, uint64_t file_page_offset, + bool need_dma32) { int ret = -EINVAL; @@ -1369,6 +1373,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, INIT_LIST_HEAD(&bdev->ddestroy); INIT_LIST_HEAD(&bdev->swap_lru); bdev->dev_mapping = NULL; + bdev->need_dma32 = need_dma32; ttm_mem_init_shrink(&bdev->shrink, ttm_bo_swapout); ret = ttm_mem_register_shrink(mem_glob, &bdev->shrink); if (unlikely(ret != 0)) { diff --git a/drivers/gpu/drm/ttm/ttm_tt.c 
b/drivers/gpu/drm/ttm/ttm_tt.c index 75dc8bd24592..81ab81f030a3 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -131,10 +131,15 @@ static void ttm_tt_free_page_directory(struct ttm_tt *ttm) static struct page *ttm_tt_alloc_page(unsigned page_flags) { + gfp_t gfp_flags = GFP_HIGHUSER; + if (page_flags & TTM_PAGE_FLAG_ZERO_ALLOC) - return alloc_page(GFP_HIGHUSER | __GFP_ZERO); + gfp_flags |= __GFP_ZERO; + + if (page_flags & TTM_PAGE_FLAG_DMA32) + gfp_flags |= __GFP_DMA32; - return alloc_page(GFP_HIGHUSER); + return alloc_page(gfp_flags); } static void ttm_tt_free_user_pages(struct ttm_tt *ttm) diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 62ed733c52a2..ea83dd23a4d7 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -121,6 +121,7 @@ struct ttm_backend { #define TTM_PAGE_FLAG_SWAPPED (1 << 4) #define TTM_PAGE_FLAG_PERSISTANT_SWAP (1 << 5) #define TTM_PAGE_FLAG_ZERO_ALLOC (1 << 6) +#define TTM_PAGE_FLAG_DMA32 (1 << 7) enum ttm_caching_state { tt_uncached, @@ -429,6 +430,8 @@ struct ttm_bo_device { */ struct delayed_work wq; + + bool need_dma32; }; /** @@ -648,7 +651,7 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev); extern int ttm_bo_device_init(struct ttm_bo_device *bdev, struct ttm_mem_global *mem_glob, struct ttm_bo_driver *driver, - uint64_t file_page_offset); + uint64_t file_page_offset, bool need_dma32); /** * ttm_bo_reserve: -- cgit v1.2.3 From 3d39cecc4841e8d4c4abdb401d10180f5faaded0 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 8 Jul 2009 15:23:30 +0100 Subject: intel-iommu: Remove superfluous iova_alloc_lock from IOVA code We only ever obtain this lock immediately before the iova_rbtree_lock, and release it immediately after the iova_rbtree_lock. So ditch it and just use iova_rbtree_lock. 
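[ Editor's sketch, not part of the patch: the before/after locking pattern described above, condensed from the reserve_iova() hunk in this patch. ]

	/* before: iova_alloc_lock only ever brackets the rbtree lock */
	spin_lock_irqsave(&iovad->iova_alloc_lock, flags);
	spin_lock(&iovad->iova_rbtree_lock);
	/* ... walk/insert in iovad->rbroot ... */
	spin_unlock(&iovad->iova_rbtree_lock);
	spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags);

	/* after: the rbtree lock alone provides the same exclusion */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	/* ... walk/insert in iovad->rbroot ... */
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
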
[v2: Remove the lockdep bits this time too] Signed-off-by: David Woodhouse --- drivers/pci/intel-iommu.c | 3 --- drivers/pci/iova.c | 16 ++++------------ include/linux/iova.h | 1 - 3 files changed, 4 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index c5f7c73cbb55..d6a857397ec3 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1309,7 +1309,6 @@ static void iommu_detach_domain(struct dmar_domain *domain, } static struct iova_domain reserved_iova_list; -static struct lock_class_key reserved_alloc_key; static struct lock_class_key reserved_rbtree_key; static void dmar_init_reserved_ranges(void) @@ -1320,8 +1319,6 @@ static void dmar_init_reserved_ranges(void) init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN); - lockdep_set_class(&reserved_iova_list.iova_alloc_lock, - &reserved_alloc_key); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 46dd440e2315..7914951ef29a 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -22,7 +22,6 @@ void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit) { - spin_lock_init(&iovad->iova_alloc_lock); spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; iovad->cached32_node = NULL; @@ -205,7 +204,6 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, bool size_aligned) { - unsigned long flags; struct iova *new_iova; int ret; @@ -219,11 +217,9 @@ alloc_iova(struct iova_domain *iovad, unsigned long size, if (size_aligned) size = __roundup_pow_of_two(size); - spin_lock_irqsave(&iovad->iova_alloc_lock, flags); ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn, new_iova, size_aligned); - spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); if (ret) { free_iova_mem(new_iova); return NULL; @@ -381,8 +377,7 @@ reserve_iova(struct iova_domain *iovad, struct iova *iova; unsigned int overlap = 0; - spin_lock_irqsave(&iovad->iova_alloc_lock, flags); - spin_lock(&iovad->iova_rbtree_lock); + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { if (__is_range_overlap(node, pfn_lo, pfn_hi)) { iova = container_of(node, struct iova, node); @@ -402,8 +397,7 @@ reserve_iova(struct iova_domain *iovad, iova = __insert_new_range(iovad, pfn_lo, pfn_hi); finish: - spin_unlock(&iovad->iova_rbtree_lock); - spin_unlock_irqrestore(&iovad->iova_alloc_lock, flags); + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return iova; } @@ -420,8 +414,7 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) unsigned long flags; struct rb_node *node; - spin_lock_irqsave(&from->iova_alloc_lock, flags); - spin_lock(&from->iova_rbtree_lock); + spin_lock_irqsave(&from->iova_rbtree_lock, flags); for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { struct iova *iova = container_of(node, struct iova, node); struct iova *new_iova; @@ -430,6 +423,5 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", iova->pfn_lo, iova->pfn_lo); } - spin_unlock(&from->iova_rbtree_lock); - spin_unlock_irqrestore(&from->iova_alloc_lock, flags); + spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); } diff --git a/include/linux/iova.h b/include/linux/iova.h index 228f6c94b69c..76a0759e88ec 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -28,7 +28,6 @@ struct iova { 
/* holds all the iova translations for a domain */ struct iova_domain { - spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */ spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ struct rb_node *cached32_node; /* Save last alloced node */ -- cgit v1.2.3 From fa64966473830219fe74952029ddb0e981a87749 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 15 Jul 2009 21:16:05 +0800 Subject: crypto: shash - Fix digest size offset When an shash algorithm is exported as ahash, ahash will access its digest size through hash_alg_common. That's why the shash layout needs to match hash_alg_common. This wasn't the case because the alignment weren't identical. This patch fixes the problem. Signed-off-by: Herbert Xu --- include/crypto/hash.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/crypto/hash.h b/include/crypto/hash.h index 3e89ce16b59c..26cb1eb16f4c 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -75,7 +75,8 @@ struct shash_alg { unsigned int descsize; /* These fields must match hash_alg_common. */ - unsigned int digestsize; + unsigned int digestsize + __attribute__ ((aligned(__alignof__(struct hash_alg_common)))); unsigned int statesize; struct crypto_alg base; -- cgit v1.2.3 From a76761b621bcd8336065c4fe3a74f046858bc34c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 15 Jul 2009 23:35:14 +0900 Subject: percpu: add dummy pcpu_lpage_remapped() for !CONFIG_SMP !CONFIG_SMP was missing pcpu_lpage_remapped() definition causing build failure. Add dummy implementation. This was discovered by linux-next testing. Signed-off-by: Tejun Heo Cc: Randy Dunlap Cc: Kamalesh Babulal Cc: Stephen Rothwell --- include/linux/percpu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 8ce91af4aa19..e134c8229631 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -184,6 +184,11 @@ static inline void free_percpu(void *p) static inline void __init setup_per_cpu_areas(void) { } +static inline void *pcpu_lpage_remapped(void *kaddr) +{ + return NULL; +} + #endif /* CONFIG_SMP */ #define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ -- cgit v1.2.3 From b333b3d22822cf9b295990866798e9239c9dee72 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Jun 2009 01:34:48 +0000 Subject: wireless extensions: make netns aware This makes wireless extensions netns aware. The tasklet sending the events is converted to a work struct so that we can rtnl_lock() in it. Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- include/net/net_namespace.h | 3 +++ net/wireless/wext.c | 61 +++++++++++++++++++++------------------------ 2 files changed, 32 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 3882db1e263a..5c5136fceea8 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -79,6 +79,9 @@ struct net { #endif #ifdef CONFIG_XFRM struct netns_xfrm xfrm; +#endif +#ifdef CONFIG_WIRELESS_EXT + struct sk_buff_head wext_nlevents; #endif struct net_generic *gen; }; diff --git a/net/wireless/wext.c b/net/wireless/wext.c index 425f7d58b961..db8351a5a87d 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -1257,48 +1257,48 @@ int compat_wext_handle_ioctl(struct net *net, unsigned int cmd, } #endif -/************************* EVENT PROCESSING *************************/ -/* - * Process events generated by the wireless layer or the driver. - * Most often, the event will be propagated through rtnetlink - */ +static int __net_init wext_pernet_init(struct net *net) +{ + skb_queue_head_init(&net->wext_nlevents); + return 0; +} -/* ---------------------------------------------------------------- */ -/* - * Locking... - * ---------- - * - * Thanks to Herbert Xu for fixing - * the locking issue in here and implementing this code ! - * - * The issue : wireless_send_event() is often called in interrupt context, - * while the Netlink layer can never be called in interrupt context. - * The fully formed RtNetlink events are queued, and then a tasklet is run - * to feed those to Netlink. - * The skb_queue is interrupt safe, and its lock is not held while calling - * Netlink, so there is no possibility of dealock. - * Jean II - */ +static void __net_exit wext_pernet_exit(struct net *net) +{ + skb_queue_purge(&net->wext_nlevents); +} -static struct sk_buff_head wireless_nlevent_queue; +static struct pernet_operations wext_pernet_ops = { + .init = wext_pernet_init, + .exit = wext_pernet_exit, +}; static int __init wireless_nlevent_init(void) { - skb_queue_head_init(&wireless_nlevent_queue); + return register_pernet_subsys(&wext_pernet_ops); return 0; } subsys_initcall(wireless_nlevent_init); -static void wireless_nlevent_process(unsigned long data) +/* Process events generated by the wireless layer or the driver. 
*/ +static void wireless_nlevent_process(struct work_struct *work) { struct sk_buff *skb; + struct net *net; + + rtnl_lock(); - while ((skb = skb_dequeue(&wireless_nlevent_queue))) - rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + for_each_net(net) { + while ((skb = skb_dequeue(&net->wext_nlevents))) + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, + GFP_KERNEL); + } + + rtnl_unlock(); } -static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); +static DECLARE_WORK(wireless_nlevent_work, wireless_nlevent_process); /* ---------------------------------------------------------------- */ /* @@ -1348,9 +1348,6 @@ static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len) struct sk_buff *skb; int err; - if (!net_eq(dev_net(dev), &init_net)) - return; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!skb) return; @@ -1363,8 +1360,8 @@ static void rtmsg_iwinfo(struct net_device *dev, char *event, int event_len) } NETLINK_CB(skb).dst_group = RTNLGRP_LINK; - skb_queue_tail(&wireless_nlevent_queue, skb); - tasklet_schedule(&wireless_nlevent_tasklet); + skb_queue_tail(&dev_net(dev)->wext_nlevents, skb); + schedule_work(&wireless_nlevent_work); } /* ---------------------------------------------------------------- */ -- cgit v1.2.3 From 1dacc76d0014a034b8aca14237c127d7c19d7726 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 1 Jul 2009 11:26:02 +0000 Subject: net/compat/wext: send different messages to compat tasks Wireless extensions have the unfortunate problem that events are multicast netlink messages, and are not independent of pointer size. Thus, currently 32-bit tasks on 64-bit platforms cannot properly receive events and fail with all kinds of strange problems, for instance wpa_supplicant never notices disassociations, due to the way the 64-bit event looks (to a 32-bit process), the fact that the address is all zeroes is lost, it thinks instead it is 00:00:00:00:01:00. The same problem existed with the ioctls, until David Miller fixed those some time ago in an heroic effort. A different problem caused by this is that we cannot send the ASSOCREQIE/ASSOCRESPIE events because sending them causes a 32-bit wpa_supplicant on a 64-bit system to overwrite its internal information, which is worse than it not getting the information at all -- so we currently resort to sending a custom string event that it then parses. This, however, has a severe size limitation we are frequently hitting with modern access points; this limitation would can be lifted after this patch by sending the correct binary, not custom, event. A similar problem apparently happens for some other netlink users on x86_64 with 32-bit tasks due to the alignment for 64-bit quantities. In order to fix these problems, I have implemented a way to send compat messages to tasks. When sending an event, we send the non-compat event data together with a compat event data in skb_shinfo(main_skb)->frag_list. Then, when the event is read from the socket, the netlink code makes sure to pass out only the skb that is compatible with the task. This approach was suggested by David Miller, my original approach required always sending two skbs but that had various small problems. To determine whether compat is needed or not, I have used the MSG_CMSG_COMPAT flag, and adjusted the call path for recv and recvfrom to include it, even if those calls do not have a cmsg parameter. 
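[ Editor's sketch, not part of the patch: a condensed view of the receive-side selection just described, taken from the netlink_recvmsg() hunk later in this patch; error handling and the later frag_list restore are omitted here. ]

	if (skb_shinfo(skb)->frag_list) {
		if (flags & MSG_CMSG_COMPAT) {
			/* 32-bit task: hand out the compat copy instead */
			struct sk_buff *compskb = skb_shinfo(skb)->frag_list;

			skb_get(compskb);
			kfree_skb(skb);
			skb = compskb;
		} else {
			/* native task: detach the compat copy so it is never seen */
			frag = skb_shinfo(skb)->frag_list;
			skb_shinfo(skb)->frag_list = NULL;
		}
	}
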
I have not solved one small part of the problem, and I don't think it is necessary to: if a 32-bit application uses read() rather than any form of recvmsg() it will still get the wrong (64-bit) event. However, neither do applications actually do this, nor would it be a regression. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- arch/mips/kernel/scall64-n32.S | 2 +- arch/mips/kernel/scall64-o32.S | 4 +-- arch/sparc/kernel/sys32.S | 2 +- include/linux/wireless.h | 8 +++++ net/Kconfig | 20 +++++++++++ net/compat.c | 17 +++++++-- net/netlink/af_netlink.c | 36 ++++++++++++++++++- net/wireless/wext.c | 78 ++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 160 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 15874f9812cc..7c4a94f43706 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -164,7 +164,7 @@ EXPORT(sysn32_call_table) PTR sys_connect PTR sys_accept PTR sys_sendto - PTR sys_recvfrom + PTR compat_sys_recvfrom PTR compat_sys_sendmsg /* 6045 */ PTR compat_sys_recvmsg PTR sys_shutdown diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 781e0f1e9533..821fc978673d 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -378,8 +378,8 @@ sys_call_table: PTR sys_getsockname PTR sys_getsockopt PTR sys_listen - PTR sys_recv /* 4175 */ - PTR sys_recvfrom + PTR compat_sys_recv /* 4175 */ + PTR compat_sys_recvfrom PTR compat_sys_recvmsg PTR sys_send PTR compat_sys_sendmsg diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index f061c4dda9ef..3762f6c78944 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -121,7 +121,7 @@ SIGN2(sys32_syslog, sys_syslog, %o0, %o2) SIGN1(sys32_umask, sys_umask, %o0) SIGN3(sys32_tgkill, sys_tgkill, %o0, %o1, %o2) SIGN1(sys32_sendto, sys_sendto, %o0) -SIGN1(sys32_recvfrom, sys_recvfrom, %o0) +SIGN1(sys32_recvfrom, compat_sys_recvfrom, %o0) SIGN3(sys32_socket, sys_socket, %o0, %o1, %o2) SIGN2(sys32_connect, sys_connect, %o0, %o2) SIGN2(sys32_bind, sys_bind, %o0, %o2) diff --git a/include/linux/wireless.h b/include/linux/wireless.h index cb24204851f7..5b4c6c772a9b 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -1132,6 +1132,14 @@ struct __compat_iw_event { }; #define IW_EV_COMPAT_LCP_LEN offsetof(struct __compat_iw_event, pointer) #define IW_EV_COMPAT_POINT_OFF offsetof(struct compat_iw_point, length) + +/* Size of the various events for compat */ +#define IW_EV_COMPAT_CHAR_LEN (IW_EV_COMPAT_LCP_LEN + IFNAMSIZ) +#define IW_EV_COMPAT_UINT_LEN (IW_EV_COMPAT_LCP_LEN + sizeof(__u32)) +#define IW_EV_COMPAT_FREQ_LEN (IW_EV_COMPAT_LCP_LEN + sizeof(struct iw_freq)) +#define IW_EV_COMPAT_PARAM_LEN (IW_EV_COMPAT_LCP_LEN + sizeof(struct iw_param)) +#define IW_EV_COMPAT_ADDR_LEN (IW_EV_COMPAT_LCP_LEN + sizeof(struct sockaddr)) +#define IW_EV_COMPAT_QUAL_LEN (IW_EV_COMPAT_LCP_LEN + sizeof(struct iw_quality)) #define IW_EV_COMPAT_POINT_LEN \ (IW_EV_COMPAT_LCP_LEN + sizeof(struct compat_iw_point) - \ IW_EV_COMPAT_POINT_OFF) diff --git a/net/Kconfig b/net/Kconfig index 7051b9710675..041c35edb763 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -23,6 +23,26 @@ menuconfig NET if NET +config WANT_COMPAT_NETLINK_MESSAGES + bool + help + This option can be selected by other options that need compat + netlink messages. 
+ +config COMPAT_NETLINK_MESSAGES + def_bool y + depends on COMPAT + depends on WIRELESS_EXT || WANT_COMPAT_NETLINK_MESSAGES + help + This option makes it possible to send different netlink messages + to tasks depending on whether the task is a compat task or not. To + achieve this, you need to set skb_shinfo(skb)->frag_list to the + compat skb before sending the skb, the netlink code will sort out + which message to actually pass to the task. + + Newly written code should NEVER need this option but do + compat-independent messages instead! + menu "Networking options" source "net/packet/Kconfig" diff --git a/net/compat.c b/net/compat.c index 8d739053afe4..12728b17a226 100644 --- a/net/compat.c +++ b/net/compat.c @@ -743,6 +743,18 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } +asmlinkage long compat_sys_recv(int fd, void __user *buf, size_t len, unsigned flags) +{ + return sys_recv(fd, buf, len, flags | MSG_CMSG_COMPAT); +} + +asmlinkage long compat_sys_recvfrom(int fd, void __user *buf, size_t len, + unsigned flags, struct sockaddr __user *addr, + int __user *addrlen) +{ + return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen); +} + asmlinkage long compat_sys_socketcall(int call, u32 __user *args) { int ret; @@ -788,10 +800,11 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) ret = sys_sendto(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), a[5]); break; case SYS_RECV: - ret = sys_recv(a0, compat_ptr(a1), a[2], a[3]); + ret = compat_sys_recv(a0, compat_ptr(a1), a[2], a[3]); break; case SYS_RECVFROM: - ret = sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], compat_ptr(a[4]), compat_ptr(a[5])); + ret = compat_sys_recvfrom(a0, compat_ptr(a1), a[2], a[3], + compat_ptr(a[4]), compat_ptr(a[5])); break; case SYS_SHUTDOWN: ret = sys_shutdown(a0,a1); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index d46da6cb92e4..da3163d15ef0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1361,7 +1361,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags&MSG_DONTWAIT; size_t copied; - struct sk_buff *skb; + struct sk_buff *skb, *frag __maybe_unused = NULL; int err; if (flags&MSG_OOB) @@ -1373,6 +1373,35 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; +#ifdef CONFIG_COMPAT_NETLINK_MESSAGES + if (unlikely(skb_shinfo(skb)->frag_list)) { + bool need_compat = !!(flags & MSG_CMSG_COMPAT); + + /* + * If this skb has a frag_list, then here that means that + * we will have to use the frag_list skb for compat tasks + * and the regular skb for non-compat tasks. + * + * The skb might (and likely will) be cloned, so we can't + * just reset frag_list and go on with things -- we need to + * keep that. For the compat case that's easy -- simply get + * a reference to the compat skb and free the regular one + * including the frag. For the non-compat case, we need to + * avoid sending the frag to the user -- so assign NULL but + * restore it below before freeing the skb. 
+ */ + if (need_compat) { + struct sk_buff *compskb = skb_shinfo(skb)->frag_list; + skb_get(compskb); + kfree_skb(skb); + skb = compskb; + } else { + frag = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; + } + } +#endif + msg->msg_namelen = 0; copied = skb->len; @@ -1403,6 +1432,11 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, siocb->scm->creds = *NETLINK_CREDS(skb); if (flags & MSG_TRUNC) copied = skb->len; + +#ifdef CONFIG_COMPAT_NETLINK_MESSAGES + skb_shinfo(skb)->frag_list = frag; +#endif + skb_free_datagram(sk, skb); if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) diff --git a/net/wireless/wext.c b/net/wireless/wext.c index ee35e6422f1f..3fe3c2c0ce11 100644 --- a/net/wireless/wext.c +++ b/net/wireless/wext.c @@ -417,6 +417,21 @@ static const int event_type_size[] = { IW_EV_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ }; +#ifdef CONFIG_COMPAT +static const int compat_event_type_size[] = { + IW_EV_COMPAT_LCP_LEN, /* IW_HEADER_TYPE_NULL */ + 0, + IW_EV_COMPAT_CHAR_LEN, /* IW_HEADER_TYPE_CHAR */ + 0, + IW_EV_COMPAT_UINT_LEN, /* IW_HEADER_TYPE_UINT */ + IW_EV_COMPAT_FREQ_LEN, /* IW_HEADER_TYPE_FREQ */ + IW_EV_COMPAT_ADDR_LEN, /* IW_HEADER_TYPE_ADDR */ + 0, + IW_EV_COMPAT_POINT_LEN, /* Without variable payload */ + IW_EV_COMPAT_PARAM_LEN, /* IW_HEADER_TYPE_PARAM */ + IW_EV_COMPAT_QUAL_LEN, /* IW_HEADER_TYPE_QUAL */ +}; +#endif /************************ COMMON SUBROUTINES ************************/ /* @@ -1348,6 +1363,22 @@ void wireless_send_event(struct net_device * dev, struct sk_buff *skb; struct nlmsghdr *nlh; struct nlattr *nla; +#ifdef CONFIG_COMPAT + struct __compat_iw_event *compat_event; + struct compat_iw_point compat_wrqu; + struct sk_buff *compskb; +#endif + + /* + * Nothing in the kernel sends scan events with data, be safe. + * This is necessary because we cannot fix up scan event data + * for compat, due to being contained in 'extra', but normally + * applications are required to retrieve the scan data anyway + * and no data is included in the event, this codifies that + * practice. 
+ */ + if (WARN_ON(cmd == SIOCGIWSCAN && extra)) + extra = NULL; /* Get the description of the Event */ if (cmd <= SIOCIWLAST) { @@ -1446,7 +1477,54 @@ void wireless_send_event(struct net_device * dev, memcpy(((char *) event) + hdr_len, extra, extra_len); nlmsg_end(skb, nlh); +#ifdef CONFIG_COMPAT + hdr_len = compat_event_type_size[descr->header_type]; + event_len = hdr_len + extra_len; + compskb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!compskb) { + kfree_skb(skb); + return; + } + + /* Send via the RtNetlink event channel */ + nlh = rtnetlink_ifinfo_prep(dev, compskb); + if (WARN_ON(!nlh)) { + kfree_skb(skb); + kfree_skb(compskb); + return; + } + + /* Add the wireless events in the netlink packet */ + nla = nla_reserve(compskb, IFLA_WIRELESS, event_len); + if (!nla) { + kfree_skb(skb); + kfree_skb(compskb); + return; + } + compat_event = nla_data(nla); + + compat_event->len = event_len; + compat_event->cmd = cmd; + if (descr->header_type == IW_HEADER_TYPE_POINT) { + compat_wrqu.length = wrqu->data.length; + compat_wrqu.flags = wrqu->data.flags; + memcpy(&compat_event->pointer, + ((char *) &compat_wrqu) + IW_EV_COMPAT_POINT_OFF, + hdr_len - IW_EV_COMPAT_LCP_LEN); + if (extra_len) + memcpy(((char *) compat_event) + hdr_len, + extra, extra_len); + } else { + /* extra_len must be zero, so no if (extra) needed */ + memcpy(&compat_event->pointer, wrqu, + hdr_len - IW_EV_COMPAT_LCP_LEN); + } + + nlmsg_end(compskb, nlh); + + skb_shinfo(skb)->frag_list = compskb; +#endif skb_queue_tail(&dev_net(dev)->wext_nlevents, skb); schedule_work(&wireless_nlevent_work); } -- cgit v1.2.3 From 43237b5490e8f2f4679decd660064ff35ce490cc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 20 May 2009 18:41:58 +0200 Subject: ext3: Get rid of extenddisksize parameter of ext3_get_blocks_handle() Get rid of extenddisksize parameter of ext3_get_blocks_handle(). This seems to be a relict from some old days and setting disksize in this function does not make much sence. Currently it was set only by ext3_getblk(). Since the parameter has some effect only if create == 1, it is easy to check that the three callers which end up calling ext3_getblk() with create == 1 (ext3_append, ext3_quota_write, ext3_mkdir) do the right thing and set disksize themselves. Signed-off-by: Jan Kara --- fs/ext3/dir.c | 3 +-- fs/ext3/inode.c | 13 +++---------- include/linux/ext3_fs.h | 2 +- 3 files changed, 5 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 3d724a95882f..373fa90c796a 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -130,8 +130,7 @@ static int ext3_readdir(struct file * filp, struct buffer_head *bh = NULL; map_bh.b_state = 0; - err = ext3_get_blocks_handle(NULL, inode, blk, 1, - &map_bh, 0, 0); + err = ext3_get_blocks_handle(NULL, inode, blk, 1, &map_bh, 0); if (err > 0) { pgoff_t index = map_bh.b_blocknr >> (PAGE_CACHE_SHIFT - inode->i_blkbits); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 4d7da6f6184b..b49908a167ae 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -788,7 +788,7 @@ err_out: int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, - int create, int extend_disksize) + int create) { int err = -EIO; int offsets[4]; @@ -911,13 +911,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, if (!err) err = ext3_splice_branch(handle, inode, iblock, partial, indirect_blks, count); - /* - * i_disksize growing is protected by truncate_mutex. 
Don't forget to - * protect it if you're about to implement concurrent - * ext3_get_block() -bzzz - */ - if (!err && extend_disksize && inode->i_size > ei->i_disksize) - ei->i_disksize = inode->i_size; mutex_unlock(&ei->truncate_mutex); if (err) goto cleanup; @@ -972,7 +965,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock, } ret = ext3_get_blocks_handle(handle, inode, iblock, - max_blocks, bh_result, create, 0); + max_blocks, bh_result, create); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); ret = 0; @@ -1005,7 +998,7 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, dummy.b_blocknr = -1000; buffer_trace_init(&dummy.b_history); err = ext3_get_blocks_handle(handle, inode, block, 1, - &dummy, create, 1); + &dummy, create); /* * ext3_get_blocks_handle() returns number of blocks * mapped. 0 in case of a HOLE. diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 634a5e5aba3e..7499b3667798 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -874,7 +874,7 @@ struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, - int create, int extend_disksize); + int create); extern struct inode *ext3_iget(struct super_block *, unsigned long); extern int ext3_write_inode (struct inode *, int); -- cgit v1.2.3 From 5c9228f0cfb09a098a8a380116b42ae099e967b6 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 16 Jul 2009 16:06:09 +0100 Subject: vt: drop bootmem/slab memory distinction Bootmem is not used for the vt screen buffer anymore as slab is now available at the time the console is initialized. Get rid of the now superfluous distinction between slab and bootmem, it's always slab. This also fixes a kmalloc leak which Catalin described thusly: Commit a5f4f52e ("vt: use kzalloc() instead of the bootmem allocator") replaced the alloc_bootmem() with kzalloc() but didn't set vc_kmalloced to 1 and the memory block is later leaked. 
The corresponding kmemleak trace: unreferenced object 0xdf828000 (size 8192): comm "swapper", pid 0, jiffies 4294937296 backtrace: [] __save_stack_trace+0x17/0x1c [] log_early+0x55/0x84 [] kmemleak_alloc+0x33/0x3c [] __kmalloc+0xd7/0xe4 [] con_init+0xbf/0x1b8 [] console_init+0x11/0x20 [] start_kernel+0x137/0x1e4 Signed-off-by: Johannes Weiner Reviewed-by: Pekka Enberg Tested-by: Catalin Marinas Signed-off-by: Andrew Morton Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/char/vt.c | 12 +++--------- include/linux/console_struct.h | 1 - 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 7947bd1b4cf7..404f4c1ee431 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -770,14 +770,12 @@ int vc_allocate(unsigned int currcons) /* return 0 on success */ visual_init(vc, currcons, 1); if (!*vc->vc_uni_pagedir_loc) con_set_default_unimap(vc); - if (!vc->vc_kmalloced) - vc->vc_screenbuf = kmalloc(vc->vc_screenbuf_size, GFP_KERNEL); + vc->vc_screenbuf = kmalloc(vc->vc_screenbuf_size, GFP_KERNEL); if (!vc->vc_screenbuf) { kfree(vc); vc_cons[currcons].d = NULL; return -ENOMEM; } - vc->vc_kmalloced = 1; vc_init(vc, vc->vc_rows, vc->vc_cols, 1); vcs_make_sysfs(currcons); atomic_notifier_call_chain(&vt_notifier_list, VT_ALLOCATE, ¶m); @@ -913,10 +911,8 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, if (new_scr_end > new_origin) scr_memsetw((void *)new_origin, vc->vc_video_erase_char, new_scr_end - new_origin); - if (vc->vc_kmalloced) - kfree(vc->vc_screenbuf); + kfree(vc->vc_screenbuf); vc->vc_screenbuf = newscreen; - vc->vc_kmalloced = 1; vc->vc_screenbuf_size = new_screen_size; set_origin(vc); @@ -995,8 +991,7 @@ void vc_deallocate(unsigned int currcons) vc->vc_sw->con_deinit(vc); put_pid(vc->vt_pid); module_put(vc->vc_sw->owner); - if (vc->vc_kmalloced) - kfree(vc->vc_screenbuf); + kfree(vc->vc_screenbuf); if (currcons >= MIN_NR_CONSOLES) kfree(vc); vc_cons[currcons].d = NULL; @@ -2881,7 +2876,6 @@ static int __init con_init(void) INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); visual_init(vc, currcons, 1); vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_NOWAIT); - vc->vc_kmalloced = 0; vc_init(vc, vc->vc_rows, vc->vc_cols, currcons || !vc->vc_sw->con_save_screen); } diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h index d71f7c0f931b..38fe59dc89ae 100644 --- a/include/linux/console_struct.h +++ b/include/linux/console_struct.h @@ -89,7 +89,6 @@ struct vc_data { unsigned int vc_need_wrap : 1; unsigned int vc_can_do_color : 1; unsigned int vc_report_mouse : 2; - unsigned int vc_kmalloced : 1; unsigned char vc_utf : 1; /* Unicode UTF-8 encoding */ unsigned char vc_utf_count; int vc_utf_char; -- cgit v1.2.3 From 5bb459bb45d1ad3c177485dcf0af01580aa31125 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 10 Jul 2009 03:48:23 +0200 Subject: kernel: rename is_single_threaded(task) to current_is_single_threaded(void) - is_single_threaded(task) is not safe unless task == current, we can't use task->signal or task->mm. - it doesn't make sense unless task == current, the task can fork right after the check. Rename it to current_is_single_threaded() and kill the argument. 
Signed-off-by: Oleg Nesterov Acked-by: David Howells Signed-off-by: James Morris --- include/linux/sched.h | 2 +- lib/is_single_threaded.c | 3 ++- security/keys/process_keys.c | 2 +- security/selinux/hooks.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 16a982e389fb..0839a2c9b952 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2075,7 +2075,7 @@ static inline unsigned long wait_task_inactive(struct task_struct *p, #define for_each_process(p) \ for (p = &init_task ; (p = next_task(p)) != &init_task ; ) -extern bool is_single_threaded(struct task_struct *); +extern bool current_is_single_threaded(void); /* * Careful: do_each_thread/while_each_thread is a double loop so diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index 2762516e0a5e..434010980bdf 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -15,8 +15,9 @@ /* * Returns true if the task does not share ->mm with another thread/process. */ -bool is_single_threaded(struct task_struct *task) +bool current_is_single_threaded(void) { + struct task_struct *task = current; struct mm_struct *mm = task->mm; struct task_struct *p, *t; bool ret; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 276d27882ce8..ed929af466d3 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -702,7 +702,7 @@ long join_session_keyring(const char *name) /* only permit this if there's a single thread in the thread group - * this avoids us having to adjust the creds on all threads and risking * ENOMEM */ - if (!is_single_threaded(current)) + if (!current_is_single_threaded()) return -EMLINK; new = prepare_creds(); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 2081055f6783..e65677da36bd 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5187,7 +5187,7 @@ static int selinux_setprocattr(struct task_struct *p, /* Only allow single threaded processes to change context */ error = -EPERM; - if (!is_single_threaded(p)) { + if (!current_is_single_threaded()) { error = security_bounded_transition(tsec->sid, sid); if (error) goto abort_change; -- cgit v1.2.3 From 4dc6dc7162c08b9965163c9ab3f9375d4adff2c7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Jul 2009 23:13:10 +0000 Subject: net: sock_copy() fixes Commit e912b1142be8f1e2c71c71001dc992c6e5eb2ec1 (net: sk_prot_alloc() should not blindly overwrite memory) took care of not zeroing whole new socket at allocation time. sock_copy() is another spot where we should be very careful. We should not set refcnt to a non null value, until we are sure other fields are correctly setup, or a lockless reader could catch this socket by mistake, while not fully (re)initialized. This patch puts sk_node & sk_refcnt to the very beginning of struct sock to ease sock_copy() & sk_prot_alloc() job. We add appropriate smp_wmb() before sk_refcnt initializations to match our RCU requirements (changes to sock keys should be committed to memory before sk_refcnt setting) Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/sock.h | 32 +++++++++++++++++++------------- net/core/sock.c | 20 ++++++++++++++++++-- 2 files changed, 37 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 2c0da9239b95..950409dcec3d 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -104,15 +104,15 @@ struct net; /** * struct sock_common - minimal network layer representation of sockets + * @skc_node: main hash linkage for various protocol lookup tables + * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol + * @skc_refcnt: reference count + * @skc_hash: hash value used with various protocol lookup tables * @skc_family: network address family * @skc_state: Connection state * @skc_reuse: %SO_REUSEADDR setting * @skc_bound_dev_if: bound device index if != 0 - * @skc_node: main hash linkage for various protocol lookup tables - * @skc_nulls_node: main hash linkage for UDP/UDP-Lite protocol * @skc_bind_node: bind hash linkage for various protocol lookup tables - * @skc_refcnt: reference count - * @skc_hash: hash value used with various protocol lookup tables * @skc_prot: protocol handlers inside a network family * @skc_net: reference to the network namespace of this socket * @@ -120,17 +120,21 @@ struct net; * for struct sock and struct inet_timewait_sock. */ struct sock_common { - unsigned short skc_family; - volatile unsigned char skc_state; - unsigned char skc_reuse; - int skc_bound_dev_if; + /* + * first fields are not copied in sock_copy() + */ union { struct hlist_node skc_node; struct hlist_nulls_node skc_nulls_node; }; - struct hlist_node skc_bind_node; atomic_t skc_refcnt; + unsigned int skc_hash; + unsigned short skc_family; + volatile unsigned char skc_state; + unsigned char skc_reuse; + int skc_bound_dev_if; + struct hlist_node skc_bind_node; struct proto *skc_prot; #ifdef CONFIG_NET_NS struct net *skc_net; @@ -208,15 +212,17 @@ struct sock { * don't add nothing before this first member (__sk_common) --acme */ struct sock_common __sk_common; +#define sk_node __sk_common.skc_node +#define sk_nulls_node __sk_common.skc_nulls_node +#define sk_refcnt __sk_common.skc_refcnt + +#define sk_copy_start __sk_common.skc_hash +#define sk_hash __sk_common.skc_hash #define sk_family __sk_common.skc_family #define sk_state __sk_common.skc_state #define sk_reuse __sk_common.skc_reuse #define sk_bound_dev_if __sk_common.skc_bound_dev_if -#define sk_node __sk_common.skc_node -#define sk_nulls_node __sk_common.skc_nulls_node #define sk_bind_node __sk_common.skc_bind_node -#define sk_refcnt __sk_common.skc_refcnt -#define sk_hash __sk_common.skc_hash #define sk_prot __sk_common.skc_prot #define sk_net __sk_common.skc_net kmemcheck_bitfield_begin(flags); diff --git a/net/core/sock.c b/net/core/sock.c index ba5d2116aea1..d9eec153d531 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -919,13 +919,19 @@ static inline void sock_lock_init(struct sock *sk) af_family_keys + sk->sk_family); } +/* + * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet, + * even temporarly, because of RCU lookups. sk_node should also be left as is. 
+ */ static void sock_copy(struct sock *nsk, const struct sock *osk) { #ifdef CONFIG_SECURITY_NETWORK void *sptr = nsk->sk_security; #endif - - memcpy(nsk, osk, osk->sk_prot->obj_size); + BUILD_BUG_ON(offsetof(struct sock, sk_copy_start) != + sizeof(osk->sk_node) + sizeof(osk->sk_refcnt)); + memcpy(&nsk->sk_copy_start, &osk->sk_copy_start, + osk->sk_prot->obj_size - offsetof(struct sock, sk_copy_start)); #ifdef CONFIG_SECURITY_NETWORK nsk->sk_security = sptr; security_sk_clone(osk, nsk); @@ -1140,6 +1146,11 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) newsk->sk_err = 0; newsk->sk_priority = 0; + /* + * Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.txt for details) + */ + smp_wmb(); atomic_set(&newsk->sk_refcnt, 2); /* @@ -1855,6 +1866,11 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); + /* + * Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.txt for details) + */ + smp_wmb(); atomic_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_wmem_alloc, 1); atomic_set(&sk->sk_drops, 0); -- cgit v1.2.3 From 5780888bcac316508eb5f4dd23bbea8b5057647c Mon Sep 17 00:00:00 2001 From: Matias Zabaljauregui Date: Thu, 18 Jun 2009 11:44:06 -0300 Subject: lguest: fix journey fix: "make Guest" was complaining about duplicated G:032 Signed-off-by: Matias Zabaljauregui Signed-off-by: Rusty Russell --- arch/x86/include/asm/lguest_hcall.h | 2 +- arch/x86/lguest/boot.c | 2 +- include/linux/lguest.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index d31c4a684078..33600a66755f 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -30,7 +30,7 @@ #include #include -/*G:031 But first, how does our Guest contact the Host to ask for privileged +/*G:030 But first, how does our Guest contact the Host to ask for privileged * operations? There are two ways: the direct way is to make a "hypercall", * to make requests of the Host Itself. * diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 7bc65f0f62c4..0188fd37b6c0 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -1079,7 +1079,7 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, return insn_len; } -/*G:030 Once we get to lguest_init(), we know we're a Guest. The various +/*G:029 Once we get to lguest_init(), we know we're a Guest. The various * pv_ops structures in the kernel provide points for (almost) every routine we * have to override to avoid privileged instructions. */ __init void lguest_init(void) diff --git a/include/linux/lguest.h b/include/linux/lguest.h index 7bc1440fc473..dbf2479e808e 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h @@ -11,7 +11,7 @@ #define LG_CLOCK_MIN_DELTA 100UL #define LG_CLOCK_MAX_DELTA ULONG_MAX -/*G:032 The second method of communicating with the Host is to via "struct +/*G:031 The second method of communicating with the Host is to via "struct * lguest_data". Once the Guest's initialization hypercall tells the Host where * this is, the Guest and Host both publish information in it. 
:*/ struct lguest_data -- cgit v1.2.3 From e79f07e2925b10f09a2621650c16f3d6ea778747 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 7 Jul 2009 08:47:10 -0600 Subject: virtio_net: Sync header with qemu Qemu added support for a few extra RX modes that Linux doesn't currently make use of. Sync the headers to maintain consistency. Signed-off-by: Alex Williamson Signed-off-by: Rusty Russell --- include/linux/virtio_net.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index cec79adbe3ea..9c543d6ac535 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -27,6 +27,7 @@ #define VIRTIO_NET_F_CTRL_VQ 17 /* Control channel available */ #define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */ #define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */ +#define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ @@ -81,14 +82,19 @@ typedef __u8 virtio_net_ctrl_ack; #define VIRTIO_NET_ERR 1 /* - * Control the RX mode, ie. promisucous and allmulti. PROMISC and - * ALLMULTI commands require an "out" sg entry containing a 1 byte - * state value, zero = disable, non-zero = enable. These commands - * are supported with the VIRTIO_NET_F_CTRL_RX feature. + * Control the RX mode, ie. promisucous, allmulti, etc... + * All commands require an "out" sg entry containing a 1 byte + * state value, zero = disable, non-zero = enable. Commands + * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature. + * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA. */ #define VIRTIO_NET_CTRL_RX 0 #define VIRTIO_NET_CTRL_RX_PROMISC 0 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 + #define VIRTIO_NET_CTRL_RX_ALLUNI 2 + #define VIRTIO_NET_CTRL_RX_NOMULTI 3 + #define VIRTIO_NET_CTRL_RX_NOUNI 4 + #define VIRTIO_NET_CTRL_RX_NOBCAST 5 /* * Control the MAC filter table. -- cgit v1.2.3 From e36aa25a533962b08402530e8443ac804a454e27 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 14 Jul 2009 14:21:04 +0000 Subject: tun: Allow tap device to send/receive UFO packets. - Allow setting UFO on tap device and handle UFO packets. Signed-off-by: Sridhar Samudrala --------------------------------------------------------- Signed-off-by: David S. Miller --- drivers/net/tun.c | 13 ++++++++++++- include/linux/if_tun.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index dfc1054e4cbd..a998b6a9c245 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -641,6 +641,9 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, case VIRTIO_NET_HDR_GSO_TCPV6: skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; break; + case VIRTIO_NET_HDR_GSO_UDP: + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + break; default: tun->dev->stats.rx_frame_errors++; kfree_skb(skb); @@ -726,6 +729,8 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) gso.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + gso.gso_type = VIRTIO_NET_HDR_GSO_UDP; else BUG(); if (sinfo->gso_type & SKB_GSO_TCP_ECN) @@ -1073,7 +1078,8 @@ static int set_offload(struct net_device *dev, unsigned long arg) old_features = dev->features; /* Unset features, set them as we chew on the arg. 
*/ features = (old_features & ~(NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST - |NETIF_F_TSO_ECN|NETIF_F_TSO|NETIF_F_TSO6)); + |NETIF_F_TSO_ECN|NETIF_F_TSO|NETIF_F_TSO6 + |NETIF_F_UFO)); if (arg & TUN_F_CSUM) { features |= NETIF_F_HW_CSUM|NETIF_F_SG|NETIF_F_FRAGLIST; @@ -1090,6 +1096,11 @@ static int set_offload(struct net_device *dev, unsigned long arg) features |= NETIF_F_TSO6; arg &= ~(TUN_F_TSO4|TUN_F_TSO6); } + + if (arg & TUN_F_UFO) { + features |= NETIF_F_UFO; + arg &= ~TUN_F_UFO; + } } /* This gives the user a way to test for new features in future by diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 915ba5789f0e..3f5fd523b49d 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -62,6 +62,7 @@ #define TUN_F_TSO4 0x02 /* I can handle TSO for IPv4 packets */ #define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */ #define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */ +#define TUN_F_UFO 0x10 /* I can handle UFO packets */ /* Protocol info prepended to the packets (when IFF_NO_PI is not set) */ #define TUN_PKT_STRIP 0x0001 -- cgit v1.2.3 From 0741241c6b80bfd58417e95de984d60c9e9ef2a0 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 17 Jul 2009 10:13:21 -0700 Subject: connector: make callback argument type explicit The connector documentation states that the argument to the callback function is always a pointer to a struct cn_msg, but rather than encode it in the API itself, it uses a void pointer everywhere. This doesn't make much sense to encode the pointer in documentation as it prevents proper C type checking from occurring and can easily allow people to use the wrong pointer type. So convert the argument type to an explicit struct cn_msg pointer. Signed-off-by: Mike Frysinger Signed-off-by: David S. 
Miller --- Documentation/connector/cn_test.c | 4 +--- drivers/connector/cn_proc.c | 3 +-- drivers/connector/cn_queue.c | 7 +++++-- drivers/connector/connector.c | 6 +++--- drivers/staging/dst/dcore.c | 3 +-- drivers/video/uvesafb.c | 3 +-- drivers/w1/w1_netlink.c | 3 +-- include/linux/connector.h | 6 +++--- 8 files changed, 16 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c index f688eba87704..50d5ce4899c8 100644 --- a/Documentation/connector/cn_test.c +++ b/Documentation/connector/cn_test.c @@ -32,10 +32,8 @@ static char cn_test_name[] = "cn_test"; static struct sock *nls; static struct timer_list cn_test_timer; -void cn_test_callback(void *data) +void cn_test_callback(struct cn_msg *msg) { - struct cn_msg *msg = (struct cn_msg *)data; - printk("%s: %lu: idx=%x, val=%x, seq=%u, ack=%u, len=%d: %s.\n", __func__, jiffies, msg->id.idx, msg->id.val, msg->seq, msg->ack, msg->len, (char *)msg->data); diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index c5afc98e2675..85e5dc0431fe 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -202,9 +202,8 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) * cn_proc_mcast_ctl * @data: message sent from userspace via the connector */ -static void cn_proc_mcast_ctl(void *data) +static void cn_proc_mcast_ctl(struct cn_msg *msg) { - struct cn_msg *msg = data; enum proc_cn_mcast_op *mc_op = NULL; int err = 0; diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c index c769ef269fb5..d478aefcd3be 100644 --- a/drivers/connector/cn_queue.c +++ b/drivers/connector/cn_queue.c @@ -87,7 +87,9 @@ void cn_queue_wrapper(struct work_struct *work) kfree(d->free); } -static struct cn_callback_entry *cn_queue_alloc_callback_entry(char *name, struct cb_id *id, void (*callback)(void *)) +static struct cn_callback_entry * +cn_queue_alloc_callback_entry(char *name, struct cb_id *id, + void (*callback)(struct cn_msg *)) { struct cn_callback_entry *cbq; @@ -120,7 +122,8 @@ int cn_cb_equal(struct cb_id *i1, struct cb_id *i2) return ((i1->idx == i2->idx) && (i1->val == i2->val)); } -int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)) +int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, + void (*callback)(struct cn_msg *)) { struct cn_callback_entry *cbq, *__cbq; int found = 0; diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index fd336c5a9057..3f45669f5d76 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -269,7 +269,8 @@ static void cn_notify(struct cb_id *id, u32 notify_event) * * May sleep. */ -int cn_add_callback(struct cb_id *id, char *name, void (*callback)(void *)) +int cn_add_callback(struct cb_id *id, char *name, + void (*callback)(struct cn_msg *)) { int err; struct cn_dev *dev = &cdev; @@ -351,9 +352,8 @@ static int cn_ctl_msg_equals(struct cn_ctl_msg *m1, struct cn_ctl_msg *m2) * * Used for notification of a request's processing. 
*/ -static void cn_callback(void *data) +static void cn_callback(struct cn_msg *msg) { - struct cn_msg *msg = data; struct cn_ctl_msg *ctl; struct cn_ctl_entry *ent; u32 size; diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c index fad25b753042..84724187ec3e 100644 --- a/drivers/staging/dst/dcore.c +++ b/drivers/staging/dst/dcore.c @@ -846,10 +846,9 @@ static dst_command_func dst_commands[] = { /* * Configuration parser. */ -static void cn_dst_callback(void *data) +static void cn_dst_callback(struct cn_msg *msg) { struct dst_ctl *ctl; - struct cn_msg *msg = data; int err; struct dst_ctl_ack ack; struct dst_node *n = NULL, *tmp; diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index ca5b4643a401..e98baf6916b8 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -67,9 +67,8 @@ static DEFINE_MUTEX(uvfb_lock); * find the kernel part of the task struct, copy the registers and * the buffer contents and then complete the task. */ -static void uvesafb_cn_callback(void *data) +static void uvesafb_cn_callback(struct cn_msg *msg) { - struct cn_msg *msg = data; struct uvesafb_task *utask; struct uvesafb_ktask *task; diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c index fdf72851c574..52ccb3d3a963 100644 --- a/drivers/w1/w1_netlink.c +++ b/drivers/w1/w1_netlink.c @@ -306,9 +306,8 @@ static int w1_netlink_send_error(struct cn_msg *rcmsg, struct w1_netlink_msg *rm return error; } -static void w1_cn_callback(void *data) +static void w1_cn_callback(struct cn_msg *msg) { - struct cn_msg *msg = data; struct w1_netlink_msg *m = (struct w1_netlink_msg *)(msg + 1); struct w1_netlink_cmd *cmd; struct w1_slave *sl; diff --git a/include/linux/connector.h b/include/linux/connector.h index b68d27850d51..47ebf416f512 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -136,7 +136,7 @@ struct cn_callback_data { void *ddata; void *callback_priv; - void (*callback) (void *); + void (*callback) (struct cn_msg *); void *free; }; @@ -167,11 +167,11 @@ struct cn_dev { struct cn_queue_dev *cbdev; }; -int cn_add_callback(struct cb_id *, char *, void (*callback) (void *)); +int cn_add_callback(struct cb_id *, char *, void (*callback) (struct cn_msg *)); void cn_del_callback(struct cb_id *); int cn_netlink_send(struct cn_msg *, u32, gfp_t); -int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(void *)); +int cn_queue_add_callback(struct cn_queue_dev *dev, char *name, struct cb_id *id, void (*callback)(struct cn_msg *)); void cn_queue_del_callback(struct cn_queue_dev *dev, struct cb_id *id); int queue_cn_work(struct cn_callback_entry *cbq, struct work_struct *work); -- cgit v1.2.3 From 04e448d9a386640a79a4aa71251aa1cdd314f662 Mon Sep 17 00:00:00 2001 From: Tim Abbott Date: Sun, 12 Jul 2009 18:23:33 -0400 Subject: vmlinux.lds.h: restructure BSS linker script macros The BSS section macros in vmlinux.lds.h currently place the .sbss input section outside the bounds of [__bss_start, __bss_end]. On all architectures except for microblaze that handle both .sbss and __bss_start/__bss_end, this is wrong: the .sbss input section is within the range [__bss_start, __bss_end]. Relatedly, the example code at the top of the file actually has __bss_start/__bss_end defined twice; I believe the right fix here is to define them in the BSS_SECTION macro but not in the BSS macro. 
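To see why the placement matters, consider the generic early-boot clearing loop that most architectures implement in some form; the sketch below is purely illustrative and not taken from any particular architecture's setup code:

	#include <linux/string.h>

	extern char __bss_start[], __bss_stop[];	/* provided by the linker script */

	static void clear_bss(void)
	{
		/* Anything the linker script places outside
		 * [__bss_start, __bss_stop] -- for example a stray .sbss --
		 * is never zeroed here.
		 */
		memset(__bss_start, 0, __bss_stop - __bss_start);
	}
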
Another problem with the current macros is that several architectures have an ALIGN(4) or some other small number just before __bss_stop in their linker scripts. The BSS_SECTION macro currently hardcodes this to 4; while it should really be an argument. It also ignores its sbss_align argument; fix that. mn10300 is the only user at present of any of the macros touched by this patch. It looks like mn10300 actually was incorrectly converted to use the new BSS() macro (the alignment of 4 prior to conversion was a __bss_stop alignment, but the argument to the BSS macro is a start alignment). So fix this as well. I'd like acks from Sam and David on this one. Also CCing Paul, since he has a patch from me which will need to be updated to use BSS_SECTION(0, PAGE_SIZE, 4) once this gets merged. Signed-off-by: Tim Abbott Cc: Paul Mundt Cc: David Howells Signed-off-by: Sam Ravnborg --- arch/mn10300/kernel/vmlinux.lds.S | 2 +- include/asm-generic/vmlinux.lds.h | 19 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index c96ba3da95ac..f4aa07934654 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -107,7 +107,7 @@ SECTIONS __init_end = .; /* freed after init ends here */ - BSS(4) + BSS_SECTION(0, PAGE_SIZE, 4) _end = . ; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a553f1041cf1..6ad76bf5fb40 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -30,9 +30,7 @@ * EXCEPTION_TABLE(...) * NOTES * - * __bss_start = .; - * BSS_SECTION(0, 0) - * __bss_stop = .; + * BSS_SECTION(0, 0, 0) * _end = .; * * /DISCARD/ : { @@ -489,7 +487,8 @@ * bss (Block Started by Symbol) - uninitialized data * zeroed during startup */ -#define SBSS \ +#define SBSS(sbss_align) \ + . = ALIGN(sbss_align); \ .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) { \ *(.sbss) \ *(.scommon) \ @@ -498,12 +497,10 @@ #define BSS(bss_align) \ . = ALIGN(bss_align); \ .bss : AT(ADDR(.bss) - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__bss_start) = .; \ *(.bss.page_aligned) \ *(.dynbss) \ *(.bss) \ *(COMMON) \ - VMLINUX_SYMBOL(__bss_stop) = .; \ } /* @@ -735,8 +732,10 @@ INIT_RAM_FS \ } -#define BSS_SECTION(sbss_align, bss_align) \ - SBSS \ +#define BSS_SECTION(sbss_align, bss_align, stop_align) \ + . = ALIGN(sbss_align); \ + VMLINUX_SYMBOL(__bss_start) = .; \ + SBSS(sbss_align) \ BSS(bss_align) \ - . = ALIGN(4); - + . = ALIGN(stop_align); \ + VMLINUX_SYMBOL(__bss_stop) = .; -- cgit v1.2.3 From 6301cb95c119ebf324bb96ee226fa9ddffad80a7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 17 Jul 2009 14:15:47 +0200 Subject: sched: fix nr_uninterruptible accounting of frozen tasks really commit e3c8ca8336 (sched: do not count frozen tasks toward load) broke the nr_uninterruptible accounting on freeze/thaw. On freeze the task is excluded from accounting with a check for (task->flags & PF_FROZEN), but that flag is cleared before the task is thawed. So while we prevent that the task with state TASK_UNINTERRUPTIBLE is accounted to nr_uninterruptible on freeze we decrement nr_uninterruptible on thaw. Use a separate flag which is handled by the freezing task itself. Set it before calling the scheduler with TASK_UNINTERRUPTIBLE state and clear it after we return from frozen state. 
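In simplified form the asymmetry looks like this; the helpers and the counter are made up for illustration and are not the real scheduler code, but they show why the flag gating the accounting (PF_FREEZING in the hunks below) must still be set when the thawed task is woken:

	#include <linux/sched.h>

	static unsigned long nr_uninterruptible;	/* stand-in for the per-runqueue counter */

	static inline int contributes_to_load(struct task_struct *p)
	{
		return (p->state & TASK_UNINTERRUPTIBLE) != 0 &&
		       (p->flags & PF_FREEZING) == 0;	/* PF_FROZEN is already clear at thaw */
	}

	static void account_sleep(struct task_struct *p)
	{
		if (contributes_to_load(p))
			nr_uninterruptible++;		/* skipped while the task is freezing */
	}

	static void account_wakeup(struct task_struct *p)
	{
		if (contributes_to_load(p))
			nr_uninterruptible--;		/* with PF_FREEZING, also skipped at thaw */
	}
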
Cc: Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 3 ++- kernel/freezer.c | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 16a982e389fb..3ab08e4bb6b8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -209,7 +209,7 @@ extern unsigned long long time_sync_thresh; ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0) #define task_contributes_to_load(task) \ ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \ - (task->flags & PF_FROZEN) == 0) + (task->flags & PF_FREEZING) == 0) #define __set_task_state(tsk, state_value) \ do { (tsk)->state = (state_value); } while (0) @@ -1680,6 +1680,7 @@ extern cputime_t task_gtime(struct task_struct *p); #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_FLUSHER 0x00001000 /* responsible for disk writeback */ #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ +#define PF_FREEZING 0x00004000 /* freeze in progress. do not account to load */ #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ #define PF_FROZEN 0x00010000 /* frozen for system suspend */ #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ diff --git a/kernel/freezer.c b/kernel/freezer.c index 2f4936cf7083..bd1d42b17cb2 100644 --- a/kernel/freezer.c +++ b/kernel/freezer.c @@ -44,12 +44,19 @@ void refrigerator(void) recalc_sigpending(); /* We sent fake signal, clean it up */ spin_unlock_irq(¤t->sighand->siglock); + /* prevent accounting of that task to load */ + current->flags |= PF_FREEZING; + for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (!frozen(current)) break; schedule(); } + + /* Remove the accounting blocker */ + current->flags &= ~PF_FREEZING; + pr_debug("%s left refrigerator\n", current->comm); __set_current_state(save); } -- cgit v1.2.3 From e09758fae8ccde97e026c704319eaa18d488dc86 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: sched: Cover the CONFIG_DEBUG_SPINLOCK_SLEEP off-case for __might_sleep() Cover the off case for __might_sleep(), so that we avoid #ifdefs in files that make use of it. Especially, this prepares for the __might_sleep() pull up on cond_resched(). 
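The pattern is the usual stub-out idiom; roughly, the header ends up with the shape shown in the hunk below:

	#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
	void __might_sleep(char *file, int line);
	#else
	static inline void __might_sleep(char *file, int line) { }	/* compiles away */
	#endif

	static void example_caller(void)
	{
		/* No #ifdef needed at the call site any more. */
		__might_sleep(__FILE__, __LINE__);
	}
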
Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <1247725694-6082-3-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 1 + kernel/sched.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index d6320a3e8def..b804f694ae6a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -139,6 +139,7 @@ extern int _cond_resched(void); # define might_sleep() \ do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) #else + static inline void __might_sleep(char *file, int line) { } # define might_sleep() do { might_resched(); } while (0) #endif diff --git a/kernel/sched.c b/kernel/sched.c index 4d39e96044b9..370a6c31c5e1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6610,9 +6610,8 @@ static inline int should_resched(void) static void __cond_resched(void) { -#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP __might_sleep(__FILE__, __LINE__); -#endif + add_preempt_count(PREEMPT_ACTIVE); schedule(); sub_preempt_count(PREEMPT_ACTIVE); -- cgit v1.2.3 From e4aafea2d4bde8b44d6500c4ee7195bbfc51269e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: sched: Add a preempt count base offset to __might_sleep() Add a preempt count base offset to compare against the current preempt level count. It prepares to pull up the might_sleep check from cond_resched() to cond_resched_lock() and cond_resched_bh(). For these two helpers, we need to respectively ensure that once we'll unlock the given spinlock / reenable local softirqs, we will reach a sleepable state. Signed-off-by: Frederic Weisbecker [ Move and rename preempt_count_equals() ] Signed-off-by: Peter Zijlstra LKML-Reference: <1247725694-6082-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 6 +++--- kernel/sched.c | 15 +++++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index b804f694ae6a..2b5b1e0899a8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -125,7 +125,7 @@ extern int _cond_resched(void); #endif #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP - void __might_sleep(char *file, int line); + void __might_sleep(char *file, int line, int preempt_offset); /** * might_sleep - annotation for functions that can sleep * @@ -137,9 +137,9 @@ extern int _cond_resched(void); * supposed to. 
*/ # define might_sleep() \ - do { __might_sleep(__FILE__, __LINE__); might_resched(); } while (0) + do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0) #else - static inline void __might_sleep(char *file, int line) { } + static inline void __might_sleep(char *file, int line, int preempt_offset) { } # define might_sleep() do { might_resched(); } while (0) #endif diff --git a/kernel/sched.c b/kernel/sched.c index 370a6c31c5e1..3ff4d004bd95 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6610,7 +6610,7 @@ static inline int should_resched(void) static void __cond_resched(void) { - __might_sleep(__FILE__, __LINE__); + __might_sleep(__FILE__, __LINE__, 0); add_preempt_count(PREEMPT_ACTIVE); schedule(); @@ -9429,13 +9429,20 @@ void __init sched_init(void) } #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP -void __might_sleep(char *file, int line) +static inline int preempt_count_equals(int preempt_offset) +{ + int nested = preempt_count() & ~PREEMPT_ACTIVE; + + return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); +} + +void __might_sleep(char *file, int line, int preempt_offset) { #ifdef in_atomic static unsigned long prev_jiffy; /* ratelimiting */ - if ((!in_atomic() && !irqs_disabled()) || - system_state != SYSTEM_RUNNING || oops_in_progress) + if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || + system_state != SYSTEM_RUNNING || oops_in_progress) return; if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) return; -- cgit v1.2.3 From 6f80bd985fe242c2e6a8b6209ed20b0495d3d63b Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: sched: Remove the CONFIG_PREEMPT_BKL case definition of cond_resched() CONFIG_PREEMPT_BKL doesn't exist anymore. So remove this config-on case definition of cond_resched(). Reported-by: Peter Zijlstra Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <1247725694-6082-5-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9bada20e2b23..e2bdf18e05c4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2285,17 +2285,12 @@ static inline int need_resched(void) * cond_resched_softirq() will enable bhs before scheduling. */ extern int _cond_resched(void); -#ifdef CONFIG_PREEMPT_BKL -static inline int cond_resched(void) -{ - return 0; -} -#else + static inline int cond_resched(void) { return _cond_resched(); } -#endif + extern int cond_resched_lock(spinlock_t * lock); extern int cond_resched_softirq(void); static inline int cond_resched_bkl(void) -- cgit v1.2.3 From 613afbf83298efaead05ebcac23d2285609d7160 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: sched: Pull up the might_sleep() check into cond_resched() might_sleep() is called late-ish in cond_resched(), after the need_resched()/preempt enabled/system running tests are checked. It's better to check the sleeps while atomic earlier and not depend on some environment datas that reduce the chances to detect a problem. 
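The class of bug this targets is a cond_resched() issued from atomic context, as in this made-up example; with the check pulled up it is reported even when no reschedule happens to be pending at that moment:

	#include <linux/spinlock.h>
	#include <linux/sched.h>

	static void buggy_path(spinlock_t *lock)
	{
		spin_lock(lock);	/* atomic context from here on */
		cond_resched();		/* old ordering: silent unless a resched was pending;
					 * new ordering: __might_sleep() warns whenever
					 * CONFIG_DEBUG_SPINLOCK_SLEEP is enabled */
		spin_unlock(lock);
	}
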
Also define cond_resched_*() helpers as macros, so that the FILE/LINE reported in the sleeping while atomic warning displays the real origin and not sched.h Changes in v2: - Call __might_sleep() directly instead of might_sleep() which may call cond_resched() - Turn cond_resched() into a macro so that the file:line couple reported refers to the caller of cond_resched() and not __cond_resched() itself. Changes in v3: - Also propagate this __might_sleep() pull up to cond_resched_lock() and cond_resched_softirq() Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <1247725694-6082-6-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- fs/dcache.c | 1 + include/linux/sched.h | 29 +++++++++++++++++++---------- kernel/sched.c | 12 +++++------- 3 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 9e5cd3c3a6ba..a100fa35a48f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" int sysctl_vfs_cache_pressure __read_mostly = 100; diff --git a/include/linux/sched.h b/include/linux/sched.h index e2bdf18e05c4..c41d424db887 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2286,17 +2286,26 @@ static inline int need_resched(void) */ extern int _cond_resched(void); -static inline int cond_resched(void) -{ - return _cond_resched(); -} +#define cond_resched() ({ \ + __might_sleep(__FILE__, __LINE__, 0); \ + _cond_resched(); \ +}) -extern int cond_resched_lock(spinlock_t * lock); -extern int cond_resched_softirq(void); -static inline int cond_resched_bkl(void) -{ - return _cond_resched(); -} +extern int __cond_resched_lock(spinlock_t *lock); + +#define cond_resched_lock(lock) ({ \ + __might_sleep(__FILE__, __LINE__, PREEMPT_OFFSET); \ + __cond_resched_lock(lock); \ +}) + +extern int __cond_resched_softirq(void); + +#define cond_resched_softirq() ({ \ + __might_sleep(__FILE__, __LINE__, SOFTIRQ_OFFSET); \ + __cond_resched_softirq(); \ +}) + +#define cond_resched_bkl() cond_resched() /* * Does a critical section need to be broken due to another diff --git a/kernel/sched.c b/kernel/sched.c index 3ff4d004bd95..1f7919add8ae 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6610,8 +6610,6 @@ static inline int should_resched(void) static void __cond_resched(void) { - __might_sleep(__FILE__, __LINE__, 0); - add_preempt_count(PREEMPT_ACTIVE); schedule(); sub_preempt_count(PREEMPT_ACTIVE); @@ -6628,14 +6626,14 @@ int __sched _cond_resched(void) EXPORT_SYMBOL(_cond_resched); /* - * cond_resched_lock() - if a reschedule is pending, drop the given lock, + * __cond_resched_lock() - if a reschedule is pending, drop the given lock, * call schedule, and on return reacquire the lock. * * This works OK both with and without CONFIG_PREEMPT. We do strange low-level * operations here to prevent schedule() from being called twice (once via * spin_unlock(), once by hand). 
*/ -int cond_resched_lock(spinlock_t *lock) +int __cond_resched_lock(spinlock_t *lock) { int resched = should_resched(); int ret = 0; @@ -6651,9 +6649,9 @@ int cond_resched_lock(spinlock_t *lock) } return ret; } -EXPORT_SYMBOL(cond_resched_lock); +EXPORT_SYMBOL(__cond_resched_lock); -int __sched cond_resched_softirq(void) +int __sched __cond_resched_softirq(void) { BUG_ON(!in_softirq()); @@ -6665,7 +6663,7 @@ int __sched cond_resched_softirq(void) } return 0; } -EXPORT_SYMBOL(cond_resched_softirq); +EXPORT_SYMBOL(__cond_resched_softirq); /** * yield - yield the current processor to other threads. -- cgit v1.2.3 From def01bc53d03881acfc393bd10a5c7575187e008 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: sched: Convert the only user of cond_resched_bkl to use cond_resched() fs/locks.c:flock_lock_file() is the only user of cond_resched_bkl() This helper doesn't do anything more than cond_resched(). The latter naming is enough to explain that we are rescheduling if needed. The bkl suffix suggests another semantics but it's actually a synonym of cond_resched(). Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <1247725694-6082-7-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- fs/locks.c | 2 +- include/linux/sched.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/fs/locks.c b/fs/locks.c index b6440f52178f..2eb81975c99c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -768,7 +768,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) * give it the opportunity to lock the file. */ if (found) - cond_resched_bkl(); + cond_resched(); find_conflict: for_each_lock(inode, before) { diff --git a/include/linux/sched.h b/include/linux/sched.h index c41d424db887..cbbfca69aa4a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2305,8 +2305,6 @@ extern int __cond_resched_softirq(void); __cond_resched_softirq(); \ }) -#define cond_resched_bkl() cond_resched() - /* * Does a critical section need to be broken due to another * task waiting?: (technically does not depend on CONFIG_PREEMPT, -- cgit v1.2.3 From 719a72b7c75bb239ca6184190ab994b71a31c6dc Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 17 Jul 2009 14:59:55 +0000 Subject: usb: r8a66597-hcd platform data on_chip support Convert the r8a66597-hcd driver to use the on_chip flag from platform data to enable on chip behaviour instead of relying on CONFIG_SUPERH_ON_CHIP_R8A66597 ugliness. This makes the code cleaner and also allows us to support both external and internal r8a66597 with the same kernel. It also makes the Kconfig part more future proof since we with this patch can add support for new processors with on-chip r8a66597 without modifying the Kconfig. 
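For illustration, the two kinds of boards now differ only in their platform data; the board names below are invented and the xtal constant name is assumed from the R8A66597_PLATDATA_XTAL_nnMHZ comment in the header:

	#include <linux/usb/r8a66597.h>

	/* SoC with the controller on chip */
	static struct r8a66597_platdata example_on_chip_pdata = {
		.on_chip = 1,
	};

	/* Board with an external R8A66597 on the local bus */
	static struct r8a66597_platdata example_external_pdata = {
		.on_chip = 0,
		.xtal    = R8A66597_PLATDATA_XTAL_12MHZ,	/* assumed constant name */
	};
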
Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/boards/mach-se/7724/setup.c | 1 + arch/sh/kernel/cpu/sh4a/setup-sh7366.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7723.c | 2 +- drivers/usb/host/Kconfig | 7 -- drivers/usb/host/r8a66597-hcd.c | 187 +++++++++++++++++++-------------- drivers/usb/host/r8a66597.h | 76 ++++++-------- include/linux/usb/r8a66597.h | 3 + 7 files changed, 147 insertions(+), 131 deletions(-) (limited to 'include') diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 8fed45a2fb85..4fb7e48e2843 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -304,6 +304,7 @@ static struct platform_device sh_eth_device = { }; static struct r8a66597_platdata sh7724_usb0_host_data = { + .on_chip = 1, }; static struct resource sh7724_usb0_host_resources[] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c index c18f7d09281b..f6d208813564 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c @@ -40,7 +40,7 @@ static struct platform_device iic_device = { }; static struct r8a66597_platdata r8a66597_data = { - /* This set zero to all members */ + .on_chip = 1, }; static struct resource usb_host_resources[] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c index e1bb80b2a27b..28516499a2c4 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c @@ -398,7 +398,7 @@ static struct platform_device rtc_device = { }; static struct r8a66597_platdata r8a66597_data = { - /* This set zero to all members */ + .on_chip = 1, }; static struct resource sh7723_usb_host_resources[] = { diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig index 1a920c70b5a1..f21ca7d27a43 100644 --- a/drivers/usb/host/Kconfig +++ b/drivers/usb/host/Kconfig @@ -336,13 +336,6 @@ config USB_R8A66597_HCD To compile this driver as a module, choose M here: the module will be called r8a66597-hcd. -config SUPERH_ON_CHIP_R8A66597 - boolean "Enable SuperH on-chip R8A66597 USB" - depends on USB_R8A66597_HCD && (CPU_SUBTYPE_SH7366 || CPU_SUBTYPE_SH7723 || CPU_SUBTYPE_SH7724) - help - This driver enables support for the on-chip R8A66597 in the - SH7366, SH7723 and SH7724 processors. 
- config USB_WHCI_HCD tristate "Wireless USB Host Controller Interface (WHCI) driver (EXPERIMENTAL)" depends on EXPERIMENTAL diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c index 09895a97c10b..82dce3e0d4d7 100644 --- a/drivers/usb/host/r8a66597-hcd.c +++ b/drivers/usb/host/r8a66597-hcd.c @@ -91,43 +91,43 @@ static int r8a66597_clock_enable(struct r8a66597 *r8a66597) u16 tmp; int i = 0; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#if defined(CONFIG_HAVE_CLK) - clk_enable(r8a66597->clk); + if (r8a66597->pdata->on_chip) { +#ifdef CONFIG_HAVE_CLK + clk_enable(r8a66597->clk); #endif - do { - r8a66597_write(r8a66597, SCKE, SYSCFG0); - tmp = r8a66597_read(r8a66597, SYSCFG0); - if (i++ > 1000) { - printk(KERN_ERR "r8a66597: register access fail.\n"); - return -ENXIO; - } - } while ((tmp & SCKE) != SCKE); - r8a66597_write(r8a66597, 0x04, 0x02); -#else - do { - r8a66597_write(r8a66597, USBE, SYSCFG0); - tmp = r8a66597_read(r8a66597, SYSCFG0); - if (i++ > 1000) { - printk(KERN_ERR "r8a66597: register access fail.\n"); - return -ENXIO; - } - } while ((tmp & USBE) != USBE); - r8a66597_bclr(r8a66597, USBE, SYSCFG0); - r8a66597_mdfy(r8a66597, get_xtal_from_pdata(r8a66597->pdata), XTAL, - SYSCFG0); + do { + r8a66597_write(r8a66597, SCKE, SYSCFG0); + tmp = r8a66597_read(r8a66597, SYSCFG0); + if (i++ > 1000) { + printk(KERN_ERR "r8a66597: reg access fail.\n"); + return -ENXIO; + } + } while ((tmp & SCKE) != SCKE); + r8a66597_write(r8a66597, 0x04, 0x02); + } else { + do { + r8a66597_write(r8a66597, USBE, SYSCFG0); + tmp = r8a66597_read(r8a66597, SYSCFG0); + if (i++ > 1000) { + printk(KERN_ERR "r8a66597: reg access fail.\n"); + return -ENXIO; + } + } while ((tmp & USBE) != USBE); + r8a66597_bclr(r8a66597, USBE, SYSCFG0); + r8a66597_mdfy(r8a66597, get_xtal_from_pdata(r8a66597->pdata), + XTAL, SYSCFG0); - i = 0; - r8a66597_bset(r8a66597, XCKE, SYSCFG0); - do { - msleep(1); - tmp = r8a66597_read(r8a66597, SYSCFG0); - if (i++ > 500) { - printk(KERN_ERR "r8a66597: register access fail.\n"); - return -ENXIO; - } - } while ((tmp & SCKE) != SCKE); -#endif /* #if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) */ + i = 0; + r8a66597_bset(r8a66597, XCKE, SYSCFG0); + do { + msleep(1); + tmp = r8a66597_read(r8a66597, SYSCFG0); + if (i++ > 500) { + printk(KERN_ERR "r8a66597: reg access fail.\n"); + return -ENXIO; + } + } while ((tmp & SCKE) != SCKE); + } return 0; } @@ -136,15 +136,16 @@ static void r8a66597_clock_disable(struct r8a66597 *r8a66597) { r8a66597_bclr(r8a66597, SCKE, SYSCFG0); udelay(1); -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#if defined(CONFIG_HAVE_CLK) - clk_disable(r8a66597->clk); -#endif -#else - r8a66597_bclr(r8a66597, PLLC, SYSCFG0); - r8a66597_bclr(r8a66597, XCKE, SYSCFG0); - r8a66597_bclr(r8a66597, USBE, SYSCFG0); + + if (r8a66597->pdata->on_chip) { +#ifdef CONFIG_HAVE_CLK + clk_disable(r8a66597->clk); #endif + } else { + r8a66597_bclr(r8a66597, PLLC, SYSCFG0); + r8a66597_bclr(r8a66597, XCKE, SYSCFG0); + r8a66597_bclr(r8a66597, USBE, SYSCFG0); + } } static void r8a66597_enable_port(struct r8a66597 *r8a66597, int port) @@ -205,7 +206,7 @@ static int enable_controller(struct r8a66597 *r8a66597) r8a66597_bset(r8a66597, SIGNE | SACKE, INTENB1); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) + for (port = 0; port < r8a66597->max_root_hub; port++) r8a66597_enable_port(r8a66597, port); return 0; @@ -218,7 +219,7 @@ static void disable_controller(struct r8a66597 *r8a66597) r8a66597_write(r8a66597, 0, INTENB0); r8a66597_write(r8a66597, 0, INTSTS0); - for (port = 0; port 
< R8A66597_MAX_ROOT_HUB; port++) + for (port = 0; port < r8a66597->max_root_hub; port++) r8a66597_disable_port(r8a66597, port); r8a66597_clock_disable(r8a66597); @@ -249,11 +250,12 @@ static int is_hub_limit(char *devpath) return ((strlen(devpath) >= 4) ? 1 : 0); } -static void get_port_number(char *devpath, u16 *root_port, u16 *hub_port) +static void get_port_number(struct r8a66597 *r8a66597, + char *devpath, u16 *root_port, u16 *hub_port) { if (root_port) { *root_port = (devpath[0] & 0x0F) - 1; - if (*root_port >= R8A66597_MAX_ROOT_HUB) + if (*root_port >= r8a66597->max_root_hub) printk(KERN_ERR "r8a66597: Illegal root port number.\n"); } if (hub_port) @@ -355,7 +357,8 @@ static int make_r8a66597_device(struct r8a66597 *r8a66597, INIT_LIST_HEAD(&dev->device_list); list_add_tail(&dev->device_list, &r8a66597->child_device); - get_port_number(urb->dev->devpath, &dev->root_port, &dev->hub_port); + get_port_number(r8a66597, urb->dev->devpath, + &dev->root_port, &dev->hub_port); if (!is_child_device(urb->dev->devpath)) r8a66597->root_hub[dev->root_port].dev = dev; @@ -420,7 +423,7 @@ static void free_usb_address(struct r8a66597 *r8a66597, list_del(&dev->device_list); kfree(dev); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { if (r8a66597->root_hub[port].dev == dev) { r8a66597->root_hub[port].dev = NULL; break; @@ -495,10 +498,20 @@ static void r8a66597_pipe_toggle(struct r8a66597 *r8a66597, r8a66597_bset(r8a66597, SQCLR, pipe->pipectr); } +static inline unsigned short mbw_value(struct r8a66597 *r8a66597) +{ + if (r8a66597->pdata->on_chip) + return MBW_32; + else + return MBW_16; +} + /* this function must be called with interrupt disabled */ static inline void cfifo_change(struct r8a66597 *r8a66597, u16 pipenum) { - r8a66597_mdfy(r8a66597, MBW | pipenum, MBW | CURPIPE, CFIFOSEL); + unsigned short mbw = mbw_value(r8a66597); + + r8a66597_mdfy(r8a66597, mbw | pipenum, mbw | CURPIPE, CFIFOSEL); r8a66597_reg_wait(r8a66597, CFIFOSEL, CURPIPE, pipenum); } @@ -506,11 +519,13 @@ static inline void cfifo_change(struct r8a66597 *r8a66597, u16 pipenum) static inline void fifo_change_from_pipe(struct r8a66597 *r8a66597, struct r8a66597_pipe *pipe) { + unsigned short mbw = mbw_value(r8a66597); + cfifo_change(r8a66597, 0); - r8a66597_mdfy(r8a66597, MBW | 0, MBW | CURPIPE, D0FIFOSEL); - r8a66597_mdfy(r8a66597, MBW | 0, MBW | CURPIPE, D1FIFOSEL); + r8a66597_mdfy(r8a66597, mbw | 0, mbw | CURPIPE, D0FIFOSEL); + r8a66597_mdfy(r8a66597, mbw | 0, mbw | CURPIPE, D1FIFOSEL); - r8a66597_mdfy(r8a66597, MBW | pipe->info.pipenum, MBW | CURPIPE, + r8a66597_mdfy(r8a66597, mbw | pipe->info.pipenum, mbw | CURPIPE, pipe->fifosel); r8a66597_reg_wait(r8a66597, pipe->fifosel, CURPIPE, pipe->info.pipenum); } @@ -742,9 +757,13 @@ static void enable_r8a66597_pipe_dma(struct r8a66597 *r8a66597, struct r8a66597_pipe *pipe, struct urb *urb) { -#if !defined(CONFIG_SUPERH_ON_CHIP_R8A66597) int i; struct r8a66597_pipe_info *info = &pipe->info; + unsigned short mbw = mbw_value(r8a66597); + + /* pipe dma is only for external controlles */ + if (r8a66597->pdata->on_chip) + return; if ((pipe->info.pipenum != 0) && (info->type != R8A66597_INT)) { for (i = 0; i < R8A66597_MAX_DMA_CHANNEL; i++) { @@ -763,8 +782,8 @@ static void enable_r8a66597_pipe_dma(struct r8a66597 *r8a66597, set_pipe_reg_addr(pipe, i); cfifo_change(r8a66597, 0); - r8a66597_mdfy(r8a66597, MBW | pipe->info.pipenum, - MBW | CURPIPE, pipe->fifosel); + r8a66597_mdfy(r8a66597, mbw | pipe->info.pipenum, + mbw | 
CURPIPE, pipe->fifosel); r8a66597_reg_wait(r8a66597, pipe->fifosel, CURPIPE, pipe->info.pipenum); @@ -772,7 +791,6 @@ static void enable_r8a66597_pipe_dma(struct r8a66597 *r8a66597, break; } } -#endif /* #if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) */ } /* this function must be called with interrupt disabled */ @@ -1769,7 +1787,7 @@ static void r8a66597_timer(unsigned long _r8a66597) spin_lock_irqsave(&r8a66597->lock, flags); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) + for (port = 0; port < r8a66597->max_root_hub; port++) r8a66597_root_hub_control(r8a66597, port); spin_unlock_irqrestore(&r8a66597->lock, flags); @@ -1807,7 +1825,7 @@ static void set_address_zero(struct r8a66597 *r8a66597, struct urb *urb) u16 root_port, hub_port; if (usb_address == 0) { - get_port_number(urb->dev->devpath, + get_port_number(r8a66597, urb->dev->devpath, &root_port, &hub_port); set_devadd_reg(r8a66597, 0, get_r8a66597_usb_speed(urb->dev->speed), @@ -2082,7 +2100,7 @@ static int r8a66597_hub_status_data(struct usb_hcd *hcd, char *buf) *buf = 0; /* initialize (no change) */ - for (i = 0; i < R8A66597_MAX_ROOT_HUB; i++) { + for (i = 0; i < r8a66597->max_root_hub; i++) { if (r8a66597->root_hub[i].port & 0xffff0000) *buf |= 1 << (i + 1); } @@ -2097,11 +2115,11 @@ static void r8a66597_hub_descriptor(struct r8a66597 *r8a66597, { desc->bDescriptorType = 0x29; desc->bHubContrCurrent = 0; - desc->bNbrPorts = R8A66597_MAX_ROOT_HUB; + desc->bNbrPorts = r8a66597->max_root_hub; desc->bDescLength = 9; desc->bPwrOn2PwrGood = 0; desc->wHubCharacteristics = cpu_to_le16(0x0011); - desc->bitmap[0] = ((1 << R8A66597_MAX_ROOT_HUB) - 1) << 1; + desc->bitmap[0] = ((1 << r8a66597->max_root_hub) - 1) << 1; desc->bitmap[1] = ~0; } @@ -2129,7 +2147,7 @@ static int r8a66597_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, } break; case ClearPortFeature: - if (wIndex > R8A66597_MAX_ROOT_HUB) + if (wIndex > r8a66597->max_root_hub) goto error; if (wLength != 0) goto error; @@ -2162,12 +2180,12 @@ static int r8a66597_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, *buf = 0x00; break; case GetPortStatus: - if (wIndex > R8A66597_MAX_ROOT_HUB) + if (wIndex > r8a66597->max_root_hub) goto error; *(__le32 *)buf = cpu_to_le32(rh->port); break; case SetPortFeature: - if (wIndex > R8A66597_MAX_ROOT_HUB) + if (wIndex > r8a66597->max_root_hub) goto error; if (wLength != 0) goto error; @@ -2216,7 +2234,7 @@ static int r8a66597_bus_suspend(struct usb_hcd *hcd) dbg("%s", __func__); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { struct r8a66597_root_hub *rh = &r8a66597->root_hub[port]; unsigned long dvstctr_reg = get_dvstctr_reg(port); @@ -2247,7 +2265,7 @@ static int r8a66597_bus_resume(struct usb_hcd *hcd) dbg("%s", __func__); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { struct r8a66597_root_hub *rh = &r8a66597->root_hub[port]; unsigned long dvstctr_reg = get_dvstctr_reg(port); @@ -2314,7 +2332,7 @@ static int r8a66597_suspend(struct device *dev) disable_controller(r8a66597); - for (port = 0; port < R8A66597_MAX_ROOT_HUB; port++) { + for (port = 0; port < r8a66597->max_root_hub; port++) { struct r8a66597_root_hub *rh = &r8a66597->root_hub[port]; rh->port = 0x00000000; @@ -2354,8 +2372,9 @@ static int __init_or_module r8a66597_remove(struct platform_device *pdev) del_timer_sync(&r8a66597->rh_timer); usb_remove_hcd(hcd); iounmap((void *)r8a66597->reg); -#if 
defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) - clk_put(r8a66597->clk); +#ifdef CONFIG_HAVE_CLK + if (r8a66597->pdata->on_chip) + clk_put(r8a66597->clk); #endif usb_put_hcd(hcd); return 0; @@ -2363,7 +2382,7 @@ static int __init_or_module r8a66597_remove(struct platform_device *pdev) static int __devinit r8a66597_probe(struct platform_device *pdev) { -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK char clk_name[8]; #endif struct resource *res = NULL, *ires; @@ -2425,15 +2444,20 @@ static int __devinit r8a66597_probe(struct platform_device *pdev) r8a66597->pdata = pdev->dev.platform_data; r8a66597->irq_sense_low = irq_trigger == IRQF_TRIGGER_LOW; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) - snprintf(clk_name, sizeof(clk_name), "usb%d", pdev->id); - r8a66597->clk = clk_get(&pdev->dev, clk_name); - if (IS_ERR(r8a66597->clk)) { - dev_err(&pdev->dev, "cannot get clock \"%s\"\n", clk_name); - ret = PTR_ERR(r8a66597->clk); - goto clean_up2; - } + if (r8a66597->pdata->on_chip) { +#ifdef CONFIG_HAVE_CLK + snprintf(clk_name, sizeof(clk_name), "usb%d", pdev->id); + r8a66597->clk = clk_get(&pdev->dev, clk_name); + if (IS_ERR(r8a66597->clk)) { + dev_err(&pdev->dev, "cannot get clock \"%s\"\n", + clk_name); + ret = PTR_ERR(r8a66597->clk); + goto clean_up2; + } #endif + r8a66597->max_root_hub = 1; + } else + r8a66597->max_root_hub = 2; spin_lock_init(&r8a66597->lock); init_timer(&r8a66597->rh_timer); @@ -2463,8 +2487,9 @@ static int __devinit r8a66597_probe(struct platform_device *pdev) return 0; clean_up3: -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) - clk_put(r8a66597->clk); +#ifdef CONFIG_HAVE_CLK + if (r8a66597->pdata->on_chip) + clk_put(r8a66597->clk); clean_up2: #endif usb_put_hcd(hcd); diff --git a/drivers/usb/host/r8a66597.h b/drivers/usb/host/r8a66597.h index d72680b433f9..eecbd917bc81 100644 --- a/drivers/usb/host/r8a66597.h +++ b/drivers/usb/host/r8a66597.h @@ -26,7 +26,7 @@ #ifndef __R8A66597_H__ #define __R8A66597_H__ -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK #include #endif @@ -193,13 +193,9 @@ #define REW 0x4000 /* b14: Buffer rewind */ #define DCLRM 0x2000 /* b13: DMA buffer clear mode */ #define DREQE 0x1000 /* b12: DREQ output enable */ -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#define MBW 0x0800 -#else -#define MBW 0x0400 /* b10: Maximum bit width for FIFO access */ -#endif #define MBW_8 0x0000 /* 8bit */ #define MBW_16 0x0400 /* 16bit */ +#define MBW_32 0x0800 /* 32bit */ #define BIGEND 0x0100 /* b8: Big endian mode */ #define BYTE_LITTLE 0x0000 /* little dendian */ #define BYTE_BIG 0x0100 /* big endifan */ @@ -405,11 +401,7 @@ #define R8A66597_MAX_NUM_PIPE 10 #define R8A66597_BUF_BSIZE 8 #define R8A66597_MAX_DEVICE 10 -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) -#define R8A66597_MAX_ROOT_HUB 1 -#else #define R8A66597_MAX_ROOT_HUB 2 -#endif #define R8A66597_MAX_SAMPLING 5 #define R8A66597_RH_POLL_TIME 10 #define R8A66597_MAX_DMA_CHANNEL 2 @@ -487,7 +479,7 @@ struct r8a66597_root_hub { struct r8a66597 { spinlock_t lock; unsigned long reg; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK struct clk *clk; #endif struct r8a66597_platdata *pdata; @@ -504,6 +496,7 @@ struct r8a66597 { unsigned short interval_map; unsigned char pipe_cnt[R8A66597_MAX_NUM_PIPE]; unsigned char dma_map; + unsigned int max_root_hub; struct list_head child_device; unsigned long 
child_connect_map[4]; @@ -550,21 +543,22 @@ static inline void r8a66597_read_fifo(struct r8a66597 *r8a66597, unsigned long offset, u16 *buf, int len) { -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) unsigned long fifoaddr = r8a66597->reg + offset; unsigned long count; - count = len / 4; - insl(fifoaddr, buf, count); + if (r8a66597->pdata->on_chip) { + count = len / 4; + insl(fifoaddr, buf, count); - if (len & 0x00000003) { - unsigned long tmp = inl(fifoaddr); - memcpy((unsigned char *)buf + count * 4, &tmp, len & 0x03); + if (len & 0x00000003) { + unsigned long tmp = inl(fifoaddr); + memcpy((unsigned char *)buf + count * 4, &tmp, + len & 0x03); + } + } else { + len = (len + 1) / 2; + insw(fifoaddr, buf, len); } -#else - len = (len + 1) / 2; - insw(r8a66597->reg + offset, buf, len); -#endif } static inline void r8a66597_write(struct r8a66597 *r8a66597, u16 val, @@ -578,33 +572,33 @@ static inline void r8a66597_write_fifo(struct r8a66597 *r8a66597, int len) { unsigned long fifoaddr = r8a66597->reg + offset; -#if defined(CONFIG_SUPERH_ON_CHIP_R8A66597) unsigned long count; unsigned char *pb; int i; - count = len / 4; - outsl(fifoaddr, buf, count); + if (r8a66597->pdata->on_chip) { + count = len / 4; + outsl(fifoaddr, buf, count); + + if (len & 0x00000003) { + pb = (unsigned char *)buf + count * 4; + for (i = 0; i < (len & 0x00000003); i++) { + if (r8a66597_read(r8a66597, CFIFOSEL) & BIGEND) + outb(pb[i], fifoaddr + i); + else + outb(pb[i], fifoaddr + 3 - i); + } + } + } else { + int odd = len & 0x0001; - if (len & 0x00000003) { - pb = (unsigned char *)buf + count * 4; - for (i = 0; i < (len & 0x00000003); i++) { - if (r8a66597_read(r8a66597, CFIFOSEL) & BIGEND) - outb(pb[i], fifoaddr + i); - else - outb(pb[i], fifoaddr + 3 - i); + len = len / 2; + outsw(fifoaddr, buf, len); + if (unlikely(odd)) { + buf = &buf[len]; + outb((unsigned char)*buf, fifoaddr); } } -#else - int odd = len & 0x0001; - - len = len / 2; - outsw(fifoaddr, buf, len); - if (unlikely(odd)) { - buf = &buf[len]; - outb((unsigned char)*buf, fifoaddr); - } -#endif } static inline void r8a66597_mdfy(struct r8a66597 *r8a66597, diff --git a/include/linux/usb/r8a66597.h b/include/linux/usb/r8a66597.h index e9f0384fa20c..460ee3f6a2c6 100644 --- a/include/linux/usb/r8a66597.h +++ b/include/linux/usb/r8a66597.h @@ -31,6 +31,9 @@ struct r8a66597_platdata { /* This ops can controll port power instead of DVSTCTR register. */ void (*port_power)(int port, int power); + /* set one = on chip controller, set zero = external controller */ + unsigned on_chip:1; + /* (external controller only) set R8A66597_PLATDATA_XTAL_nnMHZ */ unsigned xtal:2; -- cgit v1.2.3 From 1c388ad054fb1ead3dc354b1719570b99e464135 Mon Sep 17 00:00:00 2001 From: Paul Menage Date: Fri, 17 Jul 2009 16:16:18 -0700 Subject: include/linux/cred.h: work around gcc-4.2.4 warning in get_cred() With gcc 4.2.4 (building UML) I get the warning include/linux/cred.h: In function 'get_cred': include/linux/cred.h:189: warning: passing argument 1 of 'get_new_cred' discards qualifiers from pointer target type Inserting an additional local variable appears to keep the compiler happy, although it's not clear to me why this should be needed. 
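Reduced to a standalone fragment with made-up names (the kernel case is get_cred()/get_new_cred() in the hunk below), the shape of the workaround is:

	struct obj;
	struct obj *obj_get(struct obj *o);	/* takes a non-const pointer */

	static inline const struct obj *obj_get_const(const struct obj *o)
	{
		/* Hoist the const-discarding cast into a named local instead of
		 * doing it inside the call expression; per the report above,
		 * this is enough to keep gcc 4.2.4 quiet.
		 */
		struct obj *nonconst = (struct obj *)o;

		return obj_get(nonconst);
	}
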
Signed-off-by: Paul Menage Signed-off-by: Andrew Morton Acked-by: David Howells Signed-off-by: James Morris --- include/linux/cred.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 4fa999696310..b3c76e815d66 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -186,7 +186,8 @@ static inline struct cred *get_new_cred(struct cred *cred) */ static inline const struct cred *get_cred(const struct cred *cred) { - return get_new_cred((struct cred *) cred); + struct cred *nonconst_cred = (struct cred *) cred; + return get_new_cred(nonconst_cred); } /** -- cgit v1.2.3 From 4d4036e0e7299c6cbb2d2421b4b30b7a409ce61a Mon Sep 17 00:00:00 2001 From: Jason Yeh Date: Wed, 8 Jul 2009 13:49:38 +0200 Subject: oprofile: Implement performance counter multiplexing The number of hardware counters is limited. The multiplexing feature enables OProfile to gather more events than counters are provided by the hardware. This is realized by switching between events at an user specified time interval. A new file (/dev/oprofile/time_slice) is added for the user to specify the timer interval in ms. If the number of events to profile is higher than the number of hardware counters available, the patch will schedule a work queue that switches the event counter and re-writes the different sets of values into it. The switching mechanism needs to be implemented for each architecture to support multiplexing. This patch only implements AMD CPU support, but multiplexing can be easily extended for other models and architectures. There are follow-on patches that rework parts of this patch. Signed-off-by: Jason Yeh Signed-off-by: Robert Richter --- arch/Kconfig | 12 +++ arch/x86/oprofile/nmi_int.c | 162 ++++++++++++++++++++++++++++++++++++-- arch/x86/oprofile/op_counter.h | 2 +- arch/x86/oprofile/op_model_amd.c | 110 ++++++++++++++++++++++---- arch/x86/oprofile/op_model_p4.c | 4 + arch/x86/oprofile/op_model_ppro.c | 2 + arch/x86/oprofile/op_x86_model.h | 7 ++ drivers/oprofile/oprof.c | 78 ++++++++++++++++++ drivers/oprofile/oprof.h | 2 + drivers/oprofile/oprofile_files.c | 43 ++++++++++ drivers/oprofile/oprofile_stats.c | 10 +++ include/linux/oprofile.h | 3 + 12 files changed, 415 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/Kconfig b/arch/Kconfig index 99193b160232..beea3ccebb5e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -30,6 +30,18 @@ config OPROFILE_IBS If unsure, say N. +config OPROFILE_EVENT_MULTIPLEX + bool "OProfile multiplexing support (EXPERIMENTAL)" + default n + depends on OPROFILE && X86 + help + The number of hardware counters is limited. The multiplexing + feature enables OProfile to gather more events than counters + are provided by the hardware. This is realized by switching + between events at an user specified time interval. + + If unsure, say N. 
+ config HAVE_OPROFILE bool diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index fca8dc94531e..e54f6a0b35ac 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -1,11 +1,14 @@ /** * @file nmi_int.c * - * @remark Copyright 2002-2008 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon * @author Robert Richter + * @author Barry Kasindorf + * @author Jason Yeh + * @author Suravee Suthikulpanit */ #include @@ -24,6 +27,12 @@ #include "op_counter.h" #include "op_x86_model.h" + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +DEFINE_PER_CPU(int, switch_index); +#endif + + static struct op_x86_model_spec const *model; static DEFINE_PER_CPU(struct op_msrs, cpu_msrs); static DEFINE_PER_CPU(unsigned long, saved_lvtpc); @@ -31,6 +40,13 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +extern atomic_t multiplex_counter; +#endif + +struct op_counter_config counter_config[OP_MAX_COUNTER]; + /* common functions */ u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, @@ -95,6 +111,11 @@ static void free_msrs(void) per_cpu(cpu_msrs, i).counters = NULL; kfree(per_cpu(cpu_msrs, i).controls); per_cpu(cpu_msrs, i).controls = NULL; + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + kfree(per_cpu(cpu_msrs, i).multiplex); + per_cpu(cpu_msrs, i).multiplex = NULL; +#endif } } @@ -103,6 +124,9 @@ static int allocate_msrs(void) int success = 1; size_t controls_size = sizeof(struct op_msr) * model->num_controls; size_t counters_size = sizeof(struct op_msr) * model->num_counters; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + size_t multiplex_size = sizeof(struct op_msr) * model->num_virt_counters; +#endif int i; for_each_possible_cpu(i) { @@ -118,6 +142,14 @@ static int allocate_msrs(void) success = 0; break; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + per_cpu(cpu_msrs, i).multiplex = + kmalloc(multiplex_size, GFP_KERNEL); + if (!per_cpu(cpu_msrs, i).multiplex) { + success = 0; + break; + } +#endif } if (!success) @@ -126,6 +158,25 @@ static int allocate_msrs(void) return success; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void nmi_setup_cpu_mux(struct op_msrs const * const msrs) +{ + int i; + struct op_msr *multiplex = msrs->multiplex; + + for (i = 0; i < model->num_virt_counters; ++i) { + if (counter_config[i].enabled) { + multiplex[i].saved = -(u64)counter_config[i].count; + } else { + multiplex[i].addr = 0; + multiplex[i].saved = 0; + } + } +} + +#endif + static void nmi_cpu_setup(void *dummy) { int cpu = smp_processor_id(); @@ -133,6 +184,9 @@ static void nmi_cpu_setup(void *dummy) nmi_cpu_save_registers(msrs); spin_lock(&oprofilefs_lock); model->setup_ctrs(model, msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + nmi_setup_cpu_mux(msrs); +#endif spin_unlock(&oprofilefs_lock); per_cpu(saved_lvtpc, cpu) = apic_read(APIC_LVTPC); apic_write(APIC_LVTPC, APIC_DM_NMI); @@ -173,14 +227,52 @@ static int nmi_setup(void) memcpy(per_cpu(cpu_msrs, cpu).controls, per_cpu(cpu_msrs, 0).controls, sizeof(struct op_msr) * model->num_controls); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + memcpy(per_cpu(cpu_msrs, cpu).multiplex, + per_cpu(cpu_msrs, 0).multiplex, + sizeof(struct op_msr) * model->num_virt_counters); +#endif } - } on_each_cpu(nmi_cpu_setup, NULL, 1); nmi_enabled = 1; return 0; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) +{ + 
unsigned int si = __get_cpu_var(switch_index); + struct op_msr *multiplex = msrs->multiplex; + unsigned int i; + + for (i = 0; i < model->num_counters; ++i) { + int offset = i + si; + if (multiplex[offset].addr) { + rdmsrl(multiplex[offset].addr, + multiplex[offset].saved); + } + } +} + +static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) +{ + unsigned int si = __get_cpu_var(switch_index); + struct op_msr *multiplex = msrs->multiplex; + unsigned int i; + + for (i = 0; i < model->num_counters; ++i) { + int offset = i + si; + if (multiplex[offset].addr) { + wrmsrl(multiplex[offset].addr, + multiplex[offset].saved); + } + } +} + +#endif + static void nmi_cpu_restore_registers(struct op_msrs *msrs) { struct op_msr *counters = msrs->counters; @@ -214,6 +306,9 @@ static void nmi_cpu_shutdown(void *dummy) apic_write(APIC_LVTPC, per_cpu(saved_lvtpc, cpu)); apic_write(APIC_LVTERR, v); nmi_cpu_restore_registers(msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + __get_cpu_var(switch_index) = 0; +#endif } static void nmi_shutdown(void) @@ -252,16 +347,15 @@ static void nmi_stop(void) on_each_cpu(nmi_cpu_stop, NULL, 1); } -struct op_counter_config counter_config[OP_MAX_COUNTER]; - static int nmi_create_files(struct super_block *sb, struct dentry *root) { unsigned int i; - for (i = 0; i < model->num_counters; ++i) { + for (i = 0; i < model->num_virt_counters; ++i) { struct dentry *dir; char buf[4]; +#ifndef CONFIG_OPROFILE_EVENT_MULTIPLEX /* quick little hack to _not_ expose a counter if it is not * available for use. This should protect userspace app. * NOTE: assumes 1:1 mapping here (that counters are organized @@ -269,6 +363,7 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) */ if (unlikely(!avail_to_resrv_perfctr_nmi_bit(i))) continue; +#endif /* CONFIG_OPROFILE_EVENT_MULTIPLEX */ snprintf(buf, sizeof(buf), "%d", i); dir = oprofilefs_mkdir(sb, root, buf); @@ -283,6 +378,57 @@ static int nmi_create_files(struct super_block *sb, struct dentry *root) return 0; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void nmi_cpu_switch(void *dummy) +{ + int cpu = smp_processor_id(); + int si = per_cpu(switch_index, cpu); + struct op_msrs *msrs = &per_cpu(cpu_msrs, cpu); + + nmi_cpu_stop(NULL); + nmi_cpu_save_mpx_registers(msrs); + + /* move to next set */ + si += model->num_counters; + if ((si > model->num_virt_counters) || (counter_config[si].count == 0)) + per_cpu(switch_index, cpu) = 0; + else + per_cpu(switch_index, cpu) = si; + + model->switch_ctrl(model, msrs); + nmi_cpu_restore_mpx_registers(msrs); + + nmi_cpu_start(NULL); +} + + +/* + * Quick check to see if multiplexing is necessary. + * The check should be sufficient since counters are used + * in ordre. + */ +static int nmi_multiplex_on(void) +{ + return counter_config[model->num_counters].count ? 
0 : -EINVAL; +} + +static int nmi_switch_event(void) +{ + if (!model->switch_ctrl) + return -ENOSYS; /* not implemented */ + if (nmi_multiplex_on() < 0) + return -EINVAL; /* not necessary */ + + on_each_cpu(nmi_cpu_switch, NULL, 1); + + atomic_inc(&multiplex_counter); + + return 0; +} + +#endif + #ifdef CONFIG_SMP static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, void *data) @@ -516,12 +662,18 @@ int __init op_nmi_init(struct oprofile_operations *ops) register_cpu_notifier(&oprofile_cpu_nb); #endif /* default values, can be overwritten by model */ +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + __raw_get_cpu_var(switch_index) = 0; +#endif ops->create_files = nmi_create_files; ops->setup = nmi_setup; ops->shutdown = nmi_shutdown; ops->start = nmi_start; ops->stop = nmi_stop; ops->cpu_type = cpu_type; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + ops->switch_events = nmi_switch_event; +#endif if (model->init) ret = model->init(ops); diff --git a/arch/x86/oprofile/op_counter.h b/arch/x86/oprofile/op_counter.h index 91b6a116165e..e28398df0df2 100644 --- a/arch/x86/oprofile/op_counter.h +++ b/arch/x86/oprofile/op_counter.h @@ -10,7 +10,7 @@ #ifndef OP_COUNTER_H #define OP_COUNTER_H -#define OP_MAX_COUNTER 8 +#define OP_MAX_COUNTER 32 /* Per-perfctr configuration as set via * oprofilefs. diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index f676f8825a3f..fdbed3a0c877 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -9,12 +9,15 @@ * @author Philippe Elie * @author Graydon Hoare * @author Robert Richter - * @author Barry Kasindorf + * @author Barry Kasindorf + * @author Jason Yeh + * @author Suravee Suthikulpanit */ #include #include #include +#include #include #include @@ -25,12 +28,23 @@ #define NUM_COUNTERS 4 #define NUM_CONTROLS 4 +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +#define NUM_VIRT_COUNTERS 32 +#define NUM_VIRT_CONTROLS 32 +#else +#define NUM_VIRT_COUNTERS NUM_COUNTERS +#define NUM_VIRT_CONTROLS NUM_CONTROLS +#endif + #define OP_EVENT_MASK 0x0FFF #define OP_CTR_OVERFLOW (1ULL<<31) #define MSR_AMD_EVENTSEL_RESERVED ((0xFFFFFCF0ULL<<32)|(1ULL<<21)) -static unsigned long reset_value[NUM_COUNTERS]; +static unsigned long reset_value[NUM_VIRT_COUNTERS]; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +DECLARE_PER_CPU(int, switch_index); +#endif #ifdef CONFIG_OPROFILE_IBS @@ -82,6 +96,16 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) else msrs->controls[i].addr = 0; } + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + for (i = 0; i < NUM_VIRT_COUNTERS; i++) { + int hw_counter = i % NUM_CONTROLS; + if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) + msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; + else + msrs->multiplex[i].addr = 0; + } +#endif } static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, @@ -90,6 +114,15 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, u64 val; int i; + /* setup reset_value */ + for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { + if (counter_config[i].enabled) { + reset_value[i] = counter_config[i].count; + } else { + reset_value[i] = 0; + } + } + /* clear all counters */ for (i = 0; i < NUM_CONTROLS; ++i) { if (unlikely(!msrs->controls[i].addr)) @@ -108,20 +141,49 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { - if (counter_config[i].enabled && msrs->counters[i].addr) { - reset_value[i] = counter_config[i].count; - 
wrmsrl(msrs->counters[i].addr, - -(u64)counter_config[i].count); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + int offset = i + __get_cpu_var(switch_index); +#else + int offset = i; +#endif + if (counter_config[offset].enabled && msrs->counters[i].addr) { + /* setup counter registers */ + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]); + + /* setup control registers */ rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; - val |= op_x86_get_ctrl(model, &counter_config[i]); + val |= op_x86_get_ctrl(model, &counter_config[offset]); + wrmsrl(msrs->controls[i].addr, val); + } + } +} + + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void op_amd_switch_ctrl(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs) +{ + u64 val; + int i; + + /* enable active counters */ + for (i = 0; i < NUM_COUNTERS; ++i) { + int offset = i + __get_cpu_var(switch_index); + if (counter_config[offset].enabled) { + /* setup control registers */ + rdmsrl(msrs->controls[i].addr, val); + val &= model->reserved; + val |= op_x86_get_ctrl(model, &counter_config[offset]); wrmsrl(msrs->controls[i].addr, val); - } else { - reset_value[i] = 0; } } } +#endif + + #ifdef CONFIG_OPROFILE_IBS static inline int @@ -230,14 +292,19 @@ static int op_amd_check_ctrs(struct pt_regs * const regs, int i; for (i = 0; i < NUM_COUNTERS; ++i) { - if (!reset_value[i]) +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + int offset = i + __get_cpu_var(switch_index); +#else + int offset = i; +#endif + if (!reset_value[offset]) continue; rdmsrl(msrs->counters[i].addr, val); /* bit is clear if overflowed: */ if (val & OP_CTR_OVERFLOW) continue; - oprofile_add_sample(regs, i); - wrmsrl(msrs->counters[i].addr, -(u64)reset_value[i]); + oprofile_add_sample(regs, offset); + wrmsrl(msrs->counters[i].addr, -(u64)reset_value[offset]); } op_amd_handle_ibs(regs, msrs); @@ -250,8 +317,14 @@ static void op_amd_start(struct op_msrs const * const msrs) { u64 val; int i; + for (i = 0; i < NUM_COUNTERS; ++i) { - if (reset_value[i]) { +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + int offset = i + __get_cpu_var(switch_index); +#else + int offset = i; +#endif + if (reset_value[offset]) { rdmsrl(msrs->controls[i].addr, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; wrmsrl(msrs->controls[i].addr, val); @@ -271,7 +344,11 @@ static void op_amd_stop(struct op_msrs const * const msrs) * pm callback */ for (i = 0; i < NUM_COUNTERS; ++i) { +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + if (!reset_value[i + per_cpu(switch_index, smp_processor_id())]) +#else if (!reset_value[i]) +#endif continue; rdmsrl(msrs->controls[i].addr, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -289,7 +366,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) if (msrs->counters[i].addr) release_perfctr_nmi(MSR_K7_PERFCTR0 + i); } - for (i = 0; i < NUM_CONTROLS; ++i) { + for (i = 0; i < NUM_COUNTERS; ++i) { if (msrs->controls[i].addr) release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); } @@ -463,6 +540,8 @@ static void op_amd_exit(void) {} struct op_x86_model_spec const op_amd_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, + .num_virt_counters = NUM_VIRT_COUNTERS, + .num_virt_controls = NUM_VIRT_CONTROLS, .reserved = MSR_AMD_EVENTSEL_RESERVED, .event_mask = OP_EVENT_MASK, .init = op_amd_init, @@ -473,4 +552,7 @@ struct op_x86_model_spec const op_amd_spec = { .start = &op_amd_start, .stop = &op_amd_stop, .shutdown = &op_amd_shutdown, +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + .switch_ctrl = &op_amd_switch_ctrl, +#endif }; diff --git 
a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 5921b7fc724b..65b9237cde8b 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -698,6 +698,8 @@ static void p4_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_p4_ht2_spec = { .num_counters = NUM_COUNTERS_HT2, .num_controls = NUM_CONTROLS_HT2, + .num_virt_counters = NUM_COUNTERS_HT2, + .num_virt_controls = NUM_CONTROLS_HT2, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, @@ -710,6 +712,8 @@ struct op_x86_model_spec const op_p4_ht2_spec = { struct op_x86_model_spec const op_p4_spec = { .num_counters = NUM_COUNTERS_NON_HT, .num_controls = NUM_CONTROLS_NON_HT, + .num_virt_counters = NUM_COUNTERS_NON_HT, + .num_virt_controls = NUM_CONTROLS_NON_HT, .fill_in_addresses = &p4_fill_in_addresses, .setup_ctrs = &p4_setup_ctrs, .check_ctrs = &p4_check_ctrs, diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 570d717c3308..098cbca5c0b0 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -206,6 +206,8 @@ static void ppro_shutdown(struct op_msrs const * const msrs) struct op_x86_model_spec const op_ppro_spec = { .num_counters = 2, .num_controls = 2, + .num_virt_counters = 2, + .num_virt_controls = 2, .reserved = MSR_PPRO_EVENTSEL_RESERVED, .fill_in_addresses = &ppro_fill_in_addresses, .setup_ctrs = &ppro_setup_ctrs, diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 505489873b9d..0d07d23cb062 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -23,6 +23,7 @@ struct op_msr { struct op_msrs { struct op_msr *counters; struct op_msr *controls; + struct op_msr *multiplex; }; struct pt_regs; @@ -35,6 +36,8 @@ struct oprofile_operations; struct op_x86_model_spec { unsigned int num_counters; unsigned int num_controls; + unsigned int num_virt_counters; + unsigned int num_virt_controls; u64 reserved; u16 event_mask; int (*init)(struct oprofile_operations *ops); @@ -47,6 +50,10 @@ struct op_x86_model_spec { void (*start)(struct op_msrs const * const msrs); void (*stop)(struct op_msrs const * const msrs); void (*shutdown)(struct op_msrs const * const msrs); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + void (*switch_ctrl)(struct op_x86_model_spec const *model, + struct op_msrs const * const msrs); +#endif }; struct op_counter_config; diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 3cffce90f82a..7bc64af7cf99 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include "oprof.h" @@ -27,6 +29,15 @@ unsigned long oprofile_backtrace_depth; static unsigned long is_setup; static DEFINE_MUTEX(start_mutex); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void switch_worker(struct work_struct *work); +static DECLARE_DELAYED_WORK(switch_work, switch_worker); +unsigned long timeout_jiffies; +#define MULTIPLEXING_TIMER_DEFAULT 1 + +#endif + /* timer 0 - use performance monitoring hardware if available 1 - use the timer int mechanism regardless @@ -87,6 +98,20 @@ out: return err; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void start_switch_worker(void) +{ + schedule_delayed_work(&switch_work, timeout_jiffies); +} + +static void switch_worker(struct work_struct *work) +{ + if (!oprofile_ops.switch_events()) + start_switch_worker(); +} + +#endif /* Actually start profiling (echo 
1>/dev/oprofile/enable) */ int oprofile_start(void) @@ -108,6 +133,11 @@ int oprofile_start(void) if ((err = oprofile_ops.start())) goto out; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + if (oprofile_ops.switch_events) + start_switch_worker(); +#endif + oprofile_started = 1; out: mutex_unlock(&start_mutex); @@ -123,6 +153,11 @@ void oprofile_stop(void) goto out; oprofile_ops.stop(); oprofile_started = 0; + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + cancel_delayed_work_sync(&switch_work); +#endif + /* wake up the daemon to read what remains */ wake_up_buffer_waiter(); out: @@ -155,6 +190,36 @@ post_sync: mutex_unlock(&start_mutex); } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +/* User inputs in ms, converts to jiffies */ +int oprofile_set_timeout(unsigned long val_msec) +{ + int err = 0; + + mutex_lock(&start_mutex); + + if (oprofile_started) { + err = -EBUSY; + goto out; + } + + if (!oprofile_ops.switch_events) { + err = -EINVAL; + goto out; + } + + timeout_jiffies = msecs_to_jiffies(val_msec); + if (timeout_jiffies == MAX_JIFFY_OFFSET) + timeout_jiffies = msecs_to_jiffies(MULTIPLEXING_TIMER_DEFAULT); + +out: + mutex_unlock(&start_mutex); + return err; + +} + +#endif int oprofile_set_backtrace(unsigned long val) { @@ -179,10 +244,23 @@ out: return err; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void __init oprofile_multiplexing_init(void) +{ + timeout_jiffies = msecs_to_jiffies(MULTIPLEXING_TIMER_DEFAULT); +} + +#endif + static int __init oprofile_init(void) { int err; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + oprofile_multiplexing_init(); +#endif + err = oprofile_arch_init(&oprofile_ops); if (err < 0 || timer) { diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index c288d3c24b50..ee38abcc74f3 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -27,6 +27,7 @@ extern unsigned long oprofile_buffer_watershed; extern struct oprofile_operations oprofile_ops; extern unsigned long oprofile_started; extern unsigned long oprofile_backtrace_depth; +extern unsigned long timeout_jiffies; struct super_block; struct dentry; @@ -35,5 +36,6 @@ void oprofile_create_files(struct super_block *sb, struct dentry *root); void oprofile_timer_init(struct oprofile_operations *ops); int oprofile_set_backtrace(unsigned long depth); +int oprofile_set_timeout(unsigned long time); #endif /* OPROF_H */ diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index 5d36ffc30dd5..468ec3e4f856 100644 --- a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c @@ -9,6 +9,7 @@ #include #include +#include #include "event_buffer.h" #include "oprofile_stats.h" @@ -22,6 +23,45 @@ unsigned long oprofile_buffer_size; unsigned long oprofile_cpu_buffer_size; unsigned long oprofile_buffer_watershed; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static ssize_t timeout_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(jiffies_to_msecs(timeout_jiffies), + buf, count, offset); +} + + +static ssize_t timeout_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; + + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval) + return retval; + + retval = oprofile_set_timeout(val); + + if (retval) + return retval; + return count; +} + + +static const struct file_operations timeout_fops = { + .read = timeout_read, + .write = timeout_write, +}; + +#endif + + static ssize_t depth_read(struct 
file *file, char __user *buf, size_t count, loff_t *offset) { return oprofilefs_ulong_to_user(oprofile_backtrace_depth, buf, count, @@ -139,6 +179,9 @@ void oprofile_create_files(struct super_block *sb, struct dentry *root) oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + oprofilefs_create_file(sb, root, "time_slice", &timeout_fops); +#endif oprofile_create_stats_files(sb, root); if (oprofile_ops.create_files) oprofile_ops.create_files(sb, root); diff --git a/drivers/oprofile/oprofile_stats.c b/drivers/oprofile/oprofile_stats.c index 3c2270a8300c..77a57a6792f6 100644 --- a/drivers/oprofile/oprofile_stats.c +++ b/drivers/oprofile/oprofile_stats.c @@ -16,6 +16,9 @@ #include "cpu_buffer.h" struct oprofile_stat_struct oprofile_stats; +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX +atomic_t multiplex_counter; +#endif void oprofile_reset_stats(void) { @@ -34,6 +37,9 @@ void oprofile_reset_stats(void) atomic_set(&oprofile_stats.sample_lost_no_mapping, 0); atomic_set(&oprofile_stats.event_lost_overflow, 0); atomic_set(&oprofile_stats.bt_lost_no_mapping, 0); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + atomic_set(&multiplex_counter, 0); +#endif } @@ -76,4 +82,8 @@ void oprofile_create_stats_files(struct super_block *sb, struct dentry *root) &oprofile_stats.event_lost_overflow); oprofilefs_create_ro_atomic(sb, dir, "bt_lost_no_mapping", &oprofile_stats.bt_lost_no_mapping); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + oprofilefs_create_ro_atomic(sb, dir, "multiplex_counter", + &multiplex_counter); +#endif } diff --git a/include/linux/oprofile.h b/include/linux/oprofile.h index d68d2ed94f15..5171639ecf0f 100644 --- a/include/linux/oprofile.h +++ b/include/linux/oprofile.h @@ -67,6 +67,9 @@ struct oprofile_operations { /* Initiate a stack backtrace. Optional. */ void (*backtrace)(struct pt_regs * const regs, unsigned int depth); + + /* Multiplex between different events. Optional. */ + int (*switch_events)(void); /* CPU identification string. */ char * cpu_type; }; -- cgit v1.2.3 From e3afe7b75ed8f809c1473ea9b39267487c187ccb Mon Sep 17 00:00:00 2001 From: John Dykstra Date: Thu, 16 Jul 2009 05:04:51 +0000 Subject: tcp: Fix MD5 signature checking on IPv4 mapped sockets Fix MD5 signature checking so that an IPv4 active open to an IPv6 socket can succeed. In particular, use the correct address family's signature generation function for the SYN/ACK. Reported-by: Stephen Hemminger Signed-off-by: John Dykstra Signed-off-by: David S. 
Miller --- include/net/tcp.h | 5 +++++ net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 2 +- net/ipv6/tcp_ipv6.c | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 19f4150f4d4d..88af84306471 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1425,6 +1425,11 @@ struct tcp_request_sock_ops { #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *(*md5_lookup) (struct sock *sk, struct request_sock *req); + int (*calc_md5_hash) (char *location, + struct tcp_md5sig_key *md5, + struct sock *sk, + struct request_sock *req, + struct sk_buff *skb); #endif }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5a1ca2698c88..7c107eb876c8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1160,6 +1160,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = { #ifdef CONFIG_TCP_MD5SIG static struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = { .md5_lookup = tcp_v4_reqsk_md5_lookup, + .calc_md5_hash = tcp_v4_md5_hash_skb, }; #endif diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5bdf08d312d9..bd62712848fa 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2261,7 +2261,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_TCP_MD5SIG /* Okay, we have all we need - do the md5 hash if needed */ if (md5) { - tp->af_specific->calc_md5_hash(md5_hash_location, + tcp_rsk(req)->af_specific->calc_md5_hash(md5_hash_location, md5, NULL, req, skb); } #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 58810c65b635..ae3d65753562 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -896,6 +896,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { #ifdef CONFIG_TCP_MD5SIG static struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .md5_lookup = tcp_v6_reqsk_md5_lookup, + .calc_md5_hash = tcp_v6_md5_hash_skb, }; #endif -- cgit v1.2.3 From 68fd60a8c8bca6af51805c45f286f0f2572ac977 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Thu, 16 Jul 2009 10:53:34 +0800 Subject: tracing/events: add missing type info of dynamic arrays The format file doesn't contain enough information for __dynamic_array/__string. The type name is missing. Before: # cat format: name: irq_handler_entry ... field:__data_loc name; offset:16; size:2; After: # cat format: name: irq_handler_entry ... 
field:__data_loc char[] name; offset:16; size:2; Signed-off-by: Lai Jiangshan LKML-Reference: <4A5E962E.9020900@cn.fujitsu.com> Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 1867553c61e5..cc78943e0038 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -120,7 +120,7 @@ #undef __dynamic_array #define __dynamic_array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t" \ + ret = trace_seq_printf(s, "\tfield:__data_loc " #type "[] " #item ";\t"\ "offset:%u;\tsize:%u;\n", \ (unsigned int)offsetof(typeof(field), \ __data_loc_##item), \ @@ -279,7 +279,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef __dynamic_array #define __dynamic_array(type, item, len) \ - ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\ + ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ offsetof(typeof(field), __data_loc_##item), \ sizeof(field.__data_loc_##item), 0); -- cgit v1.2.3 From 4edf547b4d0f886acf5aa5a0c8f8edbaff280830 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Jul 2009 06:16:34 +0000 Subject: net: explain netns notifiers a little better Eric explained this to me -- and afterwards the comment made sense, but not before. Add the the critical point about interfaces having to be gone from the netns before subsys notifiers are called. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/net_namespace.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 5c5136fceea8..a1202841aadd 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -239,13 +239,15 @@ struct pernet_operations { * needs per network namespace operations use device pernet operations, * otherwise use pernet subsys operations. * - * This is critically important. Most of the network code cleanup - * runs with the assumption that dev_remove_pack has been called so no - * new packets will arrive during and after the cleanup functions have - * been called. dev_remove_pack is not per namespace so instead the - * guarantee of no more packets arriving in a network namespace is - * provided by ensuring that all network devices and all sockets have - * left the network namespace before the cleanup methods are called. + * Network interfaces need to be removed from a dying netns _before_ + * subsys notifiers can be called, as most of the network code cleanup + * (which is done from subsys notifiers) runs with the assumption that + * dev_remove_pack has been called so no new packets will arrive during + * and after the cleanup functions have been called. dev_remove_pack + * is not per namespace so instead the guarantee of no more packets + * arriving in a network namespace is provided by ensuring that all + * network devices and all sockets have left the network namespace + * before the cleanup methods are called. 
* * For the longest time the ipv4 icmp code was registered as a pernet * device which caused kernel oops, and panics during network -- cgit v1.2.3 From 7d536cb3fb9993bdcd5a2fbaa6b0670ded4e101c Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 16 Jul 2009 10:54:02 +0800 Subject: tracing/events: record the size of dynamic arrays When a dynamic array is defined, we add __data_loc_foo in trace_entry to record the offset of the array, but the size of the array is not recorded, which causes 2 problems: - the event filter just compares the first 2 chars of the strings. - parsers can't parse dynamic arrays. So we encode the size of each dynamic array in the higher 16 bits of __data_loc_foo, while the offset is in lower 16 bits. Signed-off-by: Li Zefan LKML-Reference: <4A5E964A.9000403@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 14 ++++++++------ kernel/trace/trace_events_filter.c | 6 ++++-- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index cc78943e0038..3cbb96ef34f4 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -25,7 +25,7 @@ #define __array(type, item, len) type item[len]; #undef __dynamic_array -#define __dynamic_array(type, item, len) unsigned short __data_loc_##item; +#define __dynamic_array(type, item, len) u32 __data_loc_##item; #undef __string #define __string(item, src) __dynamic_array(char, item, -1) @@ -51,13 +51,14 @@ * Include the following: * * struct ftrace_data_offsets_ { - * int ; - * int ; + * u32 ; + * u32 ; * [...] * }; * - * The __dynamic_array() macro will create each int , this is + * The __dynamic_array() macro will create each u32 , this is * to keep the offset of each array from the beginning of the event. + * The size of an array is also encoded, in the higher 16 bits of . 
*/ #undef __field @@ -67,7 +68,7 @@ #define __array(type, item, len) #undef __dynamic_array -#define __dynamic_array(type, item, len) int item; +#define __dynamic_array(type, item, len) u32 item; #undef __string #define __string(item, src) __dynamic_array(char, item, -1) @@ -207,7 +208,7 @@ ftrace_format_##call(struct trace_seq *s) \ #undef __get_dynamic_array #define __get_dynamic_array(field) \ - ((void *)__entry + __entry->__data_loc_##field) + ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) #undef __get_str #define __get_str(field) (char *)__get_dynamic_array(field) @@ -325,6 +326,7 @@ ftrace_define_fields_##call(void) \ #define __dynamic_array(type, item, len) \ __data_offsets->item = __data_size + \ offsetof(typeof(*entry), __data); \ + __data_offsets->item |= (len * sizeof(type)) << 16; \ __data_size += (len) * sizeof(type); #undef __string diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index b9aae72d13db..1c80ef702b83 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -176,11 +176,13 @@ static int filter_pred_string(struct filter_pred *pred, void *event, static int filter_pred_strloc(struct filter_pred *pred, void *event, int val1, int val2) { - unsigned short str_loc = *(unsigned short *)(event + pred->offset); + u32 str_item = *(u32 *)(event + pred->offset); + int str_loc = str_item & 0xffff; + int str_len = str_item >> 16; char *addr = (char *)(event + str_loc); int cmp, match; - cmp = strncmp(addr, pred->str_val, pred->str_len); + cmp = strncmp(addr, pred->str_val, str_len); match = (!cmp) ^ pred->not; -- cgit v1.2.3 From 1f9963cbb0280e0cd554161e00f1a0eeddbf1ae1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 20 Jul 2009 10:20:53 +0800 Subject: tracing/filters: improve subsystem filter Currently a subsystem filter should be applicable to all events under the subsystem, and if it failed, all the event filters will be cleared. Those behaviors make subsys filter much less useful: # echo 'vec == 1' > irq/softirq_entry/filter # echo 'irq == 5' > irq/filter bash: echo: write error: Invalid argument # cat irq/softirq_entry/filter none I'd expect it set the filter for irq_handler_entry/exit, and not touch softirq_entry/exit. The basic idea is, try to see if the filter can be applied to which events, and then just apply to the those events: # echo 'vec == 1' > softirq_entry/filter # echo 'irq == 5' > filter # cat irq_handler_entry/filter irq == 5 # cat softirq_entry/filter vec == 1 Changelog for v2: - do some cleanups to address Frederic's comments. 
Inspired-by: Steven Rostedt Signed-off-by: Li Zefan Acked-by: Frederic Weisbecker LKML-Reference: <4A63D485.7030703@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 4 +- kernel/trace/trace.h | 3 +- kernel/trace/trace_events_filter.c | 124 ++++++++++++++++++++++++------------- 3 files changed, 87 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5c093ffc655b..26d3673d5143 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -101,6 +101,8 @@ void trace_current_buffer_discard_commit(struct ring_buffer_event *event); void tracing_record_cmdline(struct task_struct *tsk); +struct event_filter; + struct ftrace_event_call { struct list_head list; char *name; @@ -116,7 +118,7 @@ struct ftrace_event_call { int (*define_fields)(void); struct list_head fields; int filter_active; - void *filter; + struct event_filter *filter; void *mod; #ifdef CONFIG_EVENT_PROFILE diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 94305c7bc11c..758b0dbed552 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -750,13 +750,14 @@ struct event_filter { int n_preds; struct filter_pred **preds; char *filter_string; + bool no_reset; }; struct event_subsystem { struct list_head list; const char *name; struct dentry *entry; - void *filter; + struct event_filter *filter; int nr_events; }; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 1c80ef702b83..27c2dbea3710 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -420,7 +420,14 @@ oom: } EXPORT_SYMBOL_GPL(init_preds); -static void filter_free_subsystem_preds(struct event_subsystem *system) +enum { + FILTER_DISABLE_ALL, + FILTER_INIT_NO_RESET, + FILTER_SKIP_NO_RESET, +}; + +static void filter_free_subsystem_preds(struct event_subsystem *system, + int flag) { struct ftrace_event_call *call; @@ -428,6 +435,14 @@ static void filter_free_subsystem_preds(struct event_subsystem *system) if (!call->define_fields) continue; + if (flag == FILTER_INIT_NO_RESET) { + call->filter->no_reset = false; + continue; + } + + if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset) + continue; + if (!strcmp(call->system, system->name)) { filter_disable_preds(call); remove_filter_string(call->filter); @@ -529,7 +544,8 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size, static int filter_add_pred(struct filter_parse_state *ps, struct ftrace_event_call *call, - struct filter_pred *pred) + struct filter_pred *pred, + bool dry_run) { struct ftrace_event_field *field; filter_pred_fn_t fn; @@ -541,10 +557,12 @@ static int filter_add_pred(struct filter_parse_state *ps, if (pred->op == OP_AND) { pred->pop_n = 2; - return filter_add_pred_fn(ps, call, pred, filter_pred_and); + fn = filter_pred_and; + goto add_pred_fn; } else if (pred->op == OP_OR) { pred->pop_n = 2; - return filter_add_pred_fn(ps, call, pred, filter_pred_or); + fn = filter_pred_or; + goto add_pred_fn; } field = find_event_field(call, pred->field_name); @@ -567,9 +585,6 @@ static int filter_add_pred(struct filter_parse_state *ps, else fn = filter_pred_strloc; pred->str_len = field->size; - if (pred->op == OP_NE) - pred->not = 1; - return filter_add_pred_fn(ps, call, pred, fn); } else { if (field->is_signed) ret = strict_strtoll(pred->str_val, 0, &val); @@ -580,27 +595,33 @@ static int filter_add_pred(struct filter_parse_state *ps, return -EINVAL; } pred->val = val; - } - fn = 
select_comparison_fn(pred->op, field->size, field->is_signed); - if (!fn) { - parse_error(ps, FILT_ERR_INVALID_OP, 0); - return -EINVAL; + fn = select_comparison_fn(pred->op, field->size, + field->is_signed); + if (!fn) { + parse_error(ps, FILT_ERR_INVALID_OP, 0); + return -EINVAL; + } } if (pred->op == OP_NE) pred->not = 1; - return filter_add_pred_fn(ps, call, pred, fn); +add_pred_fn: + if (!dry_run) + return filter_add_pred_fn(ps, call, pred, fn); + return 0; } static int filter_add_subsystem_pred(struct filter_parse_state *ps, struct event_subsystem *system, struct filter_pred *pred, - char *filter_string) + char *filter_string, + bool dry_run) { struct ftrace_event_call *call; int err = 0; + bool fail = true; list_for_each_entry(call, &ftrace_events, list) { @@ -610,16 +631,24 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps, if (strcmp(call->system, system->name)) continue; - err = filter_add_pred(ps, call, pred); - if (err) { - filter_free_subsystem_preds(system); - parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); - goto out; - } - replace_filter_string(call->filter, filter_string); + if (call->filter->no_reset) + continue; + + err = filter_add_pred(ps, call, pred, dry_run); + if (err) + call->filter->no_reset = true; + else + fail = false; + + if (!dry_run) + replace_filter_string(call->filter, filter_string); } -out: - return err; + + if (fail) { + parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0); + return err; + } + return 0; } static void parse_init(struct filter_parse_state *ps, @@ -978,12 +1007,14 @@ static int check_preds(struct filter_parse_state *ps) static int replace_preds(struct event_subsystem *system, struct ftrace_event_call *call, struct filter_parse_state *ps, - char *filter_string) + char *filter_string, + bool dry_run) { char *operand1 = NULL, *operand2 = NULL; struct filter_pred *pred; struct postfix_elt *elt; int err; + int n_preds = 0; err = check_preds(ps); if (err) @@ -1002,19 +1033,14 @@ static int replace_preds(struct event_subsystem *system, continue; } + if (n_preds++ == MAX_FILTER_PRED) { + parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0); + return -ENOSPC; + } + if (elt->op == OP_AND || elt->op == OP_OR) { pred = create_logical_pred(elt->op); - if (call) - err = filter_add_pred(ps, call, pred); - else - err = filter_add_subsystem_pred(ps, system, - pred, filter_string); - filter_free_pred(pred); - if (err) - return err; - - operand1 = operand2 = NULL; - continue; + goto add_pred; } if (!operand1 || !operand2) { @@ -1023,11 +1049,12 @@ static int replace_preds(struct event_subsystem *system, } pred = create_pred(elt->op, operand1, operand2); +add_pred: if (call) - err = filter_add_pred(ps, call, pred); + err = filter_add_pred(ps, call, pred, false); else err = filter_add_subsystem_pred(ps, system, pred, - filter_string); + filter_string, dry_run); filter_free_pred(pred); if (err) return err; @@ -1068,7 +1095,7 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) goto out; } - err = replace_preds(NULL, call, ps, filter_string); + err = replace_preds(NULL, call, ps, filter_string, false); if (err) append_filter_err(ps, call->filter); @@ -1092,7 +1119,7 @@ int apply_subsystem_event_filter(struct event_subsystem *system, mutex_lock(&event_mutex); if (!strcmp(strstrip(filter_string), "0")) { - filter_free_subsystem_preds(system); + filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); remove_filter_string(system->filter); mutex_unlock(&event_mutex); return 0; @@ -1103,7 +1130,6 @@ int 
apply_subsystem_event_filter(struct event_subsystem *system, if (!ps) goto out_unlock; - filter_free_subsystem_preds(system); replace_filter_string(system->filter, filter_string); parse_init(ps, filter_ops, filter_string); @@ -1113,9 +1139,23 @@ int apply_subsystem_event_filter(struct event_subsystem *system, goto out; } - err = replace_preds(system, NULL, ps, filter_string); - if (err) + filter_free_subsystem_preds(system, FILTER_INIT_NO_RESET); + + /* try to see the filter can be applied to which events */ + err = replace_preds(system, NULL, ps, filter_string, true); + if (err) { + append_filter_err(ps, system->filter); + goto out; + } + + filter_free_subsystem_preds(system, FILTER_SKIP_NO_RESET); + + /* really apply the filter to the events */ + err = replace_preds(system, NULL, ps, filter_string, false); + if (err) { append_filter_err(ps, system->filter); + filter_free_subsystem_preds(system, 2); + } out: filter_opstack_clear(ps); -- cgit v1.2.3 From 99fde513f57db2c8e1b202ade4be7d47033ff09b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 20 Jul 2009 22:28:50 -0700 Subject: Input: wm97xx - add possibility to control the GPIO_STATUS shift This patch allows tweaking the behaviour of GPIO_STATUS register shift quirk that's in wm97xx-core. The problem with GPIO_STATUS register being shifted by one doesn't appear on all hardware it seems and causes problems with accelerated touchscreen drivers on Palm hardware. Therefore an accelerated touchscreen driver can select if the shift is/isn't happening on the hardware. Signed-off-by: Marek Vasut Acked-by: Mark Brown Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/mainstone-wm97xx.c | 3 +++ drivers/input/touchscreen/wm97xx-core.c | 6 ++++-- include/linux/wm97xx.h | 7 +++++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c index c797bc04ee83..8fc3b08deb3b 100644 --- a/drivers/input/touchscreen/mainstone-wm97xx.c +++ b/drivers/input/touchscreen/mainstone-wm97xx.c @@ -198,6 +198,9 @@ static int wm97xx_acc_startup(struct wm97xx *wm) if (machine_is_palmt5() || machine_is_palmtx() || machine_is_palmld()) { pen_int = 1; irq = 27; + /* There is some obscure mutant of WM9712 interbred with WM9713 + * used on Palm HW */ + wm->variant = WM97xx_WM1613; } else if (machine_is_mainstone() && pen_int) irq = 4; diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c index 2957d48e0045..252eb11fe9db 100644 --- a/drivers/input/touchscreen/wm97xx-core.c +++ b/drivers/input/touchscreen/wm97xx-core.c @@ -204,7 +204,7 @@ void wm97xx_set_gpio(struct wm97xx *wm, u32 gpio, else reg &= ~gpio; - if (wm->id == WM9712_ID2) + if (wm->id == WM9712_ID2 && wm->variant != WM97xx_WM1613) wm97xx_reg_write(wm, AC97_GPIO_STATUS, reg << 1); else wm97xx_reg_write(wm, AC97_GPIO_STATUS, reg); @@ -307,7 +307,7 @@ static void wm97xx_pen_irq_worker(struct work_struct *work) WM97XX_GPIO_13); } - if (wm->id == WM9712_ID2) + if (wm->id == WM9712_ID2 && wm->variant != WM97xx_WM1613) wm97xx_reg_write(wm, AC97_GPIO_STATUS, (status & ~WM97XX_GPIO_13) << 1); else @@ -582,6 +582,8 @@ static int wm97xx_probe(struct device *dev) wm->id = wm97xx_reg_read(wm, AC97_VENDOR_ID2); + wm->variant = WM97xx_GENERIC; + dev_info(wm->dev, "detected a wm97%02x codec\n", wm->id & 0xff); switch (wm->id & 0xff) { diff --git a/include/linux/wm97xx.h b/include/linux/wm97xx.h index 6f69968eab24..0c9878123d5f 100644 --- 
a/include/linux/wm97xx.h +++ b/include/linux/wm97xx.h @@ -15,6 +15,12 @@ #include /* Input device layer */ #include +/* + * WM97xx variants + */ +#define WM97xx_GENERIC 0x0000 +#define WM97xx_WM1613 0x1613 + /* * WM97xx AC97 Touchscreen registers */ @@ -283,6 +289,7 @@ struct wm97xx { unsigned pen_is_down:1; /* Pen is down */ unsigned aux_waiting:1; /* aux measurement waiting */ unsigned pen_probably_down:1; /* used in polling mode */ + u16 variant; /* WM97xx chip variant */ u16 suspend_mode; /* PRP in suspend mode */ }; -- cgit v1.2.3 From 591d2fb02ea80472d846c0b8507007806bdd69cc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 21 Jul 2009 11:09:39 +0200 Subject: genirq: Delegate irq affinity setting to the irq thread irq_set_thread_affinity() calls set_cpus_allowed_ptr() which might sleep, but irq_set_thread_affinity() is called with desc->lock held and can be called from hard interrupt context as well. The code has another bug as it does not hold a ref on the task struct as required by set_cpus_allowed_ptr(). Just set the IRQTF_AFFINITY bit in action->thread_flags. The next time the thread runs it migrates itself. Solves all of the above problems nicely. Add kerneldoc to irq_set_thread_affinity() while at it. Signed-off-by: Thomas Gleixner LKML-Reference: --- include/linux/interrupt.h | 2 ++ kernel/irq/internals.h | 3 +-- kernel/irq/manage.c | 50 +++++++++++++++++++++++++++++++++++++++++------ kernel/irq/migration.c | 2 +- 4 files changed, 48 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 2721f07e9354..88b056ac5629 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -64,11 +64,13 @@ * IRQTF_RUNTHREAD - signals that the interrupt handler thread should run * IRQTF_DIED - handler thread died * IRQTF_WARNED - warning "IRQ_WAKE_THREAD w/o thread_fn" has been printed + * IRQTF_AFFINITY - irq thread is requested to adjust affinity */ enum { IRQTF_RUNTHREAD, IRQTF_DIED, IRQTF_WARNED, + IRQTF_AFFINITY, }; typedef irqreturn_t (*irq_handler_t)(int, void *); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 73468253143b..e70ed5592eb9 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -42,8 +42,7 @@ static inline void unregister_handler_proc(unsigned int irq, extern int irq_select_affinity_usr(unsigned int irq); -extern void -irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask); +extern void irq_set_thread_affinity(struct irq_desc *desc); /* * Debugging printout: diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 50da67672901..f0de36f13a44 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -80,14 +80,22 @@ int irq_can_set_affinity(unsigned int irq) return 1; } -void -irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask) +/** + * irq_set_thread_affinity - Notify irq threads to adjust affinity + * @desc: irq descriptor which has affitnity changed + * + * We just set IRQTF_AFFINITY and delegate the affinity setting + * to the interrupt thread itself. We can not call + * set_cpus_allowed_ptr() here as we hold desc->lock and this + * code can be called from hard interrupt context. 
+ */ +void irq_set_thread_affinity(struct irq_desc *desc) { struct irqaction *action = desc->action; while (action) { if (action->thread) - set_cpus_allowed_ptr(action->thread, cpumask); + set_bit(IRQTF_AFFINITY, &action->thread_flags); action = action->next; } } @@ -112,7 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) if (desc->status & IRQ_MOVE_PCNTXT) { if (!desc->chip->set_affinity(irq, cpumask)) { cpumask_copy(desc->affinity, cpumask); - irq_set_thread_affinity(desc, cpumask); + irq_set_thread_affinity(desc); } } else { @@ -122,7 +130,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) #else if (!desc->chip->set_affinity(irq, cpumask)) { cpumask_copy(desc->affinity, cpumask); - irq_set_thread_affinity(desc, cpumask); + irq_set_thread_affinity(desc); } #endif desc->status |= IRQ_AFFINITY_SET; @@ -176,7 +184,7 @@ int irq_select_affinity_usr(unsigned int irq) spin_lock_irqsave(&desc->lock, flags); ret = setup_affinity(irq, desc); if (!ret) - irq_set_thread_affinity(desc, desc->affinity); + irq_set_thread_affinity(desc); spin_unlock_irqrestore(&desc->lock, flags); return ret; @@ -443,6 +451,34 @@ static int irq_wait_for_interrupt(struct irqaction *action) return -1; } +/* + * Check whether we need to change the affinity of the interrupt thread. + */ +static void +irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) +{ + cpumask_var_t mask; + + if (!test_and_clear_bit(IRQTF_AFFINITY, &action->thread_flags)) + return; + + /* + * In case we are out of memory we set IRQTF_AFFINITY again and + * try again next time + */ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) { + set_bit(IRQTF_AFFINITY, &action->thread_flags); + return; + } + + spin_lock_irq(&desc->lock); + cpumask_copy(mask, desc->affinity); + spin_unlock_irq(&desc->lock); + + set_cpus_allowed_ptr(current, mask); + free_cpumask_var(mask); +} + /* * Interrupt handler thread */ @@ -458,6 +494,8 @@ static int irq_thread(void *data) while (!irq_wait_for_interrupt(action)) { + irq_thread_check_affinity(desc, action); + atomic_inc(&desc->threads_active); spin_lock_irq(&desc->lock); diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index cfe767ca1545..fcb6c96f2627 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -45,7 +45,7 @@ void move_masked_irq(int irq) < nr_cpu_ids)) if (!desc->chip->set_affinity(irq, desc->pending_mask)) { cpumask_copy(desc->affinity, desc->pending_mask); - irq_set_thread_affinity(desc, desc->pending_mask); + irq_set_thread_affinity(desc); } cpumask_clear(desc->pending_mask); -- cgit v1.2.3 From e56f0975360369347725c49654ecfe3792710429 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Sat, 18 Jul 2009 19:20:20 +0100 Subject: rfkill: remove too-strict __must_check Some drivers don't need the return value of rfkill_set_hw_state(), so it should not be marked as __must_check. Signed-off-by: Alan Jenkins Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/rfkill.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 2ce29831feb6..278777fa8a3a 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -224,7 +224,7 @@ void rfkill_destroy(struct rfkill *rfkill); * should be blocked) so that drivers need not keep track of the soft * block state -- which they might not be able to. 
*/ -bool __must_check rfkill_set_hw_state(struct rfkill *rfkill, bool blocked); +bool rfkill_set_hw_state(struct rfkill *rfkill, bool blocked); /** * rfkill_set_sw_state - Set the internal rfkill software block state -- cgit v1.2.3 From f44aebcc566d1d6275f7191867b9633dc11de2ee Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 15 Jul 2009 15:49:52 -0400 Subject: inotify: use GFP_NOFS under potential memory pressure inotify can have a watchs removed under filesystem reclaim. ================================= [ INFO: inconsistent lock state ] 2.6.31-rc2 #16 --------------------------------- inconsistent {IN-RECLAIM_FS-W} -> {RECLAIM_FS-ON-W} usage. khubd/217 [HC0[0]:SC0[0]:HE1:SE1] takes: (iprune_mutex){+.+.?.}, at: [] invalidate_inodes+0x20/0xe3 {IN-RECLAIM_FS-W} state was registered at: [] __lock_acquire+0x2c9/0xac4 [] lock_acquire+0x9f/0xc2 [] __mutex_lock_common+0x2d/0x323 [] mutex_lock_nested+0x2e/0x36 [] shrink_icache_memory+0x38/0x1b2 [] shrink_slab+0xe2/0x13c [] kswapd+0x3d1/0x55d [] kthread+0x66/0x6b [] kernel_thread_helper+0x7/0x10 [] 0xffffffff Two things are needed to fix this. First we need a method to tell fsnotify_create_event() to use GFP_NOFS and second we need to stop using one global IN_IGNORED event and allocate them one at a time. This solves current issues with multiple IN_IGNORED on a queue having tail drop problems and simplifies the allocations since we don't have to worry about two tasks opperating on the IGNORED event concurrently. Signed-off-by: Eric Paris --- fs/notify/fsnotify.c | 4 +++- fs/notify/inotify/inotify_user.c | 18 ++++++++++++------ fs/notify/notification.c | 9 +++++---- include/linux/fsnotify_backend.h | 2 +- 4 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index ec2f7bd76818..037e878e03fc 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -159,7 +159,9 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const if (!group->ops->should_send_event(group, to_tell, mask)) continue; if (!event) { - event = fsnotify_create_event(to_tell, mask, data, data_is, file_name, cookie); + event = fsnotify_create_event(to_tell, mask, data, + data_is, file_name, cookie, + GFP_KERNEL); /* shit, we OOM'd and now we can't tell, maybe * someday someone else will want to do something * here */ diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 726118a5845b..f30d9bbc2e1b 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -57,7 +57,6 @@ int inotify_max_user_watches __read_mostly; static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; struct kmem_cache *event_priv_cachep __read_mostly; -static struct fsnotify_event *inotify_ignored_event; /* * When inotify registers a new group it increments this and uses that @@ -384,12 +383,19 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) { struct inotify_inode_mark_entry *ientry; + struct fsnotify_event *ignored_event; struct inotify_event_private_data *event_priv; struct fsnotify_event_private_data *fsn_event_priv; + ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, + FSNOTIFY_EVENT_NONE, NULL, 0, + GFP_NOFS); + if (!ignored_event) + return; + ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); - event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); + event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS); if 
(unlikely(!event_priv)) goto skip_send_ignore; @@ -398,7 +404,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, fsn_event_priv->group = group; event_priv->wd = ientry->wd; - fsnotify_add_notify_event(group, inotify_ignored_event, fsn_event_priv); + fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); /* did the private data get added? */ if (list_empty(&fsn_event_priv->event_list)) @@ -406,6 +412,9 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, skip_send_ignore: + /* matches the reference taken when the event was created */ + fsnotify_put_event(ignored_event); + /* remove this entry from the idr */ inotify_remove_from_idr(group, ientry); @@ -748,9 +757,6 @@ static int __init inotify_user_setup(void) inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); - inotify_ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0); - if (!inotify_ignored_event) - panic("unable to allocate the inotify ignored event\n"); inotify_max_queued_events = 16384; inotify_max_user_instances = 128; diff --git a/fs/notify/notification.c b/fs/notify/notification.c index 2b20feaf263a..521368574e97 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -153,7 +153,7 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new return true; break; case (FSNOTIFY_EVENT_NONE): - return true; + return false; }; } return false; @@ -345,18 +345,19 @@ static void initialize_event(struct fsnotify_event *event) * @name the filename, if available */ struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, - int data_type, const char *name, u32 cookie) + int data_type, const char *name, u32 cookie, + gfp_t gfp) { struct fsnotify_event *event; - event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL); + event = kmem_cache_alloc(fsnotify_event_cachep, gfp); if (!event) return NULL; initialize_event(event); if (name) { - event->file_name = kstrdup(name, GFP_KERNEL); + event->file_name = kstrdup(name, gfp); if (!event->file_name) { kmem_cache_free(fsnotify_event_cachep, event); return NULL; diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6c3de999fb34..4d6f47b51189 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -352,7 +352,7 @@ extern void fsnotify_unmount_inodes(struct list_head *list); /* put here because inotify does some weird stuff when destroying watches */ extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *name, - u32 cookie); + u32 cookie, gfp_t gfp); #else -- cgit v1.2.3 From d7aacaddcac3971e33cf52d7e610c06696cb347f Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 8 Jul 2009 13:21:31 +0200 Subject: Driver Core: Add platform device arch data V3 Allow architecture specific data in struct platform_device V3. With this patch struct pdev_archdata is added to struct platform_device, similar to struct dev_archdata in found in struct device. Useful for architecture code that needs to keep extra data associated with each platform device. Struct pdev_archdata is different from dev.platform_data, the convention is that dev.platform_data points to driver-specific data. It may or may not be required by the driver. The format of this depends on driver but is the same across architectures. 
The structure pdev_archdata is a place for architecture specific data. This data is handled by architecture specific code (for example runtime PM), and since it is architecture specific it should _never_ be touched by device driver code. Exactly like struct dev_archdata but for platform devices. [rjw: This change is for power management mostly and that's why it goes through the suspend tree.] Signed-off-by: Magnus Damm Acked-by: Kevin Hilman Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- arch/arm/include/asm/device.h | 3 +++ arch/ia64/include/asm/device.h | 3 +++ arch/microblaze/include/asm/device.h | 3 +++ arch/powerpc/include/asm/device.h | 3 +++ arch/sparc/include/asm/device.h | 3 +++ arch/x86/include/asm/device.h | 3 +++ include/asm-generic/device.h | 3 +++ include/linux/platform_device.h | 3 +++ 8 files changed, 24 insertions(+) (limited to 'include') diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index c61642b40603..9f390ce335cb 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -12,4 +12,7 @@ struct dev_archdata { #endif }; +struct pdev_archdata { +}; + #endif diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h index 41ab85d66f33..d66d446b127c 100644 --- a/arch/ia64/include/asm/device.h +++ b/arch/ia64/include/asm/device.h @@ -15,4 +15,7 @@ struct dev_archdata { #endif }; +struct pdev_archdata { +}; + #endif /* _ASM_IA64_DEVICE_H */ diff --git a/arch/microblaze/include/asm/device.h b/arch/microblaze/include/asm/device.h index c042830793ed..30286db27c1c 100644 --- a/arch/microblaze/include/asm/device.h +++ b/arch/microblaze/include/asm/device.h @@ -16,6 +16,9 @@ struct dev_archdata { struct device_node *of_node; }; +struct pdev_archdata { +}; + #endif /* _ASM_MICROBLAZE_DEVICE_H */ diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 7d2277cef09a..e3e06e0f7fc0 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -30,4 +30,7 @@ dev_archdata_get_node(const struct dev_archdata *ad) return ad->of_node; } +struct pdev_archdata { +}; + #endif /* _ASM_POWERPC_DEVICE_H */ diff --git a/arch/sparc/include/asm/device.h b/arch/sparc/include/asm/device.h index 3702e087df2c..f3b85b6b0b76 100644 --- a/arch/sparc/include/asm/device.h +++ b/arch/sparc/include/asm/device.h @@ -32,4 +32,7 @@ dev_archdata_get_node(const struct dev_archdata *ad) return ad->prom_node; } +struct pdev_archdata { +}; + #endif /* _ASM_SPARC_DEVICE_H */ diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 4994a20acbcb..cee34e9ca45b 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -13,4 +13,7 @@ struct dma_map_ops *dma_ops; #endif }; +struct pdev_archdata { +}; + #endif /* _ASM_X86_DEVICE_H */ diff --git a/include/asm-generic/device.h b/include/asm-generic/device.h index c17c9600f220..d7c76bba640d 100644 --- a/include/asm-generic/device.h +++ b/include/asm-generic/device.h @@ -9,4 +9,7 @@ struct dev_archdata { }; +struct pdev_archdata { +}; + #endif /* _ASM_GENERIC_DEVICE_H */ diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 8dc5123b6305..672a69849735 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -22,6 +22,9 @@ struct platform_device { struct resource * resource; struct platform_device_id *id_entry; + + /* arch specific additions */ + struct pdev_archdata archdata; }; #define platform_get_device_id(pdev) 
((pdev)->id_entry) -- cgit v1.2.3 From 511647ff58fd0f1c1f415d2c757d841650edac91 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 8 Jul 2009 13:23:07 +0200 Subject: PM: Remove platform device suspend_late()/resume_early() V2 This is V2 of the platform driver power management late/early callback removal patch. The callbacks ->suspend_late() and ->resume_early() are removed since all in-tree users now have been migrated to dev_pm_ops. Signed-off-by: Magnus Damm Acked-by: Greg Kroah-Hartman Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- drivers/base/platform.c | 36 ------------------------------------ include/linux/platform_device.h | 2 -- 2 files changed, 38 deletions(-) (limited to 'include') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 81cb01bfc356..455e55971d0e 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -628,30 +628,6 @@ static int platform_legacy_suspend(struct device *dev, pm_message_t mesg) return ret; } -static int platform_legacy_suspend_late(struct device *dev, pm_message_t mesg) -{ - struct platform_driver *pdrv = to_platform_driver(dev->driver); - struct platform_device *pdev = to_platform_device(dev); - int ret = 0; - - if (dev->driver && pdrv->suspend_late) - ret = pdrv->suspend_late(pdev, mesg); - - return ret; -} - -static int platform_legacy_resume_early(struct device *dev) -{ - struct platform_driver *pdrv = to_platform_driver(dev->driver); - struct platform_device *pdev = to_platform_device(dev); - int ret = 0; - - if (dev->driver && pdrv->resume_early) - ret = pdrv->resume_early(pdev); - - return ret; -} - static int platform_legacy_resume(struct device *dev) { struct platform_driver *pdrv = to_platform_driver(dev->driver); @@ -714,8 +690,6 @@ static int platform_pm_suspend_noirq(struct device *dev) if (drv->pm) { if (drv->pm->suspend_noirq) ret = drv->pm->suspend_noirq(dev); - } else { - ret = platform_legacy_suspend_late(dev, PMSG_SUSPEND); } return ret; @@ -750,8 +724,6 @@ static int platform_pm_resume_noirq(struct device *dev) if (drv->pm) { if (drv->pm->resume_noirq) ret = drv->pm->resume_noirq(dev); - } else { - ret = platform_legacy_resume_early(dev); } return ret; @@ -797,8 +769,6 @@ static int platform_pm_freeze_noirq(struct device *dev) if (drv->pm) { if (drv->pm->freeze_noirq) ret = drv->pm->freeze_noirq(dev); - } else { - ret = platform_legacy_suspend_late(dev, PMSG_FREEZE); } return ret; @@ -833,8 +803,6 @@ static int platform_pm_thaw_noirq(struct device *dev) if (drv->pm) { if (drv->pm->thaw_noirq) ret = drv->pm->thaw_noirq(dev); - } else { - ret = platform_legacy_resume_early(dev); } return ret; @@ -869,8 +837,6 @@ static int platform_pm_poweroff_noirq(struct device *dev) if (drv->pm) { if (drv->pm->poweroff_noirq) ret = drv->pm->poweroff_noirq(dev); - } else { - ret = platform_legacy_suspend_late(dev, PMSG_HIBERNATE); } return ret; @@ -905,8 +871,6 @@ static int platform_pm_restore_noirq(struct device *dev) if (drv->pm) { if (drv->pm->restore_noirq) ret = drv->pm->restore_noirq(dev); - } else { - ret = platform_legacy_resume_early(dev); } return ret; diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 672a69849735..3c6675c2444b 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -60,8 +60,6 @@ struct platform_driver { int (*remove)(struct platform_device *); void (*shutdown)(struct platform_device *); int (*suspend)(struct platform_device *, pm_message_t state); - int (*suspend_late)(struct platform_device *, pm_message_t 
state); - int (*resume_early)(struct platform_device *); int (*resume)(struct platform_device *); struct device_driver driver; struct platform_device_id *id_table; -- cgit v1.2.3 From 1f38ad8389bbca038d320c29d30aa1d6ed96b48d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 22 Jul 2009 11:48:18 +0800 Subject: crypto: sha512 - Export struct sha512_state This patch renames struct sha512_ctx and exports it as struct sha512_state so that other sha512 implementations can use it as the reference structure for exporting their state. Signed-off-by: Herbert Xu --- crypto/sha512_generic.c | 20 +++++++------------- include/crypto/sha.h | 6 ++++++ 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 3bea38d12242..4fe95eb03226 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -21,12 +21,6 @@ #include #include -struct sha512_ctx { - u64 state[8]; - u32 count[4]; - u8 buf[128]; -}; - static DEFINE_PER_CPU(u64[80], msg_schedule); static inline u64 Ch(u64 x, u64 y, u64 z) @@ -141,7 +135,7 @@ sha512_transform(u64 *state, const u8 *input) static int sha512_init(struct shash_desc *desc) { - struct sha512_ctx *sctx = shash_desc_ctx(desc); + struct sha512_state *sctx = shash_desc_ctx(desc); sctx->state[0] = SHA512_H0; sctx->state[1] = SHA512_H1; sctx->state[2] = SHA512_H2; @@ -158,7 +152,7 @@ sha512_init(struct shash_desc *desc) static int sha384_init(struct shash_desc *desc) { - struct sha512_ctx *sctx = shash_desc_ctx(desc); + struct sha512_state *sctx = shash_desc_ctx(desc); sctx->state[0] = SHA384_H0; sctx->state[1] = SHA384_H1; sctx->state[2] = SHA384_H2; @@ -175,7 +169,7 @@ sha384_init(struct shash_desc *desc) static int sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - struct sha512_ctx *sctx = shash_desc_ctx(desc); + struct sha512_state *sctx = shash_desc_ctx(desc); unsigned int i, index, part_len; @@ -214,7 +208,7 @@ sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) static int sha512_final(struct shash_desc *desc, u8 *hash) { - struct sha512_ctx *sctx = shash_desc_ctx(desc); + struct sha512_state *sctx = shash_desc_ctx(desc); static u8 padding[128] = { 0x80, }; __be64 *dst = (__be64 *)hash; __be32 bits[4]; @@ -240,7 +234,7 @@ sha512_final(struct shash_desc *desc, u8 *hash) dst[i] = cpu_to_be64(sctx->state[i]); /* Zeroize sensitive information. 
*/ - memset(sctx, 0, sizeof(struct sha512_ctx)); + memset(sctx, 0, sizeof(struct sha512_state)); return 0; } @@ -262,7 +256,7 @@ static struct shash_alg sha512 = { .init = sha512_init, .update = sha512_update, .final = sha512_final, - .descsize = sizeof(struct sha512_ctx), + .descsize = sizeof(struct sha512_state), .base = { .cra_name = "sha512", .cra_flags = CRYPTO_ALG_TYPE_SHASH, @@ -276,7 +270,7 @@ static struct shash_alg sha384 = { .init = sha384_init, .update = sha512_update, .final = sha384_final, - .descsize = sizeof(struct sha512_ctx), + .descsize = sizeof(struct sha512_state), .base = { .cra_name = "sha384", .cra_flags = CRYPTO_ALG_TYPE_SHASH, diff --git a/include/crypto/sha.h b/include/crypto/sha.h index 88ef5eb9514d..45b25ccf7cc6 100644 --- a/include/crypto/sha.h +++ b/include/crypto/sha.h @@ -76,4 +76,10 @@ struct sha256_state { u8 buf[SHA256_BLOCK_SIZE]; }; +struct sha512_state { + u64 state[8]; + u32 count[4]; + u8 buf[128]; +}; + #endif -- cgit v1.2.3 From 13887ed6888dad1608eb9530ebd83b6ba29db577 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 22 Jul 2009 12:22:43 +0800 Subject: crypto: sha512_generic - Use 64-bit counters This patch replaces the 32-bit counters in sha512_generic with 64-bit counters. It also switches the bit count to the simpler byte count. Signed-off-by: Herbert Xu --- crypto/sha512_generic.c | 28 +++++++++++----------------- include/crypto/sha.h | 6 +++--- 2 files changed, 14 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c index 4fe95eb03226..9ed9f60316e5 100644 --- a/crypto/sha512_generic.c +++ b/crypto/sha512_generic.c @@ -144,7 +144,7 @@ sha512_init(struct shash_desc *desc) sctx->state[5] = SHA512_H5; sctx->state[6] = SHA512_H6; sctx->state[7] = SHA512_H7; - sctx->count[0] = sctx->count[1] = sctx->count[2] = sctx->count[3] = 0; + sctx->count[0] = sctx->count[1] = 0; return 0; } @@ -161,7 +161,7 @@ sha384_init(struct shash_desc *desc) sctx->state[5] = SHA384_H5; sctx->state[6] = SHA384_H6; sctx->state[7] = SHA384_H7; - sctx->count[0] = sctx->count[1] = sctx->count[2] = sctx->count[3] = 0; + sctx->count[0] = sctx->count[1] = 0; return 0; } @@ -174,15 +174,11 @@ sha512_update(struct shash_desc *desc, const u8 *data, unsigned int len) unsigned int i, index, part_len; /* Compute number of bytes mod 128 */ - index = (unsigned int)((sctx->count[0] >> 3) & 0x7F); - - /* Update number of bits */ - if ((sctx->count[0] += (len << 3)) < (len << 3)) { - if ((sctx->count[1] += 1) < 1) - if ((sctx->count[2] += 1) < 1) - sctx->count[3]++; - sctx->count[1] += (len >> 29); - } + index = sctx->count[0] & 0x7f; + + /* Update number of bytes */ + if (!(sctx->count[0] += len)) + sctx->count[1]++; part_len = 128 - index; @@ -211,18 +207,16 @@ sha512_final(struct shash_desc *desc, u8 *hash) struct sha512_state *sctx = shash_desc_ctx(desc); static u8 padding[128] = { 0x80, }; __be64 *dst = (__be64 *)hash; - __be32 bits[4]; + __be64 bits[2]; unsigned int index, pad_len; int i; /* Save number of bits */ - bits[3] = cpu_to_be32(sctx->count[0]); - bits[2] = cpu_to_be32(sctx->count[1]); - bits[1] = cpu_to_be32(sctx->count[2]); - bits[0] = cpu_to_be32(sctx->count[3]); + bits[1] = cpu_to_be64(sctx->count[0] << 3); + bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); /* Pad out to 112 mod 128. */ - index = (sctx->count[0] >> 3) & 0x7f; + index = sctx->count[0] & 0x7f; pad_len = (index < 112) ? 
(112 - index) : ((128+112) - index); sha512_update(desc, padding, pad_len); diff --git a/include/crypto/sha.h b/include/crypto/sha.h index 45b25ccf7cc6..069e85ba97e1 100644 --- a/include/crypto/sha.h +++ b/include/crypto/sha.h @@ -77,9 +77,9 @@ struct sha256_state { }; struct sha512_state { - u64 state[8]; - u32 count[4]; - u8 buf[128]; + u64 count[2]; + u64 state[SHA512_DIGEST_SIZE / 8]; + u8 buf[SHA512_BLOCK_SIZE]; }; #endif -- cgit v1.2.3 From 9ba5f005c994ad28e266a0cd14ef29354be382c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jul 2009 14:18:35 +0200 Subject: softirq: introduce tasklet_hrtimer infrastructure commit ca109491f (hrtimer: removing all ur callback modes) moved all hrtimer callbacks into hard interrupt context when high resolution timers are active. That breaks code which relied on the assumption that the callback happens in softirq context. Provide a generic infrastructure which combines tasklets and hrtimers together to provide an in-softirq hrtimer experience. Signed-off-by: Peter Zijlstra Cc: torvalds@linux-foundation.org Cc: kaber@trash.net Cc: David Miller LKML-Reference: <1248265724.27058.1366.camel@twins> Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 26 +++++++++++++++++++ kernel/softirq.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 2721f07e9354..fd4c9c63c757 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -517,6 +518,31 @@ extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); extern void tasklet_init(struct tasklet_struct *t, void (*func)(unsigned long), unsigned long data); +struct tasklet_hrtimer { + struct hrtimer timer; + struct tasklet_struct tasklet; + enum hrtimer_restart (*function)(struct hrtimer *); +}; + +extern void +tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, + enum hrtimer_restart (*function)(struct hrtimer *), + clockid_t which_clock, enum hrtimer_mode mode); + +static inline +int tasklet_hrtimer_start(struct tasklet_hrtimer *ttimer, ktime_t time, + const enum hrtimer_mode mode) +{ + return hrtimer_start(&ttimer->timer, time, mode); +} + +static inline +void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer) +{ + hrtimer_cancel(&ttimer->timer); + tasklet_kill(&ttimer->tasklet); +} + /* * Autoprobing for irqs: * diff --git a/kernel/softirq.c b/kernel/softirq.c index 3a94905fa5d2..eb5e131a0485 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -345,7 +345,9 @@ void open_softirq(int nr, void (*action)(struct softirq_action *)) softirq_vec[nr].action = action; } -/* Tasklets */ +/* + * Tasklets + */ struct tasklet_head { struct tasklet_struct *head; @@ -493,6 +495,66 @@ void tasklet_kill(struct tasklet_struct *t) EXPORT_SYMBOL(tasklet_kill); +/* + * tasklet_hrtimer + */ + +/* + * The trampoline is called when the hrtimer expires. If this is + * called from the hrtimer interrupt then we schedule the tasklet as + * the timer callback function expects to run in softirq context. If + * it's called in softirq context anyway (i.e. high resolution timers + * disabled) then the hrtimer callback is called right away. 
+ */ +static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer) +{ + struct tasklet_hrtimer *ttimer = + container_of(timer, struct tasklet_hrtimer, timer); + + if (hrtimer_is_hres_active(timer)) { + tasklet_hi_schedule(&ttimer->tasklet); + return HRTIMER_NORESTART; + } + return ttimer->function(timer); +} + +/* + * Helper function which calls the hrtimer callback from + * tasklet/softirq context + */ +static void __tasklet_hrtimer_trampoline(unsigned long data) +{ + struct tasklet_hrtimer *ttimer = (void *)data; + enum hrtimer_restart restart; + + restart = ttimer->function(&ttimer->timer); + if (restart != HRTIMER_NORESTART) + hrtimer_restart(&ttimer->timer); +} + +/** + * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks + * @ttimer: tasklet_hrtimer which is initialized + * @function: hrtimer callback funtion which gets called from softirq context + * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME) + * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL) + */ +void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer, + enum hrtimer_restart (*function)(struct hrtimer *), + clockid_t which_clock, enum hrtimer_mode mode) +{ + hrtimer_init(&ttimer->timer, which_clock, mode); + ttimer->timer.function = __hrtimer_tasklet_trampoline; + tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline, + (unsigned long)ttimer); + ttimer->function = function; +} +EXPORT_SYMBOL_GPL(tasklet_hrtimer_init); + +/* + * Remote softirq bits + */ + DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); EXPORT_PER_CPU_SYMBOL(softirq_work_list); -- cgit v1.2.3 From fbd90375d7531927d312766b548376d909811b4d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 22 Jul 2009 13:40:14 +0200 Subject: hrtimer: Remove cb_entry from struct hrtimer It's unused, remove it. 
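As a usage illustration for the tasklet_hrtimer API added by the "softirq: introduce tasklet_hrtimer infrastructure" patch above, the following minimal sketch shows how a driver could obtain a periodic callback that runs in softirq context. It is not part of any patch in this series; the example_* names and the 10ms period are hypothetical.

/*
 * Sketch only (not from the patches above): a hypothetical driver that
 * wants a periodic callback in softirq context even when high resolution
 * timers move plain hrtimer callbacks into hard interrupt context.
 */
#include <linux/interrupt.h>
#include <linux/hrtimer.h>
#include <linux/ktime.h>

static struct tasklet_hrtimer example_timer;	/* hypothetical name */

/* Invoked via the tasklet trampoline, i.e. in softirq context. */
static enum hrtimer_restart example_poll(struct hrtimer *timer)
{
	/* ... poll hardware, complete requests, etc. ... */
	hrtimer_forward_now(timer, ktime_set(0, 10 * NSEC_PER_MSEC));
	return HRTIMER_RESTART;	/* trampoline restarts the hrtimer */
}

static void example_start(void)
{
	tasklet_hrtimer_init(&example_timer, example_poll,
			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	tasklet_hrtimer_start(&example_timer,
			      ktime_set(0, 10 * NSEC_PER_MSEC),
			      HRTIMER_MODE_REL);
}

static void example_stop(void)
{
	tasklet_hrtimer_cancel(&example_timer);
}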
Signed-off-by: Peter Zijlstra Signed-off-by: Thomas Gleixner LKML-Reference: --- include/linux/hrtimer.h | 2 -- kernel/hrtimer.c | 1 - 2 files changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 54648e625efd..40e7d54fc424 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -91,7 +91,6 @@ enum hrtimer_restart { * @function: timer expiry callback function * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) - * @cb_entry: list head to enqueue an expired timer into the callback list * @start_site: timer statistics field to store the site where the timer * was started * @start_comm: timer statistics field to store the name of the process which @@ -108,7 +107,6 @@ struct hrtimer { enum hrtimer_restart (*function)(struct hrtimer *); struct hrtimer_clock_base *base; unsigned long state; - struct list_head cb_entry; #ifdef CONFIG_TIMER_STATS int start_pid; void *start_site; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 43d151f185b6..052a0f53e4eb 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1092,7 +1092,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, clock_id = CLOCK_MONOTONIC; timer->base = &cpu_base->clock_base[clock_id]; - INIT_LIST_HEAD(&timer->cb_entry); hrtimer_init_timer_hres(timer); #ifdef CONFIG_TIMER_STATS -- cgit v1.2.3 From 7f453c24b95a085fc7bd35d53b33abc4dc5a048b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 21 Jul 2009 13:19:40 +0200 Subject: perf_counter: PERF_SAMPLE_ID and inherited counters Anton noted that for inherited counters the counter-id as provided by PERF_SAMPLE_ID isn't mappable to the id found through PERF_RECORD_ID because each inherited counter gets its own id. His suggestion was to always return the parent counter id, since that is the primary counter id as exposed. However, these inherited counters have a unique identifier so that events like PERF_EVENT_PERIOD and PERF_EVENT_THROTTLE can be specific about which counter gets modified, which is important when trying to normalize the sample streams. This patch removes PERF_EVENT_PERIOD in favour of PERF_SAMPLE_PERIOD, which is more useful anyway, since changing periods became a lot more common than initially thought -- rendering PERF_EVENT_PERIOD the less useful solution (also, PERF_SAMPLE_PERIOD reports the more accurate value, since it reports the value used to trigger the overflow, whereas PERF_EVENT_PERIOD simply reports the requested period changed, which might only take effect on the next cycle). This still leaves us PERF_EVENT_THROTTLE to consider, but since that _should_ be a rare occurrence, and linking it to a primary id is the most useful bit to diagnose the problem, we introduce a PERF_SAMPLE_STREAM_ID, for those few cases where the full reconstruction is important. 
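To make the resulting record layout concrete, the sketch below shows a sample record when only PERF_SAMPLE_ID, PERF_SAMPLE_STREAM_ID and PERF_SAMPLE_PERIOD are requested, following the sample format comment updated in the patch below; the struct name and this particular combination of sample bits are illustrative only, not part of the patch.

/*
 * Illustrative layout of one sample when sample_type is
 * PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID | PERF_SAMPLE_PERIOD.
 * The struct name is hypothetical; the field order follows the
 * sample format comment in include/linux/perf_counter.h.
 */
struct example_sample {
	struct perf_event_header header;
	u64 id;		/* PERF_SAMPLE_ID: primary (parent) counter id */
	u64 stream_id;	/* PERF_SAMPLE_STREAM_ID: this counter's own id */
	u64 period;	/* PERF_SAMPLE_PERIOD: period that triggered the overflow */
};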
[Does change the ABI a little, but I see no other way out] Suggested-by: Anton Blanchard Signed-off-by: Peter Zijlstra LKML-Reference: <1248095846.15751.8781.camel@twins> --- include/linux/perf_counter.h | 15 ++----- kernel/perf_counter.c | 92 +++++++++++++++---------------------------- tools/perf/builtin-annotate.c | 24 ----------- tools/perf/builtin-report.c | 24 ----------- 4 files changed, 35 insertions(+), 120 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 5e970c7d3fd5..bd15d7a5f5ce 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -120,8 +120,9 @@ enum perf_counter_sample_format { PERF_SAMPLE_ID = 1U << 6, PERF_SAMPLE_CPU = 1U << 7, PERF_SAMPLE_PERIOD = 1U << 8, + PERF_SAMPLE_STREAM_ID = 1U << 9, - PERF_SAMPLE_MAX = 1U << 9, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 10, /* non-ABI */ }; /* @@ -312,16 +313,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 time; * u64 id; - * u64 sample_period; - * }; - */ - PERF_EVENT_PERIOD = 4, - - /* - * struct { - * struct perf_event_header header; - * u64 time; - * u64 id; + * u64 stream_id; * }; */ PERF_EVENT_THROTTLE = 5, @@ -356,6 +348,7 @@ enum perf_event_type { * { u64 time; } && PERF_SAMPLE_TIME * { u64 addr; } && PERF_SAMPLE_ADDR * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 period; } && PERF_SAMPLE_PERIOD * diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index e1d6a3aa1333..7530588fa5c5 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -154,6 +154,20 @@ static void unclone_ctx(struct perf_counter_context *ctx) } } +/* + * If we inherit counters we want to return the parent counter id + * to userspace. + */ +static u64 primary_counter_id(struct perf_counter *counter) +{ + u64 id = counter->id; + + if (counter->parent) + id = counter->parent->id; + + return id; +} + /* * Get the perf_counter_context for a task and lock it. 
* This has to cope with with the fact that until it is locked, @@ -1296,7 +1310,6 @@ static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) #define MAX_INTERRUPTS (~0ULL) static void perf_log_throttle(struct perf_counter *counter, int enable); -static void perf_log_period(struct perf_counter *counter, u64 period); static void perf_adjust_period(struct perf_counter *counter, u64 events) { @@ -1315,8 +1328,6 @@ static void perf_adjust_period(struct perf_counter *counter, u64 events) if (!sample_period) sample_period = 1; - perf_log_period(counter, sample_period); - hwc->sample_period = sample_period; } @@ -1705,7 +1716,7 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) values[n++] = counter->total_time_running + atomic64_read(&counter->child_total_time_running); if (counter->attr.read_format & PERF_FORMAT_ID) - values[n++] = counter->id; + values[n++] = primary_counter_id(counter); mutex_unlock(&counter->child_mutex); if (count < n * sizeof(u64)) @@ -1812,8 +1823,6 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) counter->attr.sample_freq = value; } else { - perf_log_period(counter, value); - counter->attr.sample_period = value; counter->hw.sample_period = value; } @@ -2662,6 +2671,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, if (sample_type & PERF_SAMPLE_ID) header.size += sizeof(u64); + if (sample_type & PERF_SAMPLE_STREAM_ID) + header.size += sizeof(u64); + if (sample_type & PERF_SAMPLE_CPU) { header.size += sizeof(cpu_entry); @@ -2705,7 +2717,13 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, if (sample_type & PERF_SAMPLE_ADDR) perf_output_put(&handle, data->addr); - if (sample_type & PERF_SAMPLE_ID) + if (sample_type & PERF_SAMPLE_ID) { + u64 id = primary_counter_id(counter); + + perf_output_put(&handle, id); + } + + if (sample_type & PERF_SAMPLE_STREAM_ID) perf_output_put(&handle, counter->id); if (sample_type & PERF_SAMPLE_CPU) @@ -2728,7 +2746,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, if (sub != counter) sub->pmu->read(sub); - group_entry.id = sub->id; + group_entry.id = primary_counter_id(sub); group_entry.counter = atomic64_read(&sub->count); perf_output_put(&handle, group_entry); @@ -2788,15 +2806,8 @@ perf_counter_read_event(struct perf_counter *counter, } if (counter->attr.read_format & PERF_FORMAT_ID) { - u64 id; - event.header.size += sizeof(u64); - if (counter->parent) - id = counter->parent->id; - else - id = counter->id; - - event.format[i++] = id; + event.format[i++] = primary_counter_id(counter); } ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); @@ -3190,49 +3201,6 @@ void __perf_counter_mmap(struct vm_area_struct *vma) perf_counter_mmap_event(&mmap_event); } -/* - * Log sample_period changes so that analyzing tools can re-normalize the - * event flow. 
- */ - -struct freq_event { - struct perf_event_header header; - u64 time; - u64 id; - u64 period; -}; - -static void perf_log_period(struct perf_counter *counter, u64 period) -{ - struct perf_output_handle handle; - struct freq_event event; - int ret; - - if (counter->hw.sample_period == period) - return; - - if (counter->attr.sample_type & PERF_SAMPLE_PERIOD) - return; - - event = (struct freq_event) { - .header = { - .type = PERF_EVENT_PERIOD, - .misc = 0, - .size = sizeof(event), - }, - .time = sched_clock(), - .id = counter->id, - .period = period, - }; - - ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0); - if (ret) - return; - - perf_output_put(&handle, event); - perf_output_end(&handle); -} - /* * IRQ throttle logging */ @@ -3246,14 +3214,16 @@ static void perf_log_throttle(struct perf_counter *counter, int enable) struct perf_event_header header; u64 time; u64 id; + u64 stream_id; } throttle_event = { .header = { .type = PERF_EVENT_THROTTLE + 1, .misc = 0, .size = sizeof(throttle_event), }, - .time = sched_clock(), - .id = counter->id, + .time = sched_clock(), + .id = primary_counter_id(counter), + .stream_id = counter->id, }; ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 5f9eefecc574..1dba568e1941 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -74,20 +74,12 @@ struct fork_event { u32 pid, ppid; }; -struct period_event { - struct perf_event_header header; - u64 time; - u64 id; - u64 sample_period; -}; - typedef union event_union { struct perf_event_header header; struct ip_event ip; struct mmap_event mmap; struct comm_event comm; struct fork_event fork; - struct period_event period; } event_t; @@ -997,19 +989,6 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static int -process_period_event(event_t *event, unsigned long offset, unsigned long head) -{ - dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->period.time, - event->period.id, - event->period.sample_period); - - return 0; -} - static int process_event(event_t *event, unsigned long offset, unsigned long head) { @@ -1025,9 +1004,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head) case PERF_EVENT_FORK: return process_fork_event(event, offset, head); - - case PERF_EVENT_PERIOD: - return process_period_event(event, offset, head); /* * We dont process them right now but they are fine: */ diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index a118bc77286d..b20a4b6e31b7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -101,13 +101,6 @@ struct fork_event { u32 pid, ppid; }; -struct period_event { - struct perf_event_header header; - u64 time; - u64 id; - u64 sample_period; -}; - struct lost_event { struct perf_event_header header; u64 id; @@ -127,7 +120,6 @@ typedef union event_union { struct mmap_event mmap; struct comm_event comm; struct fork_event fork; - struct period_event period; struct lost_event lost; struct read_event read; } event_t; @@ -1635,19 +1627,6 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static int -process_period_event(event_t *event, unsigned long offset, unsigned long head) -{ - dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n", - (void *)(offset + head), - (void 
*)(long)(event->header.size), - event->period.time, - event->period.id, - event->period.sample_period); - - return 0; -} - static int process_lost_event(event_t *event, unsigned long offset, unsigned long head) { @@ -1729,9 +1708,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head) case PERF_EVENT_FORK: return process_fork_event(event, offset, head); - case PERF_EVENT_PERIOD: - return process_period_event(event, offset, head); - case PERF_EVENT_LOST: return process_lost_event(event, offset, head); -- cgit v1.2.3 From 24c30dbbcdda9aeccb23b4eecb6bb8e538742ea4 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Thu, 16 Jul 2009 21:31:31 +0000 Subject: of/mdio: Add support function for Ethernet fixed-link property Fixed-link support is broken for the ucc_eth, gianfar, and fs_enet device drivers. The "OF MDIO rework" patches removed most of the support. Instead of re-adding fixed-link stuff to the drivers, this patch adds a support function for parsing the fixed-link property and obtaining a dummy phy to match. Note: the dummy phy handling in arch/powerpc is a bit of a hack and needs to be reworked. This function is being added now to solve the regression in the Ethernet drivers, but it should be considered a temporary measure until the fixed link handling can be reworked. Signed-off-by: Anton Vorontsov Signed-off-by: Grant Likely Signed-off-by: David S. Miller --- drivers/of/of_mdio.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/linux/of_mdio.h | 3 +++ 2 files changed, 45 insertions(+) (limited to 'include') diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index aee967d7f760..bacaa536fd51 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -9,6 +9,10 @@ * out of the OpenFirmware device tree and using it to populate an mii_bus. */ +#include +#include +#include +#include #include #include #include @@ -137,3 +141,41 @@ struct phy_device *of_phy_connect(struct net_device *dev, return phy_connect_direct(dev, phy, hndlr, flags, iface) ? NULL : phy; } EXPORT_SYMBOL(of_phy_connect); + +/** + * of_phy_connect_fixed_link - Parse fixed-link property and return a dummy phy + * @dev: pointer to net_device claiming the phy + * @hndlr: Link state callback for the network device + * @iface: PHY data interface type + * + * This function is a temporary stop-gap and will be removed soon. It is + * only to support the fs_enet, ucc_geth and gianfar Ethernet drivers. Do + * not call this function from new drivers. + */ +struct phy_device *of_phy_connect_fixed_link(struct net_device *dev, + void (*hndlr)(struct net_device *), + phy_interface_t iface) +{ + struct device_node *net_np; + char bus_id[MII_BUS_ID_SIZE + 3]; + struct phy_device *phy; + const u32 *phy_id; + int sz; + + if (!dev->dev.parent) + return NULL; + + net_np = dev_archdata_get_node(&dev->dev.parent->archdata); + if (!net_np) + return NULL; + + phy_id = of_get_property(net_np, "fixed-link", &sz); + if (!phy_id || sz < sizeof(*phy_id)) + return NULL; + + sprintf(bus_id, PHY_ID_FMT, "0", phy_id[0]); + + phy = phy_connect(dev, bus_id, hndlr, 0, iface); + return IS_ERR(phy) ? 
NULL : phy; +} +EXPORT_SYMBOL(of_phy_connect_fixed_link); diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index c9663c690303..53b94e025c7c 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -18,5 +18,8 @@ extern struct phy_device *of_phy_connect(struct net_device *dev, struct device_node *phy_np, void (*hndlr)(struct net_device *), u32 flags, phy_interface_t iface); +extern struct phy_device *of_phy_connect_fixed_link(struct net_device *dev, + void (*hndlr)(struct net_device *), + phy_interface_t iface); #endif /* __LINUX_OF_MDIO_H */ -- cgit v1.2.3 From cf4f1e76c49dacfde0680b170b9a9b6a42f296bb Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 22 Jul 2009 14:32:03 +0000 Subject: usb: move r8a66597 register defines Move r8a66597 hardware register definitions from the host controller header file to the platform data header file. With this change in place we can easily share register definitions between the host controller driver and a future gadget driver. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/usb/host/r8a66597.h | 366 ------------------------------------------ include/linux/usb/r8a66597.h | 372 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 370 insertions(+), 368 deletions(-) (limited to 'include') diff --git a/drivers/usb/host/r8a66597.h b/drivers/usb/host/r8a66597.h index eecbd917bc81..228e3fb23854 100644 --- a/drivers/usb/host/r8a66597.h +++ b/drivers/usb/host/r8a66597.h @@ -32,372 +32,6 @@ #include -#define SYSCFG0 0x00 -#define SYSCFG1 0x02 -#define SYSSTS0 0x04 -#define SYSSTS1 0x06 -#define DVSTCTR0 0x08 -#define DVSTCTR1 0x0A -#define TESTMODE 0x0C -#define PINCFG 0x0E -#define DMA0CFG 0x10 -#define DMA1CFG 0x12 -#define CFIFO 0x14 -#define D0FIFO 0x18 -#define D1FIFO 0x1C -#define CFIFOSEL 0x20 -#define CFIFOCTR 0x22 -#define CFIFOSIE 0x24 -#define D0FIFOSEL 0x28 -#define D0FIFOCTR 0x2A -#define D1FIFOSEL 0x2C -#define D1FIFOCTR 0x2E -#define INTENB0 0x30 -#define INTENB1 0x32 -#define INTENB2 0x34 -#define BRDYENB 0x36 -#define NRDYENB 0x38 -#define BEMPENB 0x3A -#define SOFCFG 0x3C -#define INTSTS0 0x40 -#define INTSTS1 0x42 -#define INTSTS2 0x44 -#define BRDYSTS 0x46 -#define NRDYSTS 0x48 -#define BEMPSTS 0x4A -#define FRMNUM 0x4C -#define UFRMNUM 0x4E -#define USBADDR 0x50 -#define USBREQ 0x54 -#define USBVAL 0x56 -#define USBINDX 0x58 -#define USBLENG 0x5A -#define DCPCFG 0x5C -#define DCPMAXP 0x5E -#define DCPCTR 0x60 -#define PIPESEL 0x64 -#define PIPECFG 0x68 -#define PIPEBUF 0x6A -#define PIPEMAXP 0x6C -#define PIPEPERI 0x6E -#define PIPE1CTR 0x70 -#define PIPE2CTR 0x72 -#define PIPE3CTR 0x74 -#define PIPE4CTR 0x76 -#define PIPE5CTR 0x78 -#define PIPE6CTR 0x7A -#define PIPE7CTR 0x7C -#define PIPE8CTR 0x7E -#define PIPE9CTR 0x80 -#define PIPE1TRE 0x90 -#define PIPE1TRN 0x92 -#define PIPE2TRE 0x94 -#define PIPE2TRN 0x96 -#define PIPE3TRE 0x98 -#define PIPE3TRN 0x9A -#define PIPE4TRE 0x9C -#define PIPE4TRN 0x9E -#define PIPE5TRE 0xA0 -#define PIPE5TRN 0xA2 -#define DEVADD0 0xD0 -#define DEVADD1 0xD2 -#define DEVADD2 0xD4 -#define DEVADD3 0xD6 -#define DEVADD4 0xD8 -#define DEVADD5 0xDA -#define DEVADD6 0xDC -#define DEVADD7 0xDE -#define DEVADD8 0xE0 -#define DEVADD9 0xE2 -#define DEVADDA 0xE4 - -/* System Configuration Control Register */ -#define XTAL 0xC000 /* b15-14: Crystal selection */ -#define XTAL48 0x8000 /* 48MHz */ -#define XTAL24 0x4000 /* 24MHz */ -#define XTAL12 0x0000 /* 12MHz */ -#define XCKE 0x2000 /* b13: External clock enable */ -#define PLLC 0x0800 /* b11: PLL control */ 
-#define SCKE 0x0400 /* b10: USB clock enable */ -#define PCSDIS 0x0200 /* b9: not CS wakeup */ -#define LPSME 0x0100 /* b8: Low power sleep mode */ -#define HSE 0x0080 /* b7: Hi-speed enable */ -#define DCFM 0x0040 /* b6: Controller function select */ -#define DRPD 0x0020 /* b5: D+/- pull down control */ -#define DPRPU 0x0010 /* b4: D+ pull up control */ -#define USBE 0x0001 /* b0: USB module operation enable */ - -/* System Configuration Status Register */ -#define OVCBIT 0x8000 /* b15-14: Over-current bit */ -#define OVCMON 0xC000 /* b15-14: Over-current monitor */ -#define SOFEA 0x0020 /* b5: SOF monitor */ -#define IDMON 0x0004 /* b3: ID-pin monitor */ -#define LNST 0x0003 /* b1-0: D+, D- line status */ -#define SE1 0x0003 /* SE1 */ -#define FS_KSTS 0x0002 /* Full-Speed K State */ -#define FS_JSTS 0x0001 /* Full-Speed J State */ -#define LS_JSTS 0x0002 /* Low-Speed J State */ -#define LS_KSTS 0x0001 /* Low-Speed K State */ -#define SE0 0x0000 /* SE0 */ - -/* Device State Control Register */ -#define EXTLP0 0x0400 /* b10: External port */ -#define VBOUT 0x0200 /* b9: VBUS output */ -#define WKUP 0x0100 /* b8: Remote wakeup */ -#define RWUPE 0x0080 /* b7: Remote wakeup sense */ -#define USBRST 0x0040 /* b6: USB reset enable */ -#define RESUME 0x0020 /* b5: Resume enable */ -#define UACT 0x0010 /* b4: USB bus enable */ -#define RHST 0x0007 /* b1-0: Reset handshake status */ -#define HSPROC 0x0004 /* HS handshake is processing */ -#define HSMODE 0x0003 /* Hi-Speed mode */ -#define FSMODE 0x0002 /* Full-Speed mode */ -#define LSMODE 0x0001 /* Low-Speed mode */ -#define UNDECID 0x0000 /* Undecided */ - -/* Test Mode Register */ -#define UTST 0x000F /* b3-0: Test select */ -#define H_TST_PACKET 0x000C /* HOST TEST Packet */ -#define H_TST_SE0_NAK 0x000B /* HOST TEST SE0 NAK */ -#define H_TST_K 0x000A /* HOST TEST K */ -#define H_TST_J 0x0009 /* HOST TEST J */ -#define H_TST_NORMAL 0x0000 /* HOST Normal Mode */ -#define P_TST_PACKET 0x0004 /* PERI TEST Packet */ -#define P_TST_SE0_NAK 0x0003 /* PERI TEST SE0 NAK */ -#define P_TST_K 0x0002 /* PERI TEST K */ -#define P_TST_J 0x0001 /* PERI TEST J */ -#define P_TST_NORMAL 0x0000 /* PERI Normal Mode */ - -/* Data Pin Configuration Register */ -#define LDRV 0x8000 /* b15: Drive Current Adjust */ -#define VIF1 0x0000 /* VIF = 1.8V */ -#define VIF3 0x8000 /* VIF = 3.3V */ -#define INTA 0x0001 /* b1: USB INT-pin active */ - -/* DMAx Pin Configuration Register */ -#define DREQA 0x4000 /* b14: Dreq active select */ -#define BURST 0x2000 /* b13: Burst mode */ -#define DACKA 0x0400 /* b10: Dack active select */ -#define DFORM 0x0380 /* b9-7: DMA mode select */ -#define CPU_ADR_RD_WR 0x0000 /* Address + RD/WR mode (CPU bus) */ -#define CPU_DACK_RD_WR 0x0100 /* DACK + RD/WR mode (CPU bus) */ -#define CPU_DACK_ONLY 0x0180 /* DACK only mode (CPU bus) */ -#define SPLIT_DACK_ONLY 0x0200 /* DACK only mode (SPLIT bus) */ -#define DENDA 0x0040 /* b6: Dend active select */ -#define PKTM 0x0020 /* b5: Packet mode */ -#define DENDE 0x0010 /* b4: Dend enable */ -#define OBUS 0x0004 /* b2: OUTbus mode */ - -/* CFIFO/DxFIFO Port Select Register */ -#define RCNT 0x8000 /* b15: Read count mode */ -#define REW 0x4000 /* b14: Buffer rewind */ -#define DCLRM 0x2000 /* b13: DMA buffer clear mode */ -#define DREQE 0x1000 /* b12: DREQ output enable */ -#define MBW_8 0x0000 /* 8bit */ -#define MBW_16 0x0400 /* 16bit */ -#define MBW_32 0x0800 /* 32bit */ -#define BIGEND 0x0100 /* b8: Big endian mode */ -#define BYTE_LITTLE 0x0000 /* little dendian */ -#define BYTE_BIG 0x0100 /* 
big endifan */ -#define ISEL 0x0020 /* b5: DCP FIFO port direction select */ -#define CURPIPE 0x000F /* b2-0: PIPE select */ - -/* CFIFO/DxFIFO Port Control Register */ -#define BVAL 0x8000 /* b15: Buffer valid flag */ -#define BCLR 0x4000 /* b14: Buffer clear */ -#define FRDY 0x2000 /* b13: FIFO ready */ -#define DTLN 0x0FFF /* b11-0: FIFO received data length */ - -/* Interrupt Enable Register 0 */ -#define VBSE 0x8000 /* b15: VBUS interrupt */ -#define RSME 0x4000 /* b14: Resume interrupt */ -#define SOFE 0x2000 /* b13: Frame update interrupt */ -#define DVSE 0x1000 /* b12: Device state transition interrupt */ -#define CTRE 0x0800 /* b11: Control transfer stage transition interrupt */ -#define BEMPE 0x0400 /* b10: Buffer empty interrupt */ -#define NRDYE 0x0200 /* b9: Buffer not ready interrupt */ -#define BRDYE 0x0100 /* b8: Buffer ready interrupt */ - -/* Interrupt Enable Register 1 */ -#define OVRCRE 0x8000 /* b15: Over-current interrupt */ -#define BCHGE 0x4000 /* b14: USB us chenge interrupt */ -#define DTCHE 0x1000 /* b12: Detach sense interrupt */ -#define ATTCHE 0x0800 /* b11: Attach sense interrupt */ -#define EOFERRE 0x0040 /* b6: EOF error interrupt */ -#define SIGNE 0x0020 /* b5: SETUP IGNORE interrupt */ -#define SACKE 0x0010 /* b4: SETUP ACK interrupt */ - -/* BRDY Interrupt Enable/Status Register */ -#define BRDY9 0x0200 /* b9: PIPE9 */ -#define BRDY8 0x0100 /* b8: PIPE8 */ -#define BRDY7 0x0080 /* b7: PIPE7 */ -#define BRDY6 0x0040 /* b6: PIPE6 */ -#define BRDY5 0x0020 /* b5: PIPE5 */ -#define BRDY4 0x0010 /* b4: PIPE4 */ -#define BRDY3 0x0008 /* b3: PIPE3 */ -#define BRDY2 0x0004 /* b2: PIPE2 */ -#define BRDY1 0x0002 /* b1: PIPE1 */ -#define BRDY0 0x0001 /* b1: PIPE0 */ - -/* NRDY Interrupt Enable/Status Register */ -#define NRDY9 0x0200 /* b9: PIPE9 */ -#define NRDY8 0x0100 /* b8: PIPE8 */ -#define NRDY7 0x0080 /* b7: PIPE7 */ -#define NRDY6 0x0040 /* b6: PIPE6 */ -#define NRDY5 0x0020 /* b5: PIPE5 */ -#define NRDY4 0x0010 /* b4: PIPE4 */ -#define NRDY3 0x0008 /* b3: PIPE3 */ -#define NRDY2 0x0004 /* b2: PIPE2 */ -#define NRDY1 0x0002 /* b1: PIPE1 */ -#define NRDY0 0x0001 /* b1: PIPE0 */ - -/* BEMP Interrupt Enable/Status Register */ -#define BEMP9 0x0200 /* b9: PIPE9 */ -#define BEMP8 0x0100 /* b8: PIPE8 */ -#define BEMP7 0x0080 /* b7: PIPE7 */ -#define BEMP6 0x0040 /* b6: PIPE6 */ -#define BEMP5 0x0020 /* b5: PIPE5 */ -#define BEMP4 0x0010 /* b4: PIPE4 */ -#define BEMP3 0x0008 /* b3: PIPE3 */ -#define BEMP2 0x0004 /* b2: PIPE2 */ -#define BEMP1 0x0002 /* b1: PIPE1 */ -#define BEMP0 0x0001 /* b0: PIPE0 */ - -/* SOF Pin Configuration Register */ -#define TRNENSEL 0x0100 /* b8: Select transaction enable period */ -#define BRDYM 0x0040 /* b6: BRDY clear timing */ -#define INTL 0x0020 /* b5: Interrupt sense select */ -#define EDGESTS 0x0010 /* b4: */ -#define SOFMODE 0x000C /* b3-2: SOF pin select */ -#define SOF_125US 0x0008 /* SOF OUT 125us Frame Signal */ -#define SOF_1MS 0x0004 /* SOF OUT 1ms Frame Signal */ -#define SOF_DISABLE 0x0000 /* SOF OUT Disable */ - -/* Interrupt Status Register 0 */ -#define VBINT 0x8000 /* b15: VBUS interrupt */ -#define RESM 0x4000 /* b14: Resume interrupt */ -#define SOFR 0x2000 /* b13: SOF frame update interrupt */ -#define DVST 0x1000 /* b12: Device state transition interrupt */ -#define CTRT 0x0800 /* b11: Control transfer stage transition interrupt */ -#define BEMP 0x0400 /* b10: Buffer empty interrupt */ -#define NRDY 0x0200 /* b9: Buffer not ready interrupt */ -#define BRDY 0x0100 /* b8: Buffer ready interrupt */ -#define VBSTS 
0x0080 /* b7: VBUS input port */ -#define DVSQ 0x0070 /* b6-4: Device state */ -#define DS_SPD_CNFG 0x0070 /* Suspend Configured */ -#define DS_SPD_ADDR 0x0060 /* Suspend Address */ -#define DS_SPD_DFLT 0x0050 /* Suspend Default */ -#define DS_SPD_POWR 0x0040 /* Suspend Powered */ -#define DS_SUSP 0x0040 /* Suspend */ -#define DS_CNFG 0x0030 /* Configured */ -#define DS_ADDS 0x0020 /* Address */ -#define DS_DFLT 0x0010 /* Default */ -#define DS_POWR 0x0000 /* Powered */ -#define DVSQS 0x0030 /* b5-4: Device state */ -#define VALID 0x0008 /* b3: Setup packet detected flag */ -#define CTSQ 0x0007 /* b2-0: Control transfer stage */ -#define CS_SQER 0x0006 /* Sequence error */ -#define CS_WRND 0x0005 /* Control write nodata status stage */ -#define CS_WRSS 0x0004 /* Control write status stage */ -#define CS_WRDS 0x0003 /* Control write data stage */ -#define CS_RDSS 0x0002 /* Control read status stage */ -#define CS_RDDS 0x0001 /* Control read data stage */ -#define CS_IDST 0x0000 /* Idle or setup stage */ - -/* Interrupt Status Register 1 */ -#define OVRCR 0x8000 /* b15: Over-current interrupt */ -#define BCHG 0x4000 /* b14: USB bus chenge interrupt */ -#define DTCH 0x1000 /* b12: Detach sense interrupt */ -#define ATTCH 0x0800 /* b11: Attach sense interrupt */ -#define EOFERR 0x0040 /* b6: EOF-error interrupt */ -#define SIGN 0x0020 /* b5: Setup ignore interrupt */ -#define SACK 0x0010 /* b4: Setup acknowledge interrupt */ - -/* Frame Number Register */ -#define OVRN 0x8000 /* b15: Overrun error */ -#define CRCE 0x4000 /* b14: Received data error */ -#define FRNM 0x07FF /* b10-0: Frame number */ - -/* Micro Frame Number Register */ -#define UFRNM 0x0007 /* b2-0: Micro frame number */ - -/* Default Control Pipe Maxpacket Size Register */ -/* Pipe Maxpacket Size Register */ -#define DEVSEL 0xF000 /* b15-14: Device address select */ -#define MAXP 0x007F /* b6-0: Maxpacket size of default control pipe */ - -/* Default Control Pipe Control Register */ -#define BSTS 0x8000 /* b15: Buffer status */ -#define SUREQ 0x4000 /* b14: Send USB request */ -#define CSCLR 0x2000 /* b13: complete-split status clear */ -#define CSSTS 0x1000 /* b12: complete-split status */ -#define SUREQCLR 0x0800 /* b11: stop setup request */ -#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ -#define SQSET 0x0080 /* b7: Sequence toggle bit set */ -#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ -#define PBUSY 0x0020 /* b5: pipe busy */ -#define PINGE 0x0010 /* b4: ping enable */ -#define CCPL 0x0004 /* b2: Enable control transfer complete */ -#define PID 0x0003 /* b1-0: Response PID */ -#define PID_STALL11 0x0003 /* STALL */ -#define PID_STALL 0x0002 /* STALL */ -#define PID_BUF 0x0001 /* BUF */ -#define PID_NAK 0x0000 /* NAK */ - -/* Pipe Window Select Register */ -#define PIPENM 0x0007 /* b2-0: Pipe select */ - -/* Pipe Configuration Register */ -#define R8A66597_TYP 0xC000 /* b15-14: Transfer type */ -#define R8A66597_ISO 0xC000 /* Isochronous */ -#define R8A66597_INT 0x8000 /* Interrupt */ -#define R8A66597_BULK 0x4000 /* Bulk */ -#define R8A66597_BFRE 0x0400 /* b10: Buffer ready interrupt mode select */ -#define R8A66597_DBLB 0x0200 /* b9: Double buffer mode select */ -#define R8A66597_CNTMD 0x0100 /* b8: Continuous transfer mode select */ -#define R8A66597_SHTNAK 0x0080 /* b7: Transfer end NAK */ -#define R8A66597_DIR 0x0010 /* b4: Transfer direction select */ -#define R8A66597_EPNUM 0x000F /* b3-0: Eendpoint number select */ - -/* Pipe Buffer Configuration Register */ -#define BUFSIZE 0x7C00 /* 
b14-10: Pipe buffer size */ -#define BUFNMB 0x007F /* b6-0: Pipe buffer number */ -#define PIPE0BUF 256 -#define PIPExBUF 64 - -/* Pipe Maxpacket Size Register */ -#define MXPS 0x07FF /* b10-0: Maxpacket size */ - -/* Pipe Cycle Configuration Register */ -#define IFIS 0x1000 /* b12: Isochronous in-buffer flush mode select */ -#define IITV 0x0007 /* b2-0: Isochronous interval */ - -/* Pipex Control Register */ -#define BSTS 0x8000 /* b15: Buffer status */ -#define INBUFM 0x4000 /* b14: IN buffer monitor (Only for PIPE1 to 5) */ -#define CSCLR 0x2000 /* b13: complete-split status clear */ -#define CSSTS 0x1000 /* b12: complete-split status */ -#define ATREPM 0x0400 /* b10: Auto repeat mode */ -#define ACLRM 0x0200 /* b9: Out buffer auto clear mode */ -#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ -#define SQSET 0x0080 /* b7: Sequence toggle bit set */ -#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ -#define PBUSY 0x0020 /* b5: pipe busy */ -#define PID 0x0003 /* b1-0: Response PID */ - -/* PIPExTRE */ -#define TRENB 0x0200 /* b9: Transaction counter enable */ -#define TRCLR 0x0100 /* b8: Transaction counter clear */ - -/* PIPExTRN */ -#define TRNCNT 0xFFFF /* b15-0: Transaction counter */ - -/* DEVADDx */ -#define UPPHUB 0x7800 -#define HUBPORT 0x0700 -#define USBSPD 0x00C0 -#define RTPORT 0x0001 - #define R8A66597_MAX_NUM_PIPE 10 #define R8A66597_BUF_BSIZE 8 #define R8A66597_MAX_DEVICE 10 diff --git a/include/linux/usb/r8a66597.h b/include/linux/usb/r8a66597.h index 460ee3f6a2c6..26d216734057 100644 --- a/include/linux/usb/r8a66597.h +++ b/include/linux/usb/r8a66597.h @@ -28,7 +28,7 @@ #define R8A66597_PLATDATA_XTAL_48MHZ 0x03 struct r8a66597_platdata { - /* This ops can controll port power instead of DVSTCTR register. */ + /* This callback can control port power instead of DVSTCTR register. 
*/ void (*port_power)(int port, int power); /* set one = on chip controller, set zero = external controller */ @@ -43,5 +43,373 @@ struct r8a66597_platdata { /* set one = big endian, set zero = little endian */ unsigned endian:1; }; -#endif + +/* Register definitions */ +#define SYSCFG0 0x00 +#define SYSCFG1 0x02 +#define SYSSTS0 0x04 +#define SYSSTS1 0x06 +#define DVSTCTR0 0x08 +#define DVSTCTR1 0x0A +#define TESTMODE 0x0C +#define PINCFG 0x0E +#define DMA0CFG 0x10 +#define DMA1CFG 0x12 +#define CFIFO 0x14 +#define D0FIFO 0x18 +#define D1FIFO 0x1C +#define CFIFOSEL 0x20 +#define CFIFOCTR 0x22 +#define CFIFOSIE 0x24 +#define D0FIFOSEL 0x28 +#define D0FIFOCTR 0x2A +#define D1FIFOSEL 0x2C +#define D1FIFOCTR 0x2E +#define INTENB0 0x30 +#define INTENB1 0x32 +#define INTENB2 0x34 +#define BRDYENB 0x36 +#define NRDYENB 0x38 +#define BEMPENB 0x3A +#define SOFCFG 0x3C +#define INTSTS0 0x40 +#define INTSTS1 0x42 +#define INTSTS2 0x44 +#define BRDYSTS 0x46 +#define NRDYSTS 0x48 +#define BEMPSTS 0x4A +#define FRMNUM 0x4C +#define UFRMNUM 0x4E +#define USBADDR 0x50 +#define USBREQ 0x54 +#define USBVAL 0x56 +#define USBINDX 0x58 +#define USBLENG 0x5A +#define DCPCFG 0x5C +#define DCPMAXP 0x5E +#define DCPCTR 0x60 +#define PIPESEL 0x64 +#define PIPECFG 0x68 +#define PIPEBUF 0x6A +#define PIPEMAXP 0x6C +#define PIPEPERI 0x6E +#define PIPE1CTR 0x70 +#define PIPE2CTR 0x72 +#define PIPE3CTR 0x74 +#define PIPE4CTR 0x76 +#define PIPE5CTR 0x78 +#define PIPE6CTR 0x7A +#define PIPE7CTR 0x7C +#define PIPE8CTR 0x7E +#define PIPE9CTR 0x80 +#define PIPE1TRE 0x90 +#define PIPE1TRN 0x92 +#define PIPE2TRE 0x94 +#define PIPE2TRN 0x96 +#define PIPE3TRE 0x98 +#define PIPE3TRN 0x9A +#define PIPE4TRE 0x9C +#define PIPE4TRN 0x9E +#define PIPE5TRE 0xA0 +#define PIPE5TRN 0xA2 +#define DEVADD0 0xD0 +#define DEVADD1 0xD2 +#define DEVADD2 0xD4 +#define DEVADD3 0xD6 +#define DEVADD4 0xD8 +#define DEVADD5 0xDA +#define DEVADD6 0xDC +#define DEVADD7 0xDE +#define DEVADD8 0xE0 +#define DEVADD9 0xE2 +#define DEVADDA 0xE4 + +/* System Configuration Control Register */ +#define XTAL 0xC000 /* b15-14: Crystal selection */ +#define XTAL48 0x8000 /* 48MHz */ +#define XTAL24 0x4000 /* 24MHz */ +#define XTAL12 0x0000 /* 12MHz */ +#define XCKE 0x2000 /* b13: External clock enable */ +#define PLLC 0x0800 /* b11: PLL control */ +#define SCKE 0x0400 /* b10: USB clock enable */ +#define PCSDIS 0x0200 /* b9: not CS wakeup */ +#define LPSME 0x0100 /* b8: Low power sleep mode */ +#define HSE 0x0080 /* b7: Hi-speed enable */ +#define DCFM 0x0040 /* b6: Controller function select */ +#define DRPD 0x0020 /* b5: D+/- pull down control */ +#define DPRPU 0x0010 /* b4: D+ pull up control */ +#define USBE 0x0001 /* b0: USB module operation enable */ + +/* System Configuration Status Register */ +#define OVCBIT 0x8000 /* b15-14: Over-current bit */ +#define OVCMON 0xC000 /* b15-14: Over-current monitor */ +#define SOFEA 0x0020 /* b5: SOF monitor */ +#define IDMON 0x0004 /* b3: ID-pin monitor */ +#define LNST 0x0003 /* b1-0: D+, D- line status */ +#define SE1 0x0003 /* SE1 */ +#define FS_KSTS 0x0002 /* Full-Speed K State */ +#define FS_JSTS 0x0001 /* Full-Speed J State */ +#define LS_JSTS 0x0002 /* Low-Speed J State */ +#define LS_KSTS 0x0001 /* Low-Speed K State */ +#define SE0 0x0000 /* SE0 */ + +/* Device State Control Register */ +#define EXTLP0 0x0400 /* b10: External port */ +#define VBOUT 0x0200 /* b9: VBUS output */ +#define WKUP 0x0100 /* b8: Remote wakeup */ +#define RWUPE 0x0080 /* b7: Remote wakeup sense */ +#define USBRST 0x0040 /* b6: USB reset 
enable */ +#define RESUME 0x0020 /* b5: Resume enable */ +#define UACT 0x0010 /* b4: USB bus enable */ +#define RHST 0x0007 /* b1-0: Reset handshake status */ +#define HSPROC 0x0004 /* HS handshake is processing */ +#define HSMODE 0x0003 /* Hi-Speed mode */ +#define FSMODE 0x0002 /* Full-Speed mode */ +#define LSMODE 0x0001 /* Low-Speed mode */ +#define UNDECID 0x0000 /* Undecided */ + +/* Test Mode Register */ +#define UTST 0x000F /* b3-0: Test select */ +#define H_TST_PACKET 0x000C /* HOST TEST Packet */ +#define H_TST_SE0_NAK 0x000B /* HOST TEST SE0 NAK */ +#define H_TST_K 0x000A /* HOST TEST K */ +#define H_TST_J 0x0009 /* HOST TEST J */ +#define H_TST_NORMAL 0x0000 /* HOST Normal Mode */ +#define P_TST_PACKET 0x0004 /* PERI TEST Packet */ +#define P_TST_SE0_NAK 0x0003 /* PERI TEST SE0 NAK */ +#define P_TST_K 0x0002 /* PERI TEST K */ +#define P_TST_J 0x0001 /* PERI TEST J */ +#define P_TST_NORMAL 0x0000 /* PERI Normal Mode */ + +/* Data Pin Configuration Register */ +#define LDRV 0x8000 /* b15: Drive Current Adjust */ +#define VIF1 0x0000 /* VIF = 1.8V */ +#define VIF3 0x8000 /* VIF = 3.3V */ +#define INTA 0x0001 /* b1: USB INT-pin active */ + +/* DMAx Pin Configuration Register */ +#define DREQA 0x4000 /* b14: Dreq active select */ +#define BURST 0x2000 /* b13: Burst mode */ +#define DACKA 0x0400 /* b10: Dack active select */ +#define DFORM 0x0380 /* b9-7: DMA mode select */ +#define CPU_ADR_RD_WR 0x0000 /* Address + RD/WR mode (CPU bus) */ +#define CPU_DACK_RD_WR 0x0100 /* DACK + RD/WR mode (CPU bus) */ +#define CPU_DACK_ONLY 0x0180 /* DACK only mode (CPU bus) */ +#define SPLIT_DACK_ONLY 0x0200 /* DACK only mode (SPLIT bus) */ +#define DENDA 0x0040 /* b6: Dend active select */ +#define PKTM 0x0020 /* b5: Packet mode */ +#define DENDE 0x0010 /* b4: Dend enable */ +#define OBUS 0x0004 /* b2: OUTbus mode */ + +/* CFIFO/DxFIFO Port Select Register */ +#define RCNT 0x8000 /* b15: Read count mode */ +#define REW 0x4000 /* b14: Buffer rewind */ +#define DCLRM 0x2000 /* b13: DMA buffer clear mode */ +#define DREQE 0x1000 /* b12: DREQ output enable */ +#define MBW_8 0x0000 /* 8bit */ +#define MBW_16 0x0400 /* 16bit */ +#define MBW_32 0x0800 /* 32bit */ +#define BIGEND 0x0100 /* b8: Big endian mode */ +#define BYTE_LITTLE 0x0000 /* little dendian */ +#define BYTE_BIG 0x0100 /* big endifan */ +#define ISEL 0x0020 /* b5: DCP FIFO port direction select */ +#define CURPIPE 0x000F /* b2-0: PIPE select */ + +/* CFIFO/DxFIFO Port Control Register */ +#define BVAL 0x8000 /* b15: Buffer valid flag */ +#define BCLR 0x4000 /* b14: Buffer clear */ +#define FRDY 0x2000 /* b13: FIFO ready */ +#define DTLN 0x0FFF /* b11-0: FIFO received data length */ + +/* Interrupt Enable Register 0 */ +#define VBSE 0x8000 /* b15: VBUS interrupt */ +#define RSME 0x4000 /* b14: Resume interrupt */ +#define SOFE 0x2000 /* b13: Frame update interrupt */ +#define DVSE 0x1000 /* b12: Device state transition interrupt */ +#define CTRE 0x0800 /* b11: Control transfer stage transition interrupt */ +#define BEMPE 0x0400 /* b10: Buffer empty interrupt */ +#define NRDYE 0x0200 /* b9: Buffer not ready interrupt */ +#define BRDYE 0x0100 /* b8: Buffer ready interrupt */ + +/* Interrupt Enable Register 1 */ +#define OVRCRE 0x8000 /* b15: Over-current interrupt */ +#define BCHGE 0x4000 /* b14: USB us chenge interrupt */ +#define DTCHE 0x1000 /* b12: Detach sense interrupt */ +#define ATTCHE 0x0800 /* b11: Attach sense interrupt */ +#define EOFERRE 0x0040 /* b6: EOF error interrupt */ +#define SIGNE 0x0020 /* b5: SETUP IGNORE interrupt */ 
+#define SACKE 0x0010 /* b4: SETUP ACK interrupt */ + +/* BRDY Interrupt Enable/Status Register */ +#define BRDY9 0x0200 /* b9: PIPE9 */ +#define BRDY8 0x0100 /* b8: PIPE8 */ +#define BRDY7 0x0080 /* b7: PIPE7 */ +#define BRDY6 0x0040 /* b6: PIPE6 */ +#define BRDY5 0x0020 /* b5: PIPE5 */ +#define BRDY4 0x0010 /* b4: PIPE4 */ +#define BRDY3 0x0008 /* b3: PIPE3 */ +#define BRDY2 0x0004 /* b2: PIPE2 */ +#define BRDY1 0x0002 /* b1: PIPE1 */ +#define BRDY0 0x0001 /* b1: PIPE0 */ + +/* NRDY Interrupt Enable/Status Register */ +#define NRDY9 0x0200 /* b9: PIPE9 */ +#define NRDY8 0x0100 /* b8: PIPE8 */ +#define NRDY7 0x0080 /* b7: PIPE7 */ +#define NRDY6 0x0040 /* b6: PIPE6 */ +#define NRDY5 0x0020 /* b5: PIPE5 */ +#define NRDY4 0x0010 /* b4: PIPE4 */ +#define NRDY3 0x0008 /* b3: PIPE3 */ +#define NRDY2 0x0004 /* b2: PIPE2 */ +#define NRDY1 0x0002 /* b1: PIPE1 */ +#define NRDY0 0x0001 /* b1: PIPE0 */ + +/* BEMP Interrupt Enable/Status Register */ +#define BEMP9 0x0200 /* b9: PIPE9 */ +#define BEMP8 0x0100 /* b8: PIPE8 */ +#define BEMP7 0x0080 /* b7: PIPE7 */ +#define BEMP6 0x0040 /* b6: PIPE6 */ +#define BEMP5 0x0020 /* b5: PIPE5 */ +#define BEMP4 0x0010 /* b4: PIPE4 */ +#define BEMP3 0x0008 /* b3: PIPE3 */ +#define BEMP2 0x0004 /* b2: PIPE2 */ +#define BEMP1 0x0002 /* b1: PIPE1 */ +#define BEMP0 0x0001 /* b0: PIPE0 */ + +/* SOF Pin Configuration Register */ +#define TRNENSEL 0x0100 /* b8: Select transaction enable period */ +#define BRDYM 0x0040 /* b6: BRDY clear timing */ +#define INTL 0x0020 /* b5: Interrupt sense select */ +#define EDGESTS 0x0010 /* b4: */ +#define SOFMODE 0x000C /* b3-2: SOF pin select */ +#define SOF_125US 0x0008 /* SOF OUT 125us Frame Signal */ +#define SOF_1MS 0x0004 /* SOF OUT 1ms Frame Signal */ +#define SOF_DISABLE 0x0000 /* SOF OUT Disable */ + +/* Interrupt Status Register 0 */ +#define VBINT 0x8000 /* b15: VBUS interrupt */ +#define RESM 0x4000 /* b14: Resume interrupt */ +#define SOFR 0x2000 /* b13: SOF frame update interrupt */ +#define DVST 0x1000 /* b12: Device state transition interrupt */ +#define CTRT 0x0800 /* b11: Control transfer stage transition interrupt */ +#define BEMP 0x0400 /* b10: Buffer empty interrupt */ +#define NRDY 0x0200 /* b9: Buffer not ready interrupt */ +#define BRDY 0x0100 /* b8: Buffer ready interrupt */ +#define VBSTS 0x0080 /* b7: VBUS input port */ +#define DVSQ 0x0070 /* b6-4: Device state */ +#define DS_SPD_CNFG 0x0070 /* Suspend Configured */ +#define DS_SPD_ADDR 0x0060 /* Suspend Address */ +#define DS_SPD_DFLT 0x0050 /* Suspend Default */ +#define DS_SPD_POWR 0x0040 /* Suspend Powered */ +#define DS_SUSP 0x0040 /* Suspend */ +#define DS_CNFG 0x0030 /* Configured */ +#define DS_ADDS 0x0020 /* Address */ +#define DS_DFLT 0x0010 /* Default */ +#define DS_POWR 0x0000 /* Powered */ +#define DVSQS 0x0030 /* b5-4: Device state */ +#define VALID 0x0008 /* b3: Setup packet detected flag */ +#define CTSQ 0x0007 /* b2-0: Control transfer stage */ +#define CS_SQER 0x0006 /* Sequence error */ +#define CS_WRND 0x0005 /* Control write nodata status stage */ +#define CS_WRSS 0x0004 /* Control write status stage */ +#define CS_WRDS 0x0003 /* Control write data stage */ +#define CS_RDSS 0x0002 /* Control read status stage */ +#define CS_RDDS 0x0001 /* Control read data stage */ +#define CS_IDST 0x0000 /* Idle or setup stage */ + +/* Interrupt Status Register 1 */ +#define OVRCR 0x8000 /* b15: Over-current interrupt */ +#define BCHG 0x4000 /* b14: USB bus chenge interrupt */ +#define DTCH 0x1000 /* b12: Detach sense interrupt */ +#define ATTCH 
0x0800 /* b11: Attach sense interrupt */ +#define EOFERR 0x0040 /* b6: EOF-error interrupt */ +#define SIGN 0x0020 /* b5: Setup ignore interrupt */ +#define SACK 0x0010 /* b4: Setup acknowledge interrupt */ + +/* Frame Number Register */ +#define OVRN 0x8000 /* b15: Overrun error */ +#define CRCE 0x4000 /* b14: Received data error */ +#define FRNM 0x07FF /* b10-0: Frame number */ + +/* Micro Frame Number Register */ +#define UFRNM 0x0007 /* b2-0: Micro frame number */ + +/* Default Control Pipe Maxpacket Size Register */ +/* Pipe Maxpacket Size Register */ +#define DEVSEL 0xF000 /* b15-14: Device address select */ +#define MAXP 0x007F /* b6-0: Maxpacket size of default control pipe */ + +/* Default Control Pipe Control Register */ +#define BSTS 0x8000 /* b15: Buffer status */ +#define SUREQ 0x4000 /* b14: Send USB request */ +#define CSCLR 0x2000 /* b13: complete-split status clear */ +#define CSSTS 0x1000 /* b12: complete-split status */ +#define SUREQCLR 0x0800 /* b11: stop setup request */ +#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ +#define SQSET 0x0080 /* b7: Sequence toggle bit set */ +#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ +#define PBUSY 0x0020 /* b5: pipe busy */ +#define PINGE 0x0010 /* b4: ping enable */ +#define CCPL 0x0004 /* b2: Enable control transfer complete */ +#define PID 0x0003 /* b1-0: Response PID */ +#define PID_STALL11 0x0003 /* STALL */ +#define PID_STALL 0x0002 /* STALL */ +#define PID_BUF 0x0001 /* BUF */ +#define PID_NAK 0x0000 /* NAK */ + +/* Pipe Window Select Register */ +#define PIPENM 0x0007 /* b2-0: Pipe select */ + +/* Pipe Configuration Register */ +#define R8A66597_TYP 0xC000 /* b15-14: Transfer type */ +#define R8A66597_ISO 0xC000 /* Isochronous */ +#define R8A66597_INT 0x8000 /* Interrupt */ +#define R8A66597_BULK 0x4000 /* Bulk */ +#define R8A66597_BFRE 0x0400 /* b10: Buffer ready interrupt mode select */ +#define R8A66597_DBLB 0x0200 /* b9: Double buffer mode select */ +#define R8A66597_CNTMD 0x0100 /* b8: Continuous transfer mode select */ +#define R8A66597_SHTNAK 0x0080 /* b7: Transfer end NAK */ +#define R8A66597_DIR 0x0010 /* b4: Transfer direction select */ +#define R8A66597_EPNUM 0x000F /* b3-0: Eendpoint number select */ + +/* Pipe Buffer Configuration Register */ +#define BUFSIZE 0x7C00 /* b14-10: Pipe buffer size */ +#define BUFNMB 0x007F /* b6-0: Pipe buffer number */ +#define PIPE0BUF 256 +#define PIPExBUF 64 + +/* Pipe Maxpacket Size Register */ +#define MXPS 0x07FF /* b10-0: Maxpacket size */ + +/* Pipe Cycle Configuration Register */ +#define IFIS 0x1000 /* b12: Isochronous in-buffer flush mode select */ +#define IITV 0x0007 /* b2-0: Isochronous interval */ + +/* Pipex Control Register */ +#define BSTS 0x8000 /* b15: Buffer status */ +#define INBUFM 0x4000 /* b14: IN buffer monitor (Only for PIPE1 to 5) */ +#define CSCLR 0x2000 /* b13: complete-split status clear */ +#define CSSTS 0x1000 /* b12: complete-split status */ +#define ATREPM 0x0400 /* b10: Auto repeat mode */ +#define ACLRM 0x0200 /* b9: Out buffer auto clear mode */ +#define SQCLR 0x0100 /* b8: Sequence toggle bit clear */ +#define SQSET 0x0080 /* b7: Sequence toggle bit set */ +#define SQMON 0x0040 /* b6: Sequence toggle bit monitor */ +#define PBUSY 0x0020 /* b5: pipe busy */ +#define PID 0x0003 /* b1-0: Response PID */ + +/* PIPExTRE */ +#define TRENB 0x0200 /* b9: Transaction counter enable */ +#define TRCLR 0x0100 /* b8: Transaction counter clear */ + +/* PIPExTRN */ +#define TRNCNT 0xFFFF /* b15-0: Transaction counter */ + +/* DEVADDx 
*/ +#define UPPHUB 0x7800 +#define HUBPORT 0x0700 +#define USBSPD 0x00C0 +#define RTPORT 0x0001 + +#endif /* __LINUX_USB_R8A66597_H */ -- cgit v1.2.3 From 2c59b0b70b9d5d61c726f179724660c4c2423f31 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 22 Jul 2009 14:41:35 +0000 Subject: usb: m66592-udc platform data on_chip support Convert the m66592-udc driver to use the on_chip flag from platform data to enable on chip behaviour instead of relying on CONFIG_SUPERH_BUILT_IN_M66592 ugliness. This makes the code cleaner and also allows us to support both external and internal m66592 with the same kernel. It also makes the Kconfig part more future proof since we with this patch can add support for new processors with on-chip m66592 without modifying the Kconfig. The patch adds a m66592 header file for platform data and ties in platform data to the existing m66592 devices. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/sh/boards/mach-highlander/setup.c | 7 + arch/sh/boards/mach-x3proto/setup.c | 7 + arch/sh/kernel/cpu/sh4a/setup-sh7722.c | 8 +- drivers/usb/gadget/Kconfig | 10 -- drivers/usb/gadget/m66592-udc.c | 252 +++++++++++++++++++-------------- drivers/usb/gadget/m66592-udc.h | 89 ++++++------ include/linux/usb/m66592.h | 44 ++++++ 7 files changed, 257 insertions(+), 160 deletions(-) create mode 100644 include/linux/usb/m66592.h (limited to 'include') diff --git a/arch/sh/boards/mach-highlander/setup.c b/arch/sh/boards/mach-highlander/setup.c index 1639f8915000..566e69d8d729 100644 --- a/arch/sh/boards/mach-highlander/setup.c +++ b/arch/sh/boards/mach-highlander/setup.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,11 @@ static struct platform_device r8a66597_usb_host_device = { .resource = r8a66597_usb_host_resources, }; +static struct m66592_platdata usbf_platdata = { + .xtal = M66592_PLATDATA_XTAL_24MHZ, + .vif = 1, +}; + static struct resource m66592_usb_peripheral_resources[] = { [0] = { .name = "m66592_udc", @@ -81,6 +87,7 @@ static struct platform_device m66592_usb_peripheral_device = { .dev = { .dma_mask = NULL, /* don't use dma */ .coherent_dma_mask = 0xffffffff, + .platform_data = &usbf_platdata, }, .num_resources = ARRAY_SIZE(m66592_usb_peripheral_resources), .resource = m66592_usb_peripheral_resources, diff --git a/arch/sh/boards/mach-x3proto/setup.c b/arch/sh/boards/mach-x3proto/setup.c index 8913ae39a802..efe4cb9f8a77 100644 --- a/arch/sh/boards/mach-x3proto/setup.c +++ b/arch/sh/boards/mach-x3proto/setup.c @@ -17,6 +17,7 @@ #include #include #include +#include #include static struct resource heartbeat_resources[] = { @@ -89,6 +90,11 @@ static struct platform_device r8a66597_usb_host_device = { .resource = r8a66597_usb_host_resources, }; +static struct m66592_platdata usbf_platdata = { + .xtal = M66592_PLATDATA_XTAL_24MHZ, + .vif = 1, +}; + static struct resource m66592_usb_peripheral_resources[] = { [0] = { .name = "m66592_udc", @@ -109,6 +115,7 @@ static struct platform_device m66592_usb_peripheral_device = { .dev = { .dma_mask = NULL, /* don't use dma */ .coherent_dma_mask = 0xffffffff, + .platform_data = &usbf_platdata, }, .num_resources = ARRAY_SIZE(m66592_usb_peripheral_resources), .resource = m66592_usb_peripheral_resources, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c index ea524a2da3e4..0bad14a44238 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c @@ -13,6 +13,7 @@ #include #include #include +#include #include 
#include #include @@ -47,9 +48,13 @@ static struct platform_device rtc_device = { .resource = rtc_resources, }; +static struct m66592_platdata usbf_platdata = { + .on_chip = 1, +}; + static struct resource usbf_resources[] = { [0] = { - .name = "m66592_udc", + .name = "USBF", .start = 0x04480000, .end = 0x044800FF, .flags = IORESOURCE_MEM, @@ -67,6 +72,7 @@ static struct platform_device usbf_device = { .dev = { .dma_mask = NULL, .coherent_dma_mask = 0xffffffff, + .platform_data = &usbf_platdata, }, .num_resources = ARRAY_SIZE(usbf_resources), .resource = usbf_resources, diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 7f8e83a954ac..b7f10bc25c2c 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -360,16 +360,6 @@ config USB_M66592 default USB_GADGET select USB_GADGET_SELECTED -config SUPERH_BUILT_IN_M66592 - boolean "Enable SuperH built-in USB like the M66592" - depends on USB_GADGET_M66592 && CPU_SUBTYPE_SH7722 - help - SH7722 has USB like the M66592. - - The transfer rate is very slow when use "Ethernet Gadget". - However, this problem is improved if change a value of - NET_IP_ALIGN to 4. - # # Controllers available only in discrete form (and all PCI controllers) # diff --git a/drivers/usb/gadget/m66592-udc.c b/drivers/usb/gadget/m66592-udc.c index 0dddd2f8ff35..a61c70caff12 100644 --- a/drivers/usb/gadget/m66592-udc.c +++ b/drivers/usb/gadget/m66592-udc.c @@ -31,38 +31,12 @@ #include "m66592-udc.h" - MODULE_DESCRIPTION("M66592 USB gadget driver"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yoshihiro Shimoda"); MODULE_ALIAS("platform:m66592_udc"); -#define DRIVER_VERSION "26 Jun 2009" - -/* module parameters */ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) -static unsigned short endian = M66592_LITTLE; -module_param(endian, ushort, 0644); -MODULE_PARM_DESC(endian, "data endian: big=0, little=0 (default=0)"); -#else -static unsigned short clock = M66592_XTAL24; -module_param(clock, ushort, 0644); -MODULE_PARM_DESC(clock, "input clock: 48MHz=32768, 24MHz=16384, 12MHz=0 " - "(default=16384)"); - -static unsigned short vif = M66592_LDRV; -module_param(vif, ushort, 0644); -MODULE_PARM_DESC(vif, "input VIF: 3.3V=32768, 1.5V=0 (default=32768)"); - -static unsigned short endian; -module_param(endian, ushort, 0644); -MODULE_PARM_DESC(endian, "data endian: big=256, little=0 (default=0)"); - -static unsigned short irq_sense = M66592_INTL; -module_param(irq_sense, ushort, 0644); -MODULE_PARM_DESC(irq_sense, "IRQ sense: low level=2, falling edge=0 " - "(default=2)"); -#endif +#define DRIVER_VERSION "21 July 2009" static const char udc_name[] = "m66592_udc"; static const char *m66592_ep_name[] = { @@ -244,6 +218,7 @@ static inline int get_buffer_size(struct m66592 *m66592, u16 pipenum) static inline void pipe_change(struct m66592 *m66592, u16 pipenum) { struct m66592_ep *ep = m66592->pipenum2ep[pipenum]; + unsigned short mbw; if (ep->use_dma) return; @@ -252,7 +227,12 @@ static inline void pipe_change(struct m66592 *m66592, u16 pipenum) ndelay(450); - m66592_bset(m66592, M66592_MBW, ep->fifosel); + if (m66592->pdata->on_chip) + mbw = M66592_MBW_32; + else + mbw = M66592_MBW_16; + + m66592_bset(m66592, mbw, ep->fifosel); } static int pipe_buffer_setting(struct m66592 *m66592, @@ -332,6 +312,7 @@ static void pipe_buffer_release(struct m66592 *m66592, static void pipe_initialize(struct m66592_ep *ep) { struct m66592 *m66592 = ep->m66592; + unsigned short mbw; m66592_mdfy(m66592, 0, M66592_CURPIPE, ep->fifosel); @@ -343,7 +324,12 @@ static void 
pipe_initialize(struct m66592_ep *ep) ndelay(450); - m66592_bset(m66592, M66592_MBW, ep->fifosel); + if (m66592->pdata->on_chip) + mbw = M66592_MBW_32; + else + mbw = M66592_MBW_16; + + m66592_bset(m66592, mbw, ep->fifosel); } } @@ -359,15 +345,13 @@ static void m66592_ep_setting(struct m66592 *m66592, struct m66592_ep *ep, ep->fifosel = M66592_D0FIFOSEL; ep->fifoctr = M66592_D0FIFOCTR; ep->fifotrn = M66592_D0FIFOTRN; -#if !defined(CONFIG_SUPERH_BUILT_IN_M66592) - } else if (m66592->num_dma == 1) { + } else if (!m66592->pdata->on_chip && m66592->num_dma == 1) { m66592->num_dma++; ep->use_dma = 1; ep->fifoaddr = M66592_D1FIFO; ep->fifosel = M66592_D1FIFOSEL; ep->fifoctr = M66592_D1FIFOCTR; ep->fifotrn = M66592_D1FIFOTRN; -#endif } else { ep->use_dma = 0; ep->fifoaddr = M66592_CFIFO; @@ -612,76 +596,120 @@ static void start_ep0(struct m66592_ep *ep, struct m66592_request *req) } } -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) static void init_controller(struct m66592 *m66592) { - m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ - m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); - m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); - m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); + unsigned int endian; - /* This is a workaound for SH7722 2nd cut */ - m66592_bset(m66592, 0x8000, M66592_DVSTCTR); - m66592_bset(m66592, 0x1000, M66592_TESTMODE); - m66592_bclr(m66592, 0x8000, M66592_DVSTCTR); + if (m66592->pdata->on_chip) { + if (m66592->pdata->endian) + endian = 0; /* big endian */ + else + endian = M66592_LITTLE; /* little endian */ - m66592_bset(m66592, M66592_INTL, M66592_INTENB1); + m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ + m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); + m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); + m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); - m66592_write(m66592, 0, M66592_CFBCFG); - m66592_write(m66592, 0, M66592_D0FBCFG); - m66592_bset(m66592, endian, M66592_CFBCFG); - m66592_bset(m66592, endian, M66592_D0FBCFG); -} -#else /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ -static void init_controller(struct m66592 *m66592) -{ - m66592_bset(m66592, (vif & M66592_LDRV) | (endian & M66592_BIGEND), - M66592_PINCFG); - m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ - m66592_mdfy(m66592, clock & M66592_XTAL, M66592_XTAL, M66592_SYSCFG); + /* This is a workaound for SH7722 2nd cut */ + m66592_bset(m66592, 0x8000, M66592_DVSTCTR); + m66592_bset(m66592, 0x1000, M66592_TESTMODE); + m66592_bclr(m66592, 0x8000, M66592_DVSTCTR); - m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); - m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); - m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); + m66592_bset(m66592, M66592_INTL, M66592_INTENB1); + + m66592_write(m66592, 0, M66592_CFBCFG); + m66592_write(m66592, 0, M66592_D0FBCFG); + m66592_bset(m66592, endian, M66592_CFBCFG); + m66592_bset(m66592, endian, M66592_D0FBCFG); + } else { + unsigned int clock, vif, irq_sense; + + if (m66592->pdata->endian) + endian = M66592_BIGEND; /* big endian */ + else + endian = 0; /* little endian */ + + if (m66592->pdata->vif) + vif = M66592_LDRV; /* 3.3v */ + else + vif = 0; /* 1.5v */ + + switch (m66592->pdata->xtal) { + case M66592_PLATDATA_XTAL_12MHZ: + clock = M66592_XTAL12; + break; + case M66592_PLATDATA_XTAL_24MHZ: + clock = M66592_XTAL24; + break; + case M66592_PLATDATA_XTAL_48MHZ: + clock = M66592_XTAL48; + break; + default: + pr_warning("m66592-udc: xtal configuration error\n"); + clock = 0; + } - m66592_bset(m66592, M66592_XCKE, 
M66592_SYSCFG); + switch (m66592->irq_trigger) { + case IRQF_TRIGGER_LOW: + irq_sense = M66592_INTL; + break; + case IRQF_TRIGGER_FALLING: + irq_sense = 0; + break; + default: + pr_warning("m66592-udc: irq trigger config error\n"); + irq_sense = 0; + } - msleep(3); + m66592_bset(m66592, + (vif & M66592_LDRV) | (endian & M66592_BIGEND), + M66592_PINCFG); + m66592_bset(m66592, M66592_HSE, M66592_SYSCFG); /* High spd */ + m66592_mdfy(m66592, clock & M66592_XTAL, M66592_XTAL, + M66592_SYSCFG); + m66592_bclr(m66592, M66592_USBE, M66592_SYSCFG); + m66592_bclr(m66592, M66592_DPRPU, M66592_SYSCFG); + m66592_bset(m66592, M66592_USBE, M66592_SYSCFG); - m66592_bset(m66592, M66592_RCKE | M66592_PLLC, M66592_SYSCFG); + m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); + + msleep(3); - msleep(1); + m66592_bset(m66592, M66592_RCKE | M66592_PLLC, M66592_SYSCFG); - m66592_bset(m66592, M66592_SCKE, M66592_SYSCFG); + msleep(1); - m66592_bset(m66592, irq_sense & M66592_INTL, M66592_INTENB1); - m66592_write(m66592, M66592_BURST | M66592_CPU_ADR_RD_WR, - M66592_DMA0CFG); + m66592_bset(m66592, M66592_SCKE, M66592_SYSCFG); + + m66592_bset(m66592, irq_sense & M66592_INTL, M66592_INTENB1); + m66592_write(m66592, M66592_BURST | M66592_CPU_ADR_RD_WR, + M66592_DMA0CFG); + } } -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ static void disable_controller(struct m66592 *m66592) { -#if !defined(CONFIG_SUPERH_BUILT_IN_M66592) - m66592_bclr(m66592, M66592_SCKE, M66592_SYSCFG); - udelay(1); - m66592_bclr(m66592, M66592_PLLC, M66592_SYSCFG); - udelay(1); - m66592_bclr(m66592, M66592_RCKE, M66592_SYSCFG); - udelay(1); - m66592_bclr(m66592, M66592_XCKE, M66592_SYSCFG); -#endif + if (!m66592->pdata->on_chip) { + m66592_bclr(m66592, M66592_SCKE, M66592_SYSCFG); + udelay(1); + m66592_bclr(m66592, M66592_PLLC, M66592_SYSCFG); + udelay(1); + m66592_bclr(m66592, M66592_RCKE, M66592_SYSCFG); + udelay(1); + m66592_bclr(m66592, M66592_XCKE, M66592_SYSCFG); + } } static void m66592_start_xclock(struct m66592 *m66592) { -#if !defined(CONFIG_SUPERH_BUILT_IN_M66592) u16 tmp; - tmp = m66592_read(m66592, M66592_SYSCFG); - if (!(tmp & M66592_XCKE)) - m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); -#endif + if (!m66592->pdata->on_chip) { + tmp = m66592_read(m66592, M66592_SYSCFG); + if (!(tmp & M66592_XCKE)) + m66592_bset(m66592, M66592_XCKE, M66592_SYSCFG); + } } /*-------------------------------------------------------------------------*/ @@ -1169,8 +1197,7 @@ static irqreturn_t m66592_irq(int irq, void *_m66592) intsts0 = m66592_read(m66592, M66592_INTSTS0); intenb0 = m66592_read(m66592, M66592_INTENB0); -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) - if (!intsts0 && !intenb0) { + if (m66592->pdata->on_chip && !intsts0 && !intenb0) { /* * When USB clock stops, it cannot read register. Even if a * clock stops, the interrupt occurs. 
So this driver turn on @@ -1180,7 +1207,6 @@ static irqreturn_t m66592_irq(int irq, void *_m66592) intsts0 = m66592_read(m66592, M66592_INTSTS0); intenb0 = m66592_read(m66592, M66592_INTENB0); } -#endif savepipe = m66592_read(m66592, M66592_CFIFOSEL); @@ -1526,9 +1552,11 @@ static int __exit m66592_remove(struct platform_device *pdev) iounmap(m66592->reg); free_irq(platform_get_irq(pdev, 0), m66592); m66592_free_request(&m66592->ep[0].ep, m66592->ep0_req); -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) - clk_disable(m66592->clk); - clk_put(m66592->clk); +#ifdef CONFIG_HAVE_CLK + if (m66592->pdata->on_chip) { + clk_disable(m66592->clk); + clk_put(m66592->clk); + } #endif kfree(m66592); return 0; @@ -1540,11 +1568,10 @@ static void nop_completion(struct usb_ep *ep, struct usb_request *r) static int __init m66592_probe(struct platform_device *pdev) { - struct resource *res; - int irq; + struct resource *res, *ires; void __iomem *reg = NULL; struct m66592 *m66592 = NULL; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK char clk_name[8]; #endif int ret = 0; @@ -1557,10 +1584,11 @@ static int __init m66592_probe(struct platform_device *pdev) goto clean_up; } - irq = platform_get_irq(pdev, 0); - if (irq < 0) { + ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!ires) { ret = -ENODEV; - pr_err("platform_get_irq error.\n"); + dev_err(&pdev->dev, + "platform_get_resource IORESOURCE_IRQ error.\n"); goto clean_up; } @@ -1571,6 +1599,12 @@ static int __init m66592_probe(struct platform_device *pdev) goto clean_up; } + if (pdev->dev.platform_data == NULL) { + dev_err(&pdev->dev, "no platform data\n"); + ret = -ENODEV; + goto clean_up; + } + /* initialize ucd */ m66592 = kzalloc(sizeof(struct m66592), GFP_KERNEL); if (m66592 == NULL) { @@ -1578,6 +1612,9 @@ static int __init m66592_probe(struct platform_device *pdev) goto clean_up; } + m66592->pdata = pdev->dev.platform_data; + m66592->irq_trigger = ires->flags & IRQF_TRIGGER_MASK; + spin_lock_init(&m66592->lock); dev_set_drvdata(&pdev->dev, m66592); @@ -1595,22 +1632,25 @@ static int __init m66592_probe(struct platform_device *pdev) m66592->timer.data = (unsigned long)m66592; m66592->reg = reg; - ret = request_irq(irq, m66592_irq, IRQF_DISABLED | IRQF_SHARED, + ret = request_irq(ires->start, m66592_irq, IRQF_DISABLED | IRQF_SHARED, udc_name, m66592); if (ret < 0) { pr_err("request_irq error (%d)\n", ret); goto clean_up; } -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) - snprintf(clk_name, sizeof(clk_name), "usbf%d", pdev->id); - m66592->clk = clk_get(&pdev->dev, clk_name); - if (IS_ERR(m66592->clk)) { - dev_err(&pdev->dev, "cannot get clock \"%s\"\n", clk_name); - ret = PTR_ERR(m66592->clk); - goto clean_up2; +#ifdef CONFIG_HAVE_CLK + if (m66592->pdata->on_chip) { + snprintf(clk_name, sizeof(clk_name), "usbf%d", pdev->id); + m66592->clk = clk_get(&pdev->dev, clk_name); + if (IS_ERR(m66592->clk)) { + dev_err(&pdev->dev, "cannot get clock \"%s\"\n", + clk_name); + ret = PTR_ERR(m66592->clk); + goto clean_up2; + } + clk_enable(m66592->clk); } - clk_enable(m66592->clk); #endif INIT_LIST_HEAD(&m66592->gadget.ep_list); m66592->gadget.ep0 = &m66592->ep[0].ep; @@ -1652,12 +1692,14 @@ static int __init m66592_probe(struct platform_device *pdev) return 0; clean_up3: -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) - clk_disable(m66592->clk); - clk_put(m66592->clk); +#ifdef CONFIG_HAVE_CLK + if (m66592->pdata->on_chip) { + 
clk_disable(m66592->clk); + clk_put(m66592->clk); + } clean_up2: #endif - free_irq(irq, m66592); + free_irq(ires->start, m66592); clean_up: if (m66592) { if (m66592->ep0_req) diff --git a/drivers/usb/gadget/m66592-udc.h b/drivers/usb/gadget/m66592-udc.h index 9a9c2bf9fbd5..8b960deed680 100644 --- a/drivers/usb/gadget/m66592-udc.h +++ b/drivers/usb/gadget/m66592-udc.h @@ -23,10 +23,12 @@ #ifndef __M66592_UDC_H__ #define __M66592_UDC_H__ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK #include #endif +#include + #define M66592_SYSCFG 0x00 #define M66592_XTAL 0xC000 /* b15-14: Crystal selection */ #define M66592_XTAL48 0x8000 /* 48MHz */ @@ -76,11 +78,11 @@ #define M66592_P_TST_J 0x0001 /* PERI TEST J */ #define M66592_P_TST_NORMAL 0x0000 /* PERI Normal Mode */ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) +/* built-in registers */ #define M66592_CFBCFG 0x0A #define M66592_D0FBCFG 0x0C #define M66592_LITTLE 0x0100 /* b8: Little endian mode */ -#else +/* external chip case */ #define M66592_PINCFG 0x0A #define M66592_LDRV 0x8000 /* b15: Drive Current Adjust */ #define M66592_BIGEND 0x0100 /* b8: Big endian mode */ @@ -100,8 +102,8 @@ #define M66592_PKTM 0x0020 /* b5: Packet mode */ #define M66592_DENDE 0x0010 /* b4: Dend enable */ #define M66592_OBUS 0x0004 /* b2: OUTbus mode */ -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ +/* common case */ #define M66592_CFIFO 0x10 #define M66592_D0FIFO 0x14 #define M66592_D1FIFO 0x18 @@ -113,13 +115,9 @@ #define M66592_REW 0x4000 /* b14: Buffer rewind */ #define M66592_DCLRM 0x2000 /* b13: DMA buffer clear mode */ #define M66592_DREQE 0x1000 /* b12: DREQ output enable */ -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) -#define M66592_MBW 0x0800 /* b11: Maximum bit width for FIFO */ -#else -#define M66592_MBW 0x0400 /* b10: Maximum bit width for FIFO */ -#define M66592_MBW_8 0x0000 /* 8bit */ -#define M66592_MBW_16 0x0400 /* 16bit */ -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ +#define M66592_MBW_8 0x0000 /* 8bit */ +#define M66592_MBW_16 0x0400 /* 16bit */ +#define M66592_MBW_32 0x0800 /* 32bit */ #define M66592_TRENB 0x0200 /* b9: Transaction counter enable */ #define M66592_TRCLR 0x0100 /* b8: Transaction counter clear */ #define M66592_DEZPM 0x0080 /* b7: Zero-length packet mode */ @@ -480,9 +478,11 @@ struct m66592_ep { struct m66592 { spinlock_t lock; void __iomem *reg; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) && defined(CONFIG_HAVE_CLK) +#ifdef CONFIG_HAVE_CLK struct clk *clk; #endif + struct m66592_platdata *pdata; + unsigned long irq_trigger; struct usb_gadget gadget; struct usb_gadget_driver *driver; @@ -546,13 +546,13 @@ static inline void m66592_read_fifo(struct m66592 *m66592, { unsigned long fifoaddr = (unsigned long)m66592->reg + offset; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) - len = (len + 3) / 4; - insl(fifoaddr, buf, len); -#else - len = (len + 1) / 2; - insw(fifoaddr, buf, len); -#endif + if (m66592->pdata->on_chip) { + len = (len + 3) / 4; + insl(fifoaddr, buf, len); + } else { + len = (len + 1) / 2; + insw(fifoaddr, buf, len); + } } static inline void m66592_write(struct m66592 *m66592, u16 val, @@ -566,33 +566,34 @@ static inline void m66592_write_fifo(struct m66592 *m66592, void *buf, unsigned long len) { unsigned long fifoaddr = (unsigned long)m66592->reg + offset; -#if defined(CONFIG_SUPERH_BUILT_IN_M66592) - unsigned long count; - unsigned char *pb; - int i; - - count = len / 4; - outsl(fifoaddr, buf, count); - - if (len & 0x00000003) { - pb = buf + count * 
4; - for (i = 0; i < (len & 0x00000003); i++) { - if (m66592_read(m66592, M66592_CFBCFG)) /* little */ - outb(pb[i], fifoaddr + (3 - i)); - else - outb(pb[i], fifoaddr + i); + + if (m66592->pdata->on_chip) { + unsigned long count; + unsigned char *pb; + int i; + + count = len / 4; + outsl(fifoaddr, buf, count); + + if (len & 0x00000003) { + pb = buf + count * 4; + for (i = 0; i < (len & 0x00000003); i++) { + if (m66592_read(m66592, M66592_CFBCFG)) /* le */ + outb(pb[i], fifoaddr + (3 - i)); + else + outb(pb[i], fifoaddr + i); + } + } + } else { + unsigned long odd = len & 0x0001; + + len = len / 2; + outsw(fifoaddr, buf, len); + if (odd) { + unsigned char *p = buf + len*2; + outb(*p, fifoaddr); } } -#else - unsigned long odd = len & 0x0001; - - len = len / 2; - outsw(fifoaddr, buf, len); - if (odd) { - unsigned char *p = buf + len*2; - outb(*p, fifoaddr); - } -#endif /* #if defined(CONFIG_SUPERH_BUILT_IN_M66592) */ } static inline void m66592_mdfy(struct m66592 *m66592, u16 val, u16 pat, diff --git a/include/linux/usb/m66592.h b/include/linux/usb/m66592.h new file mode 100644 index 000000000000..cda9625e7df0 --- /dev/null +++ b/include/linux/usb/m66592.h @@ -0,0 +1,44 @@ +/* + * M66592 driver platform data + * + * Copyright (C) 2009 Renesas Solutions Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __LINUX_USB_M66592_H +#define __LINUX_USB_M66592_H + +#define M66592_PLATDATA_XTAL_12MHZ 0x01 +#define M66592_PLATDATA_XTAL_24MHZ 0x02 +#define M66592_PLATDATA_XTAL_48MHZ 0x03 + +struct m66592_platdata { + /* one = on chip controller, zero = external controller */ + unsigned on_chip:1; + + /* one = big endian, zero = little endian */ + unsigned endian:1; + + /* (external controller only) M66592_PLATDATA_XTAL_nnMHZ */ + unsigned xtal:2; + + /* (external controller only) one = 3.3V, zero = 1.5V */ + unsigned vif:1; + +}; + +#endif /* __LINUX_USB_M66592_H */ + -- cgit v1.2.3 From 878fa89f97954337d1dc41f0ccc3a8b5f89cfbc7 Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Wed, 22 Jul 2009 18:51:24 +0200 Subject: IEEE80154: Add documentation to the IEEE80154 netlink and fakehard driver This adds some perfunctory documentation comments to the IEEE 802.15.4 fakehard.c driver (Fake hard MAC) and the nl802154.h (outgoing netlink messages) header. These comments are not necessarily complete, but they do reference the IEEE 802.15.4-2006 document where possible. 
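(For context: the "fake hard MAC" driver behaves as though the MAC sublayer were implemented in hardware, so its DSN/BSN getters return fixed values and its MLME request handlers simply confirm IEEE802154_SUCCESS back over netlink.)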
Signed-off-by: Daniel Silverstone --- drivers/ieee802154/fakehard.c | 120 ++++++++++++++++++++++++++++++++++++++ include/net/ieee802154/nl802154.h | 76 ++++++++++++++++++++++++ 2 files changed, 196 insertions(+) (limited to 'include') diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c index 0384144c0b34..9ec07e8552f2 100644 --- a/drivers/ieee802154/fakehard.c +++ b/drivers/ieee802154/fakehard.c @@ -31,6 +31,12 @@ #include #include +/** + * fake_get_pan_id - Retrieve the PAN ID of the device. + * @dev: The network device to retrieve the PAN of. + * + * Return the ID of the PAN from the PIB. + */ static u16 fake_get_pan_id(struct net_device *dev) { BUG_ON(dev->type != ARPHRD_IEEE802154); @@ -38,6 +44,14 @@ static u16 fake_get_pan_id(struct net_device *dev) return 0xeba1; } +/** + * fake_get_short_addr - Retrieve the short address of the device. + * @dev: The network device to retrieve the short address of. + * + * Returns the IEEE 802.15.4 short-form address cached for this + * device. If the device has not yet had a short address assigned + * then this should return 0xFFFF to indicate a lack of association. + */ static u16 fake_get_short_addr(struct net_device *dev) { BUG_ON(dev->type != ARPHRD_IEEE802154); @@ -45,6 +59,19 @@ static u16 fake_get_short_addr(struct net_device *dev) return 0x1; } +/** + * fake_get_dsn - Retrieve the DSN of the device. + * @dev: The network device to retrieve the DSN for. + * + * Returns the IEEE 802.15.4 DSN for the network device. + * The DSN is the sequence number which will be added to each + * packet or MAC command frame by the MAC during transmission. + * + * DSN means 'Data Sequence Number'. + * + * Note: This is in section 7.2.1.2 of the IEEE 802.15.4-2006 + * document. + */ static u8 fake_get_dsn(struct net_device *dev) { BUG_ON(dev->type != ARPHRD_IEEE802154); @@ -52,6 +79,19 @@ static u8 fake_get_dsn(struct net_device *dev) return 0x00; /* DSN are implemented in HW, so return just 0 */ } +/** + * fake_get_bsn - Retrieve the BSN of the device. + * @dev: The network device to retrieve the BSN for. + * + * Returns the IEEE 802.15.4 BSN for the network device. + * The BSN is the sequence number which will be added to each + * beacon frame sent by the MAC. + * + * BSN means 'Beacon Sequence Number'. + * + * Note: This is in section 7.2.1.2 of the IEEE 802.15.4-2006 + * document. + */ static u8 fake_get_bsn(struct net_device *dev) { BUG_ON(dev->type != ARPHRD_IEEE802154); @@ -59,6 +99,19 @@ static u8 fake_get_bsn(struct net_device *dev) return 0x00; /* BSN are implemented in HW, so return just 0 */ } +/** + * fake_assoc_req - Make an association request to the HW. + * @dev: The network device which we are associating to a network. + * @addr: The coordinator with which we wish to associate. + * @channel: The channel on which to associate. + * @cap: The capability information field to use in the association. + * + * Start an association with a coordinator. The coordinator's address + * and PAN ID can be found in @addr. + * + * Note: This is in section 7.3.1 and 7.5.3.1 of the IEEE + * 802.15.4-2006 document. + */ static int fake_assoc_req(struct net_device *dev, struct ieee802154_addr *addr, u8 channel, u8 cap) { @@ -67,18 +120,70 @@ static int fake_assoc_req(struct net_device *dev, IEEE802154_SUCCESS); } +/** + * fake_assoc_resp - Send an association response to a device. + * @dev: The network device on which to send the response. + * @addr: The address of the device to respond to. 
+ * @short_addr: The assigned short address for the device (if any). + * @status: The result of the association request. + * + * Queue the association response of the coordinator to another + * device's attempt to associate with the network which we + * coordinate. This is then added to the indirect-send queue to be + * transmitted to the end device when it polls for data. + * + * Note: This is in section 7.3.2 and 7.5.3.1 of the IEEE + * 802.15.4-2006 document. + */ static int fake_assoc_resp(struct net_device *dev, struct ieee802154_addr *addr, u16 short_addr, u8 status) { return 0; } +/** + * fake_disassoc_req - Disassociate a device from a network. + * @dev: The network device on which we're disassociating a device. + * @addr: The device to disassociate from the network. + * @reason: The reason to give to the device for being disassociated. + * + * This sends a disassociation notification to the device being + * disassociated from the network. + * + * Note: This is in section 7.5.3.2 of the IEEE 802.15.4-2006 + * document, with the reason described in 7.3.3.2. + */ static int fake_disassoc_req(struct net_device *dev, struct ieee802154_addr *addr, u8 reason) { return ieee802154_nl_disassoc_confirm(dev, IEEE802154_SUCCESS); } +/** + * fake_start_req - Start an IEEE 802.15.4 PAN. + * @dev: The network device on which to start the PAN. + * @addr: The coordinator address to use when starting the PAN. + * @channel: The channel on which to start the PAN. + * @bcn_ord: Beacon order. + * @sf_ord: Superframe order. + * @pan_coord: Whether or not we are the PAN coordinator or just + * requesting a realignment perhaps? + * @blx: Battery Life Extension feature bitfield. + * @coord_realign: Something to realign something else. + * + * If pan_coord is non-zero then this starts a network with the + * provided parameters, otherwise it attempts a coordinator + * realignment of the stated network instead. + * + * Note: This is in section 7.5.2.3 of the IEEE 802.15.4-2006 + * document, with 7.3.8 describing coordinator realignment. + * + * Note: There is currently no way to notify the coordinator userland + * program of whether or not the PAN has started successfully. As + * such, the coordinator program cannot know when the MAC has + * completed starting the network and will simply have to assume + * completeness based on some form of time delay. + */ static int fake_start_req(struct net_device *dev, struct ieee802154_addr *addr, u8 channel, u8 bcn_ord, u8 sf_ord, u8 pan_coord, u8 blx, @@ -87,6 +192,21 @@ static int fake_start_req(struct net_device *dev, struct ieee802154_addr *addr, return 0; } +/** + * fake_scan_req - Start a channel scan. + * @dev: The network device on which to perform a channel scan. + * @type: The type of scan to perform. + * @channels: The channel bitmask to scan. + * @duration: How long to spend on each channel. + * + * This starts either a passive (energy) scan or an active (PAN) scan + * on the channels indicated in the @channels bitmask. The duration of + * the scan is measured in terms of superframe duration. Specifically, + * the scan will spend aBaseSuperFrameDuration * ((2^n) + 1) on each + * channel. + * + * Note: This is in section 7.5.2.1 of the IEEE 802.15.4-2006 document. 
+ */ static int fake_scan_req(struct net_device *dev, u8 type, u32 channels, u8 duration) { diff --git a/include/net/ieee802154/nl802154.h b/include/net/ieee802154/nl802154.h index 78efcdf52b59..6096096f6d7d 100644 --- a/include/net/ieee802154/nl802154.h +++ b/include/net/ieee802154/nl802154.h @@ -24,17 +24,93 @@ struct net_device; struct ieee802154_addr; +/** + * ieee802154_nl_assoc_indic - Notify userland of an association request. + * @dev: The network device on which this association request was + * received. + * @addr: The address of the device requesting association. + * @cap: The capability information field from the device. + * + * This informs a userland coordinator of a device requesting to + * associate with the PAN controlled by the coordinator. + * + * Note: This is in section 7.3.1 of the IEEE 802.15.4-2006 document. + */ int ieee802154_nl_assoc_indic(struct net_device *dev, struct ieee802154_addr *addr, u8 cap); + +/** + * ieee802154_nl_assoc_confirm - Notify userland of association. + * @dev: The device which has completed association. + * @short_addr: The short address assigned to the device. + * @status: The status of the association. + * + * Inform userland of the result of an association request. If the + * association request included asking the coordinator to allocate + * a short address then it is returned in @short_addr. + * + * Note: This is in section 7.3.2 of the IEEE 802.15.4 document. + */ int ieee802154_nl_assoc_confirm(struct net_device *dev, u16 short_addr, u8 status); + +/** + * ieee802154_nl_disassoc_indic - Notify userland of disassociation. + * @dev: The device on which disassociation was indicated. + * @addr: The device which is disassociating. + * @reason: The reason for the disassociation. + * + * Inform userland that a device has disassociated from the network. + * + * Note: This is in section 7.3.3 of the IEEE 802.15.4 document. + */ int ieee802154_nl_disassoc_indic(struct net_device *dev, struct ieee802154_addr *addr, u8 reason); + +/** + * ieee802154_nl_disassoc_confirm - Notify userland of disassociation + * completion. + * @dev: The device on which disassociation was ordered. + * @status: The result of the disassociation. + * + * Inform userland of the result of requesting that a device + * disassociate, or the result of requesting that we disassociate from + * a PAN managed by another coordinator. + * + * Note: This is in section 7.1.4.3 of the IEEE 802.15.4 document. + */ int ieee802154_nl_disassoc_confirm(struct net_device *dev, u8 status); + +/** + * ieee802154_nl_scan_confirm - Notify userland of completion of scan. + * @dev: The device which was instructed to scan. + * @status: The status of the scan operation. + * @scan_type: What type of scan was performed. + * @unscanned: Any channels that the device was unable to scan. + * @edl: The energy levels (if a passive scan). + * + * + * Note: This is in section 7.1.11 of the IEEE 802.15.4 document. + * Note: This API does not permit the return of an active scan result. + */ int ieee802154_nl_scan_confirm(struct net_device *dev, u8 status, u8 scan_type, u32 unscanned, u8 *edl/*, struct list_head *pan_desc_list */); + +/** + * ieee802154_nl_beacon_indic - Notify userland of a received beacon. + * @dev: The device on which a beacon was received. + * @panid: The PAN of the coordinator. + * @coord_addr: The short address of the coordinator on that PAN. + * + * Note: This is in section 7.1.5 of the IEEE 802.15.4 document. 
+ * Note: This API does not provide extended information such as what + * channel the PAN is on or what the LQI of the beacon frame was on + * receipt. + * Note: This API cannot indicate a beacon frame for a coordinator + * operating in long addressing mode. + */ int ieee802154_nl_beacon_indic(struct net_device *dev, u16 panid, u16 coord_addr); -- cgit v1.2.3 From f0166e5e3cdab66d5a31f796ce18e21fd3ce99dc Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 23 Jul 2009 16:56:29 +0400 Subject: ieee802154: move headers out of extra directory include/net/ieee802154/af_ieee802154.h (and others) naming seems to be too long and redundant. Drop one level of subdirectories. Signed-off-by: Dmitry Eremin-Solenikov --- drivers/ieee802154/fakehard.c | 8 +- include/net/af_ieee802154.h | 60 +++++++++++++ include/net/ieee802154.h | 160 +++++++++++++++++++++++++++++++++ include/net/ieee802154/af_ieee802154.h | 60 ------------- include/net/ieee802154/mac_def.h | 160 --------------------------------- include/net/ieee802154/netdevice.h | 115 ------------------------ include/net/ieee802154/nl802154.h | 117 ------------------------ include/net/ieee802154_netdev.h | 115 ++++++++++++++++++++++++ include/net/nl802154.h | 117 ++++++++++++++++++++++++ net/ieee802154/af_ieee802154.c | 4 +- net/ieee802154/dgram.c | 6 +- net/ieee802154/netlink.c | 6 +- net/ieee802154/raw.c | 2 +- 13 files changed, 465 insertions(+), 465 deletions(-) create mode 100644 include/net/af_ieee802154.h create mode 100644 include/net/ieee802154.h delete mode 100644 include/net/ieee802154/af_ieee802154.h delete mode 100644 include/net/ieee802154/mac_def.h delete mode 100644 include/net/ieee802154/netdevice.h delete mode 100644 include/net/ieee802154/nl802154.h create mode 100644 include/net/ieee802154_netdev.h create mode 100644 include/net/nl802154.h (limited to 'include') diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c index 9ec07e8552f2..8a52e6efa239 100644 --- a/drivers/ieee802154/fakehard.c +++ b/drivers/ieee802154/fakehard.c @@ -26,10 +26,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include /** * fake_get_pan_id - Retrieve the PAN ID of the device. diff --git a/include/net/af_ieee802154.h b/include/net/af_ieee802154.h new file mode 100644 index 000000000000..0d78605fb1a6 --- /dev/null +++ b/include/net/af_ieee802154.h @@ -0,0 +1,60 @@ +/* + * IEEE 802.15.4 inteface for userspace + * + * Copyright 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Written by: + * Sergey Lapin + * Dmitry Eremin-Solenikov + */ + +#ifndef _AF_IEEE802154_H +#define _AF_IEEE802154_H + +#include /* for sa_family_t */ + +enum { + IEEE802154_ADDR_NONE = 0x0, + /* RESERVED = 0x01, */ + IEEE802154_ADDR_SHORT = 0x2, /* 16-bit address + PANid */ + IEEE802154_ADDR_LONG = 0x3, /* 64-bit address + PANid */ +}; + +/* address length, octets */ +#define IEEE802154_ADDR_LEN 8 + +struct ieee802154_addr { + int addr_type; + u16 pan_id; + union { + u8 hwaddr[IEEE802154_ADDR_LEN]; + u16 short_addr; + }; +}; + +#define IEEE802154_PANID_BROADCAST 0xffff +#define IEEE802154_ADDR_BROADCAST 0xffff +#define IEEE802154_ADDR_UNDEF 0xfffe + +struct sockaddr_ieee802154 { + sa_family_t family; /* AF_IEEE802154 */ + struct ieee802154_addr addr; +}; + +/* master device */ +#define IEEE802154_SIOC_ADD_SLAVE (SIOCDEVPRIVATE + 0) + +#endif diff --git a/include/net/ieee802154.h b/include/net/ieee802154.h new file mode 100644 index 000000000000..d52685defb11 --- /dev/null +++ b/include/net/ieee802154.h @@ -0,0 +1,160 @@ +/* + * IEEE802.15.4-2003 specification + * + * Copyright (C) 2007, 2008 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Written by: + * Pavel Smolenskiy + * Maxim Gorbachyov + * Maxim Osipov + * Dmitry Eremin-Solenikov + */ + +#ifndef NET_IEEE802154_H +#define NET_IEEE802154_H + +#define IEEE802154_FC_TYPE_BEACON 0x0 /* Frame is beacon */ +#define IEEE802154_FC_TYPE_DATA 0x1 /* Frame is data */ +#define IEEE802154_FC_TYPE_ACK 0x2 /* Frame is acknowledgment */ +#define IEEE802154_FC_TYPE_MAC_CMD 0x3 /* Frame is MAC command */ + +#define IEEE802154_FC_TYPE_SHIFT 0 +#define IEEE802154_FC_TYPE_MASK ((1 << 3) - 1) +#define IEEE802154_FC_TYPE(x) ((x & IEEE802154_FC_TYPE_MASK) >> IEEE802154_FC_TYPE_SHIFT) +#define IEEE802154_FC_SET_TYPE(v, x) do { \ + v = (((v) & ~IEEE802154_FC_TYPE_MASK) | \ + (((x) << IEEE802154_FC_TYPE_SHIFT) & IEEE802154_FC_TYPE_MASK)); \ + } while (0) + +#define IEEE802154_FC_SECEN (1 << 3) +#define IEEE802154_FC_FRPEND (1 << 4) +#define IEEE802154_FC_ACK_REQ (1 << 5) +#define IEEE802154_FC_INTRA_PAN (1 << 6) + +#define IEEE802154_FC_SAMODE_SHIFT 14 +#define IEEE802154_FC_SAMODE_MASK (3 << IEEE802154_FC_SAMODE_SHIFT) +#define IEEE802154_FC_DAMODE_SHIFT 10 +#define IEEE802154_FC_DAMODE_MASK (3 << IEEE802154_FC_DAMODE_SHIFT) + +#define IEEE802154_FC_SAMODE(x) \ + (((x) & IEEE802154_FC_SAMODE_MASK) >> IEEE802154_FC_SAMODE_SHIFT) + +#define IEEE802154_FC_DAMODE(x) \ + (((x) & IEEE802154_FC_DAMODE_MASK) >> IEEE802154_FC_DAMODE_SHIFT) + + +/* MAC's Command Frames Identifiers */ +#define IEEE802154_CMD_ASSOCIATION_REQ 0x01 +#define IEEE802154_CMD_ASSOCIATION_RESP 0x02 +#define IEEE802154_CMD_DISASSOCIATION_NOTIFY 0x03 +#define IEEE802154_CMD_DATA_REQ 0x04 +#define IEEE802154_CMD_PANID_CONFLICT_NOTIFY 0x05 +#define IEEE802154_CMD_ORPHAN_NOTIFY 0x06 +#define IEEE802154_CMD_BEACON_REQ 0x07 +#define IEEE802154_CMD_COORD_REALIGN_NOTIFY 0x08 +#define IEEE802154_CMD_GTS_REQ 0x09 + +/* + * The return values of MAC operations + */ +enum { + /* + * The requested operation was completed successfully. + * For a transmission request, this value indicates + * a successful transmission. + */ + IEEE802154_SUCCESS = 0x0, + + /* The beacon was lost following a synchronization request. */ + IEEE802154_BEACON_LOSS = 0xe0, + /* + * A transmission could not take place due to activity on the + * channel, i.e., the CSMA-CA mechanism has failed. + */ + IEEE802154_CHNL_ACCESS_FAIL = 0xe1, + /* The GTS request has been denied by the PAN coordinator. */ + IEEE802154_DENINED = 0xe2, + /* The attempt to disable the transceiver has failed. */ + IEEE802154_DISABLE_TRX_FAIL = 0xe3, + /* + * The received frame induces a failed security check according to + * the security suite. + */ + IEEE802154_FAILED_SECURITY_CHECK = 0xe4, + /* + * The frame resulting from secure processing has a length that is + * greater than aMACMaxFrameSize. + */ + IEEE802154_FRAME_TOO_LONG = 0xe5, + /* + * The requested GTS transmission failed because the specified GTS + * either did not have a transmit GTS direction or was not defined. + */ + IEEE802154_INVALID_GTS = 0xe6, + /* + * A request to purge an MSDU from the transaction queue was made using + * an MSDU handle that was not found in the transaction table. + */ + IEEE802154_INVALID_HANDLE = 0xe7, + /* A parameter in the primitive is out of the valid range.*/ + IEEE802154_INVALID_PARAMETER = 0xe8, + /* No acknowledgment was received after aMaxFrameRetries. */ + IEEE802154_NO_ACK = 0xe9, + /* A scan operation failed to find any network beacons.*/ + IEEE802154_NO_BEACON = 0xea, + /* No response data were available following a request. 
*/ + IEEE802154_NO_DATA = 0xeb, + /* The operation failed because a short address was not allocated. */ + IEEE802154_NO_SHORT_ADDRESS = 0xec, + /* + * A receiver enable request was unsuccessful because it could not be + * completed within the CAP. + */ + IEEE802154_OUT_OF_CAP = 0xed, + /* + * A PAN identifier conflict has been detected and communicated to the + * PAN coordinator. + */ + IEEE802154_PANID_CONFLICT = 0xee, + /* A coordinator realignment command has been received. */ + IEEE802154_REALIGMENT = 0xef, + /* The transaction has expired and its information discarded. */ + IEEE802154_TRANSACTION_EXPIRED = 0xf0, + /* There is no capacity to store the transaction. */ + IEEE802154_TRANSACTION_OVERFLOW = 0xf1, + /* + * The transceiver was in the transmitter enabled state when the + * receiver was requested to be enabled. + */ + IEEE802154_TX_ACTIVE = 0xf2, + /* The appropriate key is not available in the ACL. */ + IEEE802154_UNAVAILABLE_KEY = 0xf3, + /* + * A SET/GET request was issued with the identifier of a PIB attribute + * that is not supported. + */ + IEEE802154_UNSUPPORTED_ATTR = 0xf4, + /* + * A request to perform a scan operation failed because the MLME was + * in the process of performing a previously initiated scan operation. + */ + IEEE802154_SCAN_IN_PROGRESS = 0xfc, +}; + + +#endif + + diff --git a/include/net/ieee802154/af_ieee802154.h b/include/net/ieee802154/af_ieee802154.h deleted file mode 100644 index 0d78605fb1a6..000000000000 --- a/include/net/ieee802154/af_ieee802154.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * IEEE 802.15.4 inteface for userspace - * - * Copyright 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- * - * Written by: - * Sergey Lapin - * Dmitry Eremin-Solenikov - */ - -#ifndef _AF_IEEE802154_H -#define _AF_IEEE802154_H - -#include /* for sa_family_t */ - -enum { - IEEE802154_ADDR_NONE = 0x0, - /* RESERVED = 0x01, */ - IEEE802154_ADDR_SHORT = 0x2, /* 16-bit address + PANid */ - IEEE802154_ADDR_LONG = 0x3, /* 64-bit address + PANid */ -}; - -/* address length, octets */ -#define IEEE802154_ADDR_LEN 8 - -struct ieee802154_addr { - int addr_type; - u16 pan_id; - union { - u8 hwaddr[IEEE802154_ADDR_LEN]; - u16 short_addr; - }; -}; - -#define IEEE802154_PANID_BROADCAST 0xffff -#define IEEE802154_ADDR_BROADCAST 0xffff -#define IEEE802154_ADDR_UNDEF 0xfffe - -struct sockaddr_ieee802154 { - sa_family_t family; /* AF_IEEE802154 */ - struct ieee802154_addr addr; -}; - -/* master device */ -#define IEEE802154_SIOC_ADD_SLAVE (SIOCDEVPRIVATE + 0) - -#endif diff --git a/include/net/ieee802154/mac_def.h b/include/net/ieee802154/mac_def.h deleted file mode 100644 index 8cb684635650..000000000000 --- a/include/net/ieee802154/mac_def.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - * IEEE802.15.4-2003 specification - * - * Copyright (C) 2007, 2008 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
- * - * Written by: - * Pavel Smolenskiy - * Maxim Gorbachyov - * Maxim Osipov - * Dmitry Eremin-Solenikov - */ - -#ifndef IEEE802154_MAC_DEF_H -#define IEEE802154_MAC_DEF_H - -#define IEEE802154_FC_TYPE_BEACON 0x0 /* Frame is beacon */ -#define IEEE802154_FC_TYPE_DATA 0x1 /* Frame is data */ -#define IEEE802154_FC_TYPE_ACK 0x2 /* Frame is acknowledgment */ -#define IEEE802154_FC_TYPE_MAC_CMD 0x3 /* Frame is MAC command */ - -#define IEEE802154_FC_TYPE_SHIFT 0 -#define IEEE802154_FC_TYPE_MASK ((1 << 3) - 1) -#define IEEE802154_FC_TYPE(x) ((x & IEEE802154_FC_TYPE_MASK) >> IEEE802154_FC_TYPE_SHIFT) -#define IEEE802154_FC_SET_TYPE(v, x) do { \ - v = (((v) & ~IEEE802154_FC_TYPE_MASK) | \ - (((x) << IEEE802154_FC_TYPE_SHIFT) & IEEE802154_FC_TYPE_MASK)); \ - } while (0) - -#define IEEE802154_FC_SECEN (1 << 3) -#define IEEE802154_FC_FRPEND (1 << 4) -#define IEEE802154_FC_ACK_REQ (1 << 5) -#define IEEE802154_FC_INTRA_PAN (1 << 6) - -#define IEEE802154_FC_SAMODE_SHIFT 14 -#define IEEE802154_FC_SAMODE_MASK (3 << IEEE802154_FC_SAMODE_SHIFT) -#define IEEE802154_FC_DAMODE_SHIFT 10 -#define IEEE802154_FC_DAMODE_MASK (3 << IEEE802154_FC_DAMODE_SHIFT) - -#define IEEE802154_FC_SAMODE(x) \ - (((x) & IEEE802154_FC_SAMODE_MASK) >> IEEE802154_FC_SAMODE_SHIFT) - -#define IEEE802154_FC_DAMODE(x) \ - (((x) & IEEE802154_FC_DAMODE_MASK) >> IEEE802154_FC_DAMODE_SHIFT) - - -/* MAC's Command Frames Identifiers */ -#define IEEE802154_CMD_ASSOCIATION_REQ 0x01 -#define IEEE802154_CMD_ASSOCIATION_RESP 0x02 -#define IEEE802154_CMD_DISASSOCIATION_NOTIFY 0x03 -#define IEEE802154_CMD_DATA_REQ 0x04 -#define IEEE802154_CMD_PANID_CONFLICT_NOTIFY 0x05 -#define IEEE802154_CMD_ORPHAN_NOTIFY 0x06 -#define IEEE802154_CMD_BEACON_REQ 0x07 -#define IEEE802154_CMD_COORD_REALIGN_NOTIFY 0x08 -#define IEEE802154_CMD_GTS_REQ 0x09 - -/* - * The return values of MAC operations - */ -enum { - /* - * The requested operation was completed successfully. - * For a transmission request, this value indicates - * a successful transmission. - */ - IEEE802154_SUCCESS = 0x0, - - /* The beacon was lost following a synchronization request. */ - IEEE802154_BEACON_LOSS = 0xe0, - /* - * A transmission could not take place due to activity on the - * channel, i.e., the CSMA-CA mechanism has failed. - */ - IEEE802154_CHNL_ACCESS_FAIL = 0xe1, - /* The GTS request has been denied by the PAN coordinator. */ - IEEE802154_DENINED = 0xe2, - /* The attempt to disable the transceiver has failed. */ - IEEE802154_DISABLE_TRX_FAIL = 0xe3, - /* - * The received frame induces a failed security check according to - * the security suite. - */ - IEEE802154_FAILED_SECURITY_CHECK = 0xe4, - /* - * The frame resulting from secure processing has a length that is - * greater than aMACMaxFrameSize. - */ - IEEE802154_FRAME_TOO_LONG = 0xe5, - /* - * The requested GTS transmission failed because the specified GTS - * either did not have a transmit GTS direction or was not defined. - */ - IEEE802154_INVALID_GTS = 0xe6, - /* - * A request to purge an MSDU from the transaction queue was made using - * an MSDU handle that was not found in the transaction table. - */ - IEEE802154_INVALID_HANDLE = 0xe7, - /* A parameter in the primitive is out of the valid range.*/ - IEEE802154_INVALID_PARAMETER = 0xe8, - /* No acknowledgment was received after aMaxFrameRetries. */ - IEEE802154_NO_ACK = 0xe9, - /* A scan operation failed to find any network beacons.*/ - IEEE802154_NO_BEACON = 0xea, - /* No response data were available following a request. 
*/ - IEEE802154_NO_DATA = 0xeb, - /* The operation failed because a short address was not allocated. */ - IEEE802154_NO_SHORT_ADDRESS = 0xec, - /* - * A receiver enable request was unsuccessful because it could not be - * completed within the CAP. - */ - IEEE802154_OUT_OF_CAP = 0xed, - /* - * A PAN identifier conflict has been detected and communicated to the - * PAN coordinator. - */ - IEEE802154_PANID_CONFLICT = 0xee, - /* A coordinator realignment command has been received. */ - IEEE802154_REALIGMENT = 0xef, - /* The transaction has expired and its information discarded. */ - IEEE802154_TRANSACTION_EXPIRED = 0xf0, - /* There is no capacity to store the transaction. */ - IEEE802154_TRANSACTION_OVERFLOW = 0xf1, - /* - * The transceiver was in the transmitter enabled state when the - * receiver was requested to be enabled. - */ - IEEE802154_TX_ACTIVE = 0xf2, - /* The appropriate key is not available in the ACL. */ - IEEE802154_UNAVAILABLE_KEY = 0xf3, - /* - * A SET/GET request was issued with the identifier of a PIB attribute - * that is not supported. - */ - IEEE802154_UNSUPPORTED_ATTR = 0xf4, - /* - * A request to perform a scan operation failed because the MLME was - * in the process of performing a previously initiated scan operation. - */ - IEEE802154_SCAN_IN_PROGRESS = 0xfc, -}; - - -#endif - - diff --git a/include/net/ieee802154/netdevice.h b/include/net/ieee802154/netdevice.h deleted file mode 100644 index e2506af3e7c8..000000000000 --- a/include/net/ieee802154/netdevice.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * An interface between IEEE802.15.4 device and rest of the kernel. - * - * Copyright (C) 2007, 2008, 2009 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Written by: - * Pavel Smolenskiy - * Maxim Gorbachyov - * Maxim Osipov - * Dmitry Eremin-Solenikov - */ - -#ifndef IEEE802154_NETDEVICE_H -#define IEEE802154_NETDEVICE_H - -/* - * A control block of skb passed between the ARPHRD_IEEE802154 device - * and other stack parts. 
- */ -struct ieee802154_mac_cb { - u8 lqi; - struct ieee802154_addr sa; - struct ieee802154_addr da; - u8 flags; - u8 seq; -}; - -static inline struct ieee802154_mac_cb *mac_cb(struct sk_buff *skb) -{ - return (struct ieee802154_mac_cb *)skb->cb; -} - -#define MAC_CB_FLAG_TYPEMASK ((1 << 3) - 1) - -#define MAC_CB_FLAG_ACKREQ (1 << 3) -#define MAC_CB_FLAG_SECEN (1 << 4) -#define MAC_CB_FLAG_INTRAPAN (1 << 5) - -static inline int mac_cb_is_ackreq(struct sk_buff *skb) -{ - return mac_cb(skb)->flags & MAC_CB_FLAG_ACKREQ; -} - -static inline int mac_cb_is_secen(struct sk_buff *skb) -{ - return mac_cb(skb)->flags & MAC_CB_FLAG_SECEN; -} - -static inline int mac_cb_is_intrapan(struct sk_buff *skb) -{ - return mac_cb(skb)->flags & MAC_CB_FLAG_INTRAPAN; -} - -static inline int mac_cb_type(struct sk_buff *skb) -{ - return mac_cb(skb)->flags & MAC_CB_FLAG_TYPEMASK; -} - -#define IEEE802154_MAC_SCAN_ED 0 -#define IEEE802154_MAC_SCAN_ACTIVE 1 -#define IEEE802154_MAC_SCAN_PASSIVE 2 -#define IEEE802154_MAC_SCAN_ORPHAN 3 - -/* - * This should be located at net_device->ml_priv - */ -struct ieee802154_mlme_ops { - int (*assoc_req)(struct net_device *dev, - struct ieee802154_addr *addr, - u8 channel, u8 cap); - int (*assoc_resp)(struct net_device *dev, - struct ieee802154_addr *addr, - u16 short_addr, u8 status); - int (*disassoc_req)(struct net_device *dev, - struct ieee802154_addr *addr, - u8 reason); - int (*start_req)(struct net_device *dev, - struct ieee802154_addr *addr, - u8 channel, u8 bcn_ord, u8 sf_ord, - u8 pan_coord, u8 blx, u8 coord_realign); - int (*scan_req)(struct net_device *dev, - u8 type, u32 channels, u8 duration); - - /* - * FIXME: these should become the part of PIB/MIB interface. - * However we still don't have IB interface of any kind - */ - u16 (*get_pan_id)(struct net_device *dev); - u16 (*get_short_addr)(struct net_device *dev); - u8 (*get_dsn)(struct net_device *dev); - u8 (*get_bsn)(struct net_device *dev); -}; - -static inline struct ieee802154_mlme_ops *ieee802154_mlme_ops( - struct net_device *dev) -{ - return dev->ml_priv; -} - -#endif - - diff --git a/include/net/ieee802154/nl802154.h b/include/net/ieee802154/nl802154.h deleted file mode 100644 index 6096096f6d7d..000000000000 --- a/include/net/ieee802154/nl802154.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * nl802154.h - * - * Copyright (C) 2007, 2008, 2009 Siemens AG - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - */ - -#ifndef IEEE802154_NL_H -#define IEEE802154_NL_H - -struct net_device; -struct ieee802154_addr; - -/** - * ieee802154_nl_assoc_indic - Notify userland of an association request. - * @dev: The network device on which this association request was - * received. - * @addr: The address of the device requesting association. - * @cap: The capability information field from the device. 
- * - * This informs a userland coordinator of a device requesting to - * associate with the PAN controlled by the coordinator. - * - * Note: This is in section 7.3.1 of the IEEE 802.15.4-2006 document. - */ -int ieee802154_nl_assoc_indic(struct net_device *dev, - struct ieee802154_addr *addr, u8 cap); - -/** - * ieee802154_nl_assoc_confirm - Notify userland of association. - * @dev: The device which has completed association. - * @short_addr: The short address assigned to the device. - * @status: The status of the association. - * - * Inform userland of the result of an association request. If the - * association request included asking the coordinator to allocate - * a short address then it is returned in @short_addr. - * - * Note: This is in section 7.3.2 of the IEEE 802.15.4 document. - */ -int ieee802154_nl_assoc_confirm(struct net_device *dev, - u16 short_addr, u8 status); - -/** - * ieee802154_nl_disassoc_indic - Notify userland of disassociation. - * @dev: The device on which disassociation was indicated. - * @addr: The device which is disassociating. - * @reason: The reason for the disassociation. - * - * Inform userland that a device has disassociated from the network. - * - * Note: This is in section 7.3.3 of the IEEE 802.15.4 document. - */ -int ieee802154_nl_disassoc_indic(struct net_device *dev, - struct ieee802154_addr *addr, u8 reason); - -/** - * ieee802154_nl_disassoc_confirm - Notify userland of disassociation - * completion. - * @dev: The device on which disassociation was ordered. - * @status: The result of the disassociation. - * - * Inform userland of the result of requesting that a device - * disassociate, or the result of requesting that we disassociate from - * a PAN managed by another coordinator. - * - * Note: This is in section 7.1.4.3 of the IEEE 802.15.4 document. - */ -int ieee802154_nl_disassoc_confirm(struct net_device *dev, - u8 status); - -/** - * ieee802154_nl_scan_confirm - Notify userland of completion of scan. - * @dev: The device which was instructed to scan. - * @status: The status of the scan operation. - * @scan_type: What type of scan was performed. - * @unscanned: Any channels that the device was unable to scan. - * @edl: The energy levels (if a passive scan). - * - * - * Note: This is in section 7.1.11 of the IEEE 802.15.4 document. - * Note: This API does not permit the return of an active scan result. - */ -int ieee802154_nl_scan_confirm(struct net_device *dev, - u8 status, u8 scan_type, u32 unscanned, - u8 *edl/*, struct list_head *pan_desc_list */); - -/** - * ieee802154_nl_beacon_indic - Notify userland of a received beacon. - * @dev: The device on which a beacon was received. - * @panid: The PAN of the coordinator. - * @coord_addr: The short address of the coordinator on that PAN. - * - * Note: This is in section 7.1.5 of the IEEE 802.15.4 document. - * Note: This API does not provide extended information such as what - * channel the PAN is on or what the LQI of the beacon frame was on - * receipt. - * Note: This API cannot indicate a beacon frame for a coordinator - * operating in long addressing mode. - */ -int ieee802154_nl_beacon_indic(struct net_device *dev, u16 panid, - u16 coord_addr); - -#endif diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h new file mode 100644 index 000000000000..e2506af3e7c8 --- /dev/null +++ b/include/net/ieee802154_netdev.h @@ -0,0 +1,115 @@ +/* + * An interface between IEEE802.15.4 device and rest of the kernel. 
+ * + * Copyright (C) 2007, 2008, 2009 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Pavel Smolenskiy + * Maxim Gorbachyov + * Maxim Osipov + * Dmitry Eremin-Solenikov + */ + +#ifndef IEEE802154_NETDEVICE_H +#define IEEE802154_NETDEVICE_H + +/* + * A control block of skb passed between the ARPHRD_IEEE802154 device + * and other stack parts. + */ +struct ieee802154_mac_cb { + u8 lqi; + struct ieee802154_addr sa; + struct ieee802154_addr da; + u8 flags; + u8 seq; +}; + +static inline struct ieee802154_mac_cb *mac_cb(struct sk_buff *skb) +{ + return (struct ieee802154_mac_cb *)skb->cb; +} + +#define MAC_CB_FLAG_TYPEMASK ((1 << 3) - 1) + +#define MAC_CB_FLAG_ACKREQ (1 << 3) +#define MAC_CB_FLAG_SECEN (1 << 4) +#define MAC_CB_FLAG_INTRAPAN (1 << 5) + +static inline int mac_cb_is_ackreq(struct sk_buff *skb) +{ + return mac_cb(skb)->flags & MAC_CB_FLAG_ACKREQ; +} + +static inline int mac_cb_is_secen(struct sk_buff *skb) +{ + return mac_cb(skb)->flags & MAC_CB_FLAG_SECEN; +} + +static inline int mac_cb_is_intrapan(struct sk_buff *skb) +{ + return mac_cb(skb)->flags & MAC_CB_FLAG_INTRAPAN; +} + +static inline int mac_cb_type(struct sk_buff *skb) +{ + return mac_cb(skb)->flags & MAC_CB_FLAG_TYPEMASK; +} + +#define IEEE802154_MAC_SCAN_ED 0 +#define IEEE802154_MAC_SCAN_ACTIVE 1 +#define IEEE802154_MAC_SCAN_PASSIVE 2 +#define IEEE802154_MAC_SCAN_ORPHAN 3 + +/* + * This should be located at net_device->ml_priv + */ +struct ieee802154_mlme_ops { + int (*assoc_req)(struct net_device *dev, + struct ieee802154_addr *addr, + u8 channel, u8 cap); + int (*assoc_resp)(struct net_device *dev, + struct ieee802154_addr *addr, + u16 short_addr, u8 status); + int (*disassoc_req)(struct net_device *dev, + struct ieee802154_addr *addr, + u8 reason); + int (*start_req)(struct net_device *dev, + struct ieee802154_addr *addr, + u8 channel, u8 bcn_ord, u8 sf_ord, + u8 pan_coord, u8 blx, u8 coord_realign); + int (*scan_req)(struct net_device *dev, + u8 type, u32 channels, u8 duration); + + /* + * FIXME: these should become the part of PIB/MIB interface. + * However we still don't have IB interface of any kind + */ + u16 (*get_pan_id)(struct net_device *dev); + u16 (*get_short_addr)(struct net_device *dev); + u8 (*get_dsn)(struct net_device *dev); + u8 (*get_bsn)(struct net_device *dev); +}; + +static inline struct ieee802154_mlme_ops *ieee802154_mlme_ops( + struct net_device *dev) +{ + return dev->ml_priv; +} + +#endif + + diff --git a/include/net/nl802154.h b/include/net/nl802154.h new file mode 100644 index 000000000000..6096096f6d7d --- /dev/null +++ b/include/net/nl802154.h @@ -0,0 +1,117 @@ +/* + * nl802154.h + * + * Copyright (C) 2007, 2008, 2009 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#ifndef IEEE802154_NL_H +#define IEEE802154_NL_H + +struct net_device; +struct ieee802154_addr; + +/** + * ieee802154_nl_assoc_indic - Notify userland of an association request. + * @dev: The network device on which this association request was + * received. + * @addr: The address of the device requesting association. + * @cap: The capability information field from the device. + * + * This informs a userland coordinator of a device requesting to + * associate with the PAN controlled by the coordinator. + * + * Note: This is in section 7.3.1 of the IEEE 802.15.4-2006 document. + */ +int ieee802154_nl_assoc_indic(struct net_device *dev, + struct ieee802154_addr *addr, u8 cap); + +/** + * ieee802154_nl_assoc_confirm - Notify userland of association. + * @dev: The device which has completed association. + * @short_addr: The short address assigned to the device. + * @status: The status of the association. + * + * Inform userland of the result of an association request. If the + * association request included asking the coordinator to allocate + * a short address then it is returned in @short_addr. + * + * Note: This is in section 7.3.2 of the IEEE 802.15.4 document. + */ +int ieee802154_nl_assoc_confirm(struct net_device *dev, + u16 short_addr, u8 status); + +/** + * ieee802154_nl_disassoc_indic - Notify userland of disassociation. + * @dev: The device on which disassociation was indicated. + * @addr: The device which is disassociating. + * @reason: The reason for the disassociation. + * + * Inform userland that a device has disassociated from the network. + * + * Note: This is in section 7.3.3 of the IEEE 802.15.4 document. + */ +int ieee802154_nl_disassoc_indic(struct net_device *dev, + struct ieee802154_addr *addr, u8 reason); + +/** + * ieee802154_nl_disassoc_confirm - Notify userland of disassociation + * completion. + * @dev: The device on which disassociation was ordered. + * @status: The result of the disassociation. + * + * Inform userland of the result of requesting that a device + * disassociate, or the result of requesting that we disassociate from + * a PAN managed by another coordinator. + * + * Note: This is in section 7.1.4.3 of the IEEE 802.15.4 document. + */ +int ieee802154_nl_disassoc_confirm(struct net_device *dev, + u8 status); + +/** + * ieee802154_nl_scan_confirm - Notify userland of completion of scan. + * @dev: The device which was instructed to scan. + * @status: The status of the scan operation. + * @scan_type: What type of scan was performed. + * @unscanned: Any channels that the device was unable to scan. + * @edl: The energy levels (if a passive scan). + * + * + * Note: This is in section 7.1.11 of the IEEE 802.15.4 document. + * Note: This API does not permit the return of an active scan result. + */ +int ieee802154_nl_scan_confirm(struct net_device *dev, + u8 status, u8 scan_type, u32 unscanned, + u8 *edl/*, struct list_head *pan_desc_list */); + +/** + * ieee802154_nl_beacon_indic - Notify userland of a received beacon. + * @dev: The device on which a beacon was received. 
+ * @panid: The PAN of the coordinator. + * @coord_addr: The short address of the coordinator on that PAN. + * + * Note: This is in section 7.1.5 of the IEEE 802.15.4 document. + * Note: This API does not provide extended information such as what + * channel the PAN is on or what the LQI of the beacon frame was on + * receipt. + * Note: This API cannot indicate a beacon frame for a coordinator + * operating in long addressing mode. + */ +int ieee802154_nl_beacon_indic(struct net_device *dev, u16 panid, + u16 coord_addr); + +#endif diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index 3bb6bdb1dac1..69c8d9207aa7 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -34,8 +34,8 @@ #include #include -#include -#include +#include +#include #include "af802154.h" diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 14d39840dd62..53dd912d52b4 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -26,9 +26,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include #include diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 27eda9fdf3c2..a615b9d13212 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -27,9 +27,9 @@ #include #include #include -#include -#include -#include +#include +#include +#include static unsigned int ieee802154_seq_num; diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index fca44d59f97e..ea8d1f15206e 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include "af802154.h" -- cgit v1.2.3 From 5dea271b6d87bd1d79a59c1d5baac2596a841c37 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 23 Jul 2009 20:30:42 +0100 Subject: dm table: pass correct dev area size to device_area_is_valid Incorrect device area lengths are being passed to device_area_is_valid(). The regression appeared in 2.6.31-rc1 through commit 754c5fc7ebb417b23601a6222a6005cc2e7f2913. With the dm-stripe target, the size of the target (ti->len) was used instead of the stripe_width (ti->len/#stripes). 
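For instance, a 3-stripe target that is 300 sectors long places only 100 sectors on each underlying device, so validating each device against the full 300-sector target length can wrongly reject a device that is in fact large enough.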
An example of a consequent incorrect error message is: device-mapper: table: 254:0: sdb too small for target Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 2 +- drivers/md/dm-delay.c | 4 ++-- drivers/md/dm-linear.c | 2 +- drivers/md/dm-mpath.c | 2 +- drivers/md/dm-raid1.c | 2 +- drivers/md/dm-stripe.c | 7 ++++--- drivers/md/dm-table.c | 10 +++++----- include/linux/device-mapper.h | 4 ++-- 8 files changed, 17 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 529e2ba505c3..ed1038164019 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1318,7 +1318,7 @@ static int crypt_iterate_devices(struct dm_target *ti, { struct crypt_config *cc = ti->private; - return fn(ti, cc->dev, cc->start, data); + return fn(ti, cc->dev, cc->start, ti->len, data); } static struct target_type crypt_target = { diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 4e5b843cd4d7..ebe7381f47c8 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -324,12 +324,12 @@ static int delay_iterate_devices(struct dm_target *ti, struct delay_c *dc = ti->private; int ret = 0; - ret = fn(ti, dc->dev_read, dc->start_read, data); + ret = fn(ti, dc->dev_read, dc->start_read, ti->len, data); if (ret) goto out; if (dc->dev_write) - ret = fn(ti, dc->dev_write, dc->start_write, data); + ret = fn(ti, dc->dev_write, dc->start_write, ti->len, data); out: return ret; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 9184b6deb868..82f7d6e6b1ea 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -139,7 +139,7 @@ static int linear_iterate_devices(struct dm_target *ti, { struct linear_c *lc = ti->private; - return fn(ti, lc->dev, lc->start, data); + return fn(ti, lc->dev, lc->start, ti->len, data); } static struct target_type linear_target = { diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c70604a20897..6f0d90d4a541 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1453,7 +1453,7 @@ static int multipath_iterate_devices(struct dm_target *ti, list_for_each_entry(pg, &m->priority_groups, list) { list_for_each_entry(p, &pg->pgpaths, list) { - ret = fn(ti, p->path.dev, ti->begin, data); + ret = fn(ti, p->path.dev, ti->begin, ti->len, data); if (ret) goto out; } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 42a3e4341d60..9726577cde49 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1293,7 +1293,7 @@ static int mirror_iterate_devices(struct dm_target *ti, for (i = 0; !ret && i < ms->nr_mirrors; i++) ret = fn(ti, ms->mirror[i].dev, - ms->mirror[i].offset, data); + ms->mirror[i].offset, ti->len, data); return ret; } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index b240e85ae39a..4e0e5937e42a 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -320,10 +320,11 @@ static int stripe_iterate_devices(struct dm_target *ti, int ret = 0; unsigned i = 0; - do + do { ret = fn(ti, sc->stripe[i].dev, - sc->stripe[i].physical_start, data); - while (!ret && ++i < sc->stripes); + sc->stripe[i].physical_start, + sc->stripe_width, data); + } while (!ret && ++i < sc->stripes); return ret; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5d16d06613aa..d952b3441913 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -346,7 +346,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) * If possible, this checks an area of a destination 
device is valid. */ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, - sector_t start, void *data) + sector_t start, sector_t len, void *data) { struct queue_limits *limits = data; struct block_device *bdev = dev->bdev; @@ -359,7 +359,7 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, if (!dev_size) return 1; - if ((start >= dev_size) || (start + ti->len > dev_size)) { + if ((start >= dev_size) || (start + len > dev_size)) { DMWARN("%s: %s too small for target", dm_device_name(ti->table->md), bdevname(bdev, b)); return 0; @@ -377,11 +377,11 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, return 0; } - if (ti->len & (logical_block_size_sectors - 1)) { + if (len & (logical_block_size_sectors - 1)) { DMWARN("%s: len=%llu not aligned to h/w " "logical block size %hu of %s", dm_device_name(ti->table->md), - (unsigned long long)ti->len, + (unsigned long long)len, limits->logical_block_size, bdevname(bdev, b)); return 0; } @@ -482,7 +482,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, - sector_t start, void *data) + sector_t start, sector_t len, void *data) { struct queue_limits *limits = data; struct block_device *bdev = dev->bdev; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 0d6310657f32..655e7721580a 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -84,7 +84,7 @@ typedef int (*dm_merge_fn) (struct dm_target *ti, struct bvec_merge_data *bvm, typedef int (*iterate_devices_callout_fn) (struct dm_target *ti, struct dm_dev *dev, - sector_t physical_start, + sector_t start, sector_t len, void *data); typedef int (*dm_iterate_devices_fn) (struct dm_target *ti, @@ -104,7 +104,7 @@ void dm_error(const char *message); * Combine device limits. */ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, - sector_t start, void *data); + sector_t start, sector_t len, void *data); struct dm_dev { struct block_device *bdev; -- cgit v1.2.3 From d9ab77161d811ffb0bccf396f7155cc905c1b9e1 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 22 Jul 2009 00:37:25 +0200 Subject: Driver Core: Make PM operations a const pointer They are not supposed to be modified by drivers, so make them const. Signed-off-by: Dmitry Torokhov Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. Wysocki --- drivers/base/platform.c | 2 +- drivers/base/power/main.c | 8 +++++--- drivers/pci/pci-driver.c | 16 ++++++++-------- include/linux/device.h | 9 +++++---- 4 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 455e55971d0e..ae5c4aa6d269 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -889,7 +889,7 @@ static int platform_pm_restore_noirq(struct device *dev) #endif /* !CONFIG_HIBERNATION */ -static struct dev_pm_ops platform_dev_pm_ops = { +static const struct dev_pm_ops platform_dev_pm_ops = { .prepare = platform_pm_prepare, .complete = platform_pm_complete, .suspend = platform_pm_suspend, diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 58a3e572f2c9..1b1a786b7dec 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -157,8 +157,9 @@ void device_pm_move_last(struct device *dev) * @ops: PM operations to choose from. 
* @state: PM transition of the system being carried out. */ -static int pm_op(struct device *dev, struct dev_pm_ops *ops, - pm_message_t state) +static int pm_op(struct device *dev, + const struct dev_pm_ops *ops, + pm_message_t state) { int error = 0; @@ -220,7 +221,8 @@ static int pm_op(struct device *dev, struct dev_pm_ops *ops, * The operation is executed with interrupts disabled by the only remaining * functional CPU in the system. */ -static int pm_noirq_op(struct device *dev, struct dev_pm_ops *ops, +static int pm_noirq_op(struct device *dev, + const struct dev_pm_ops *ops, pm_message_t state) { int error = 0; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d76c4c85367e..0c2ea44ae5e1 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -575,7 +575,7 @@ static void pci_pm_complete(struct device *dev) static int pci_pm_suspend(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_SUSPEND); @@ -613,7 +613,7 @@ static int pci_pm_suspend(struct device *dev) static int pci_pm_suspend_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_SUSPEND); @@ -672,7 +672,7 @@ static int pci_pm_resume_noirq(struct device *dev) static int pci_pm_resume(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; /* @@ -711,7 +711,7 @@ static int pci_pm_resume(struct device *dev) static int pci_pm_freeze(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_FREEZE); @@ -780,7 +780,7 @@ static int pci_pm_thaw_noirq(struct device *dev) static int pci_pm_thaw(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; if (pci_has_legacy_pm_support(pci_dev)) @@ -799,7 +799,7 @@ static int pci_pm_thaw(struct device *dev) static int pci_pm_poweroff(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_HIBERNATE); @@ -872,7 +872,7 @@ static int pci_pm_restore_noirq(struct device *dev) static int pci_pm_restore(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? 
dev->driver->pm : NULL; int error = 0; /* @@ -910,7 +910,7 @@ static int pci_pm_restore(struct device *dev) #endif /* !CONFIG_HIBERNATION */ -struct dev_pm_ops pci_dev_pm_ops = { +const struct dev_pm_ops pci_dev_pm_ops = { .prepare = pci_pm_prepare, .complete = pci_pm_complete, .suspend = pci_pm_suspend, diff --git a/include/linux/device.h b/include/linux/device.h index aebb81036db2..a28642975053 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -62,7 +62,7 @@ struct bus_type { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; struct bus_type_private *p; }; @@ -132,7 +132,7 @@ struct device_driver { int (*resume) (struct device *dev); struct attribute_group **groups; - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; struct driver_private *p; }; @@ -200,7 +200,8 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; + struct class_private *p; }; @@ -291,7 +292,7 @@ struct device_type { char *(*nodename)(struct device *dev); void (*release)(struct device *dev); - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; }; /* interface for exporting device attributes */ -- cgit v1.2.3 From c1dc13e9d0bc35a8d85bf4238c48c1b627d48f35 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Tue, 21 Jul 2009 01:57:57 +0000 Subject: Phonet: sockets list through proc_fs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This provides a list of sockets with their Phonet bind addresses and some socket debug informations through /proc/net/phonet. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. 
Miller --- include/net/phonet/pn_dev.h | 2 + net/phonet/pn_dev.c | 7 ++++ net/phonet/socket.c | 96 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) (limited to 'include') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 29d126736611..44c923c9e21d 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -49,4 +49,6 @@ void phonet_address_notify(int event, struct net_device *dev, u8 addr); #define PN_NO_ADDR 0xff +extern const struct file_operations pn_sock_seq_fops; + #endif diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index b0d6ddd82a9d..5107b7949c9c 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -218,6 +218,11 @@ static int phonet_init_net(struct net *net) if (!pnn) return -ENOMEM; + if (!proc_net_fops_create(net, "phonet", 0, &pn_sock_seq_fops)) { + kfree(pnn); + return -ENOMEM; + } + INIT_LIST_HEAD(&pnn->pndevs.list); spin_lock_init(&pnn->pndevs.lock); net_assign_generic(net, phonet_net_id, pnn); @@ -233,6 +238,8 @@ static void phonet_exit_net(struct net *net) for_each_netdev(net, dev) phonet_device_destroy(dev); rtnl_unlock(); + + proc_net_remove(net, "phonet"); kfree(pnn); } diff --git a/net/phonet/socket.c b/net/phonet/socket.c index ada2a35bf7a2..aa1617a7f265 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -412,3 +412,99 @@ found: return 0; } EXPORT_SYMBOL(pn_sock_get_port); + +static struct sock *pn_sock_get_idx(struct seq_file *seq, loff_t pos) +{ + struct net *net = seq_file_net(seq); + struct hlist_node *node; + struct sock *sknode; + + sk_for_each(sknode, node, &pnsocks.hlist) { + if (!net_eq(net, sock_net(sknode))) + continue; + if (!pos) + return sknode; + pos--; + } + return NULL; +} + +static struct sock *pn_sock_get_next(struct seq_file *seq, struct sock *sk) +{ + struct net *net = seq_file_net(seq); + + do + sk = sk_next(sk); + while (sk && !net_eq(net, sock_net(sk))); + + return sk; +} + +static void *pn_sock_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(pnsocks.lock) +{ + spin_lock_bh(&pnsocks.lock); + return *pos ? 
pn_sock_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; +} + +static void *pn_sock_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct sock *sk; + + if (v == SEQ_START_TOKEN) + sk = pn_sock_get_idx(seq, 0); + else + sk = pn_sock_get_next(seq, v); + (*pos)++; + return sk; +} + +static void pn_sock_seq_stop(struct seq_file *seq, void *v) + __releases(pnsocks.lock) +{ + spin_unlock_bh(&pnsocks.lock); +} + +static int pn_sock_seq_show(struct seq_file *seq, void *v) +{ + int len; + + if (v == SEQ_START_TOKEN) + seq_printf(seq, "%s%n", "pt loc rem rs st tx_queue rx_queue " + " uid inode ref pointer drops", &len); + else { + struct sock *sk = v; + struct pn_sock *pn = pn_sk(sk); + + seq_printf(seq, "%2d %04X:%04X:%02X %02X %08X:%08X %5d %lu " + "%d %p %d%n", + sk->sk_protocol, pn->sobject, 0, pn->resource, + sk->sk_state, + sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), + sock_i_uid(sk), sock_i_ino(sk), + atomic_read(&sk->sk_refcnt), sk, + atomic_read(&sk->sk_drops), &len); + } + seq_printf(seq, "%*s\n", 127 - len, ""); + return 0; +} + +static const struct seq_operations pn_sock_seq_ops = { + .start = pn_sock_seq_start, + .next = pn_sock_seq_next, + .stop = pn_sock_seq_stop, + .show = pn_sock_seq_show, +}; + +static int pn_sock_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &pn_sock_seq_ops); +} + +const struct file_operations pn_sock_seq_fops = { + .owner = THIS_MODULE, + .open = pn_sock_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; -- cgit v1.2.3 From ab300465676b0c0559af62d57ec9a902f5680b03 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 24 Jul 2009 15:26:15 +0800 Subject: crypto: api - Fix aligned ctx helper The aligned ctx helper was using a bogus alignment value thas was one off the correct value. Fortunately the current users do not require anything beyond the natural alignment of the platform so this hasn't caused a problem. This patch fixes that and also removes the unnecessary minimum check since if the alignment is less than the natural alignment then the subsequent ALIGN operation should be a noop. Signed-off-by: Herbert Xu --- include/crypto/algapi.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 9de6c38f4069..3f388146a914 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -175,12 +175,8 @@ int blkcipher_walk_virt_block(struct blkcipher_desc *desc, static inline void *crypto_tfm_ctx_aligned(struct crypto_tfm *tfm) { - unsigned long addr = (unsigned long)crypto_tfm_ctx(tfm); - unsigned long align = crypto_tfm_alg_alignmask(tfm); - - if (align <= crypto_tfm_ctx_alignment()) - align = 1; - return (void *)ALIGN(addr, align); + return PTR_ALIGN(crypto_tfm_ctx(tfm), + crypto_tfm_alg_alignmask(tfm) + 1); } static inline struct crypto_instance *crypto_tfm_alg_instance( -- cgit v1.2.3 From c94aa5ca3088018d2a7a9bd3258aefffe29df265 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: lockdep: Print the shortest dependency chain if finding a circle Currently lockdep will print the 1st circle detected if it exists when acquiring a new (next) lock. This patch prints the shortest path from the next lock to be acquired to the previous held lock if a circle is found. 
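
For background on why breadth-first search is the right tool here: in an unweighted graph BFS reaches every node along a minimum-length path, so recording a parent pointer per visited node is enough to recover the shortest chain once the target is reached. A tiny stand-alone C sketch of that idea (the toy graph, node indices and array sizes below are invented for illustration and are not the kernel's data structures):

#include <stdio.h>

#define NR_NODES 5

/* Toy dependency graph: edge i -> j means "lock i was held when lock j
 * was taken".  Edge 4 -> 0 closes the circle. */
static const int adj[NR_NODES][NR_NODES] = {
	[0] = { [1] = 1, [2] = 1 },
	[1] = { [3] = 1 },
	[2] = { [3] = 1 },
	[3] = { [4] = 1 },
	[4] = { [0] = 1 },
};

int main(void)
{
	int queue[NR_NODES], front = 0, rear = 0;
	int parent[NR_NODES], seen[NR_NODES] = { 0 };
	int src = 0, target = 4, i, n;

	/* BFS: the first time 'target' is dequeued, the chain of parent
	 * pointers back to 'src' is a shortest path. */
	seen[src] = 1;
	parent[src] = -1;
	queue[rear++] = src;
	while (front < rear) {
		n = queue[front++];
		if (n == target)
			break;
		for (i = 0; i < NR_NODES; i++) {
			if (adj[n][i] && !seen[i]) {
				seen[i] = 1;
				parent[i] = n;
				queue[rear++] = i;
			}
		}
	}

	printf("shortest chain from %d to %d (printed in reverse):", src, target);
	for (i = target; i != -1; i = parent[i])
		printf(" %d", i);
	printf("\n");
	return 0;
}
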
The patch still uses the current method to check circle, and once the circle is found, breadth-first search algorithem is used to compute the shortest path from the next lock to the previous lock in the forward lock dependency graph. Printing the shortest path will shorten the dependency chain, and make troubleshooting for possible circular locking easier. Signed-off-by: Ming Lei Signed-off-by: Peter Zijlstra LKML-Reference: <1246201486-7308-2-git-send-email-tom.leiming@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 6 +++ kernel/lockdep.c | 115 +++++++++++++++++++++++++++++++++++++++++---- kernel/lockdep_internals.h | 83 ++++++++++++++++++++++++++++++++ 3 files changed, 195 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b25d1b53df0d..9ec026f8d09e 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -149,6 +149,12 @@ struct lock_list { struct lock_class *class; struct stack_trace trace; int distance; + + /*The parent field is used to implement breadth-first search,and + *the bit 0 is reused to indicate if the lock has been accessed + *in BFS. + */ + struct lock_list *parent; }; /* diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 8bbeef996c76..93dc70d18cdf 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -897,6 +897,79 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, return 1; } +static struct circular_queue lock_cq; +static int __search_shortest_path(struct lock_list *source_entry, + struct lock_class *target, + struct lock_list **target_entry, + int forward) +{ + struct lock_list *entry; + struct circular_queue *cq = &lock_cq; + int ret = 1; + + __cq_init(cq); + + mark_lock_accessed(source_entry, NULL); + if (source_entry->class == target) { + *target_entry = source_entry; + ret = 0; + goto exit; + } + + __cq_enqueue(cq, (unsigned long)source_entry); + + while (!__cq_empty(cq)) { + struct lock_list *lock; + struct list_head *head; + + __cq_dequeue(cq, (unsigned long *)&lock); + + if (!lock->class) { + ret = -2; + goto exit; + } + + if (forward) + head = &lock->class->locks_after; + else + head = &lock->class->locks_before; + + list_for_each_entry(entry, head, entry) { + if (!lock_accessed(entry)) { + mark_lock_accessed(entry, lock); + if (entry->class == target) { + *target_entry = entry; + ret = 0; + goto exit; + } + + if (__cq_enqueue(cq, (unsigned long)entry)) { + ret = -1; + goto exit; + } + } + } + } +exit: + return ret; +} + +static inline int __search_forward_shortest_path(struct lock_list *src_entry, + struct lock_class *target, + struct lock_list **target_entry) +{ + return __search_shortest_path(src_entry, target, target_entry, 1); + +} + +static inline int __search_backward_shortest_path(struct lock_list *src_entry, + struct lock_class *target, + struct lock_list **target_entry) +{ + return __search_shortest_path(src_entry, target, target_entry, 0); + +} + /* * Recursive, forwards-direction lock-dependency checking, used for * both noncyclic checking and for hardirq-unsafe/softirq-unsafe @@ -934,7 +1007,7 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth) { struct task_struct *curr = current; - if (!debug_locks_off_graph_unlock() || debug_locks_silent) + if (debug_locks_silent) return 0; printk("\n=======================================================\n"); @@ -954,19 +1027,41 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth) return 0; } -static noinline int 
print_circular_bug_tail(void) +static noinline int print_circular_bug(void) { struct task_struct *curr = current; struct lock_list this; + struct lock_list *target; + struct lock_list *parent; + int result; + unsigned long depth; - if (debug_locks_silent) + if (!debug_locks_off_graph_unlock() || debug_locks_silent) return 0; this.class = hlock_class(check_source); if (!save_trace(&this.trace)) return 0; - print_circular_bug_entry(&this, 0); + result = __search_forward_shortest_path(&this, + hlock_class(check_target), + &target); + if (result) { + printk("\n%s:search shortest path failed:%d\n", __func__, + result); + return 0; + } + + depth = get_lock_depth(target); + + print_circular_bug_header(target, depth); + + parent = get_lock_parent(target); + + while (parent) { + print_circular_bug_entry(parent, --depth); + parent = get_lock_parent(parent); + } printk("\nother info that might help us debug this:\n\n"); lockdep_print_held_locks(curr); @@ -1072,14 +1167,15 @@ check_noncircular(struct lock_class *source, unsigned int depth) */ list_for_each_entry(entry, &source->locks_after, entry) { if (entry->class == hlock_class(check_target)) - return print_circular_bug_header(entry, depth+1); + return 2; debug_atomic_inc(&nr_cyclic_checks); - if (!check_noncircular(entry->class, depth+1)) - return print_circular_bug_entry(entry, depth+1); + if (check_noncircular(entry->class, depth+1) == 2) + return 2; } return 1; } + #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) /* * Forwards and backwards subgraph searching, for the purposes of @@ -1484,8 +1580,9 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, */ check_source = next; check_target = prev; - if (!(check_noncircular(hlock_class(next), 0))) - return print_circular_bug_tail(); + if (check_noncircular(hlock_class(next), 0) == 2) + return print_circular_bug(); + if (!check_prev_add_irq(curr, prev, next)) return 0; diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h index 699a2ac3a0d7..6f48d37d5be2 100644 --- a/kernel/lockdep_internals.h +++ b/kernel/lockdep_internals.h @@ -136,3 +136,86 @@ extern atomic_t nr_find_usage_backwards_recursions; # define debug_atomic_dec(ptr) do { } while (0) # define debug_atomic_read(ptr) 0 #endif + +/* The circular_queue and helpers is used to implement the + * breadth-first search(BFS)algorithem, by which we can build + * the shortest path from the next lock to be acquired to the + * previous held lock if there is a circular between them. 
+ * */ +#define MAX_CIRCULAR_QUE_SIZE 4096UL +struct circular_queue{ + unsigned long element[MAX_CIRCULAR_QUE_SIZE]; + unsigned int front, rear; +}; + +#define LOCK_ACCESSED 1UL +#define LOCK_ACCESSED_MASK (~LOCK_ACCESSED) + +static inline void __cq_init(struct circular_queue *cq) +{ + cq->front = cq->rear = 0; +} + +static inline int __cq_empty(struct circular_queue *cq) +{ + return (cq->front == cq->rear); +} + +static inline int __cq_full(struct circular_queue *cq) +{ + return ((cq->rear + 1)%MAX_CIRCULAR_QUE_SIZE) == cq->front; +} + +static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) +{ + if (__cq_full(cq)) + return -1; + + cq->element[cq->rear] = elem; + cq->rear = (cq->rear + 1)%MAX_CIRCULAR_QUE_SIZE; + return 0; +} + +static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) +{ + if (__cq_empty(cq)) + return -1; + + *elem = cq->element[cq->front]; + cq->front = (cq->front + 1)%MAX_CIRCULAR_QUE_SIZE; + return 0; +} + +static inline int __cq_get_elem_count(struct circular_queue *cq) +{ + return (cq->rear - cq->front)%MAX_CIRCULAR_QUE_SIZE; +} + +static inline void mark_lock_accessed(struct lock_list *lock, + struct lock_list *parent) +{ + lock->parent = (void *) parent + LOCK_ACCESSED; +} + +static inline unsigned long lock_accessed(struct lock_list *lock) +{ + return (unsigned long)lock->parent & LOCK_ACCESSED; +} + +static inline struct lock_list *get_lock_parent(struct lock_list *child) +{ + return (struct lock_list *) + ((unsigned long)child->parent & LOCK_ACCESSED_MASK); +} + +static inline unsigned long get_lock_depth(struct lock_list *child) +{ + unsigned long depth = 0; + struct lock_list *parent; + + while ((parent = get_lock_parent(child))) { + child = parent; + depth++; + } + return depth; +} -- cgit v1.2.3 From af012961450949ea297b209e091bd1a3805b8a0a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 Jul 2009 15:44:29 +0200 Subject: lockdep: BFS cleanup Some cleanups of the lockdep code after the BFS series: - Remove the last traces of the generation id - Fixup comment style - Move the bfs routines into lockdep.c - Cleanup the bfs routines [ tom.leiming@gmail.com: Fix crash ] Signed-off-by: Peter Zijlstra LKML-Reference: <1246201486-7308-11-git-send-email-tom.leiming@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 7 +- kernel/lockdep.c | 284 +++++++++++++++++++++++++++------------------ kernel/lockdep_internals.h | 97 +--------------- 3 files changed, 175 insertions(+), 213 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 9ec026f8d09e..12aabfcb45f6 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -58,7 +58,6 @@ struct lock_class { struct lockdep_subclass_key *key; unsigned int subclass; - unsigned int dep_gen_id; /* * IRQ/softirq usage tracking bits: @@ -150,9 +149,9 @@ struct lock_list { struct stack_trace trace; int distance; - /*The parent field is used to implement breadth-first search,and - *the bit 0 is reused to indicate if the lock has been accessed - *in BFS. + /* + * The parent field is used to implement breadth-first search, and the + * bit 0 is reused to indicate if the lock has been accessed in BFS. 
*/ struct lock_list *parent; }; diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 744da6265d99..1cedb00e3e7a 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -43,6 +43,7 @@ #include #include #include + #include #include "lockdep_internals.h" @@ -118,7 +119,7 @@ static inline int debug_locks_off_graph_unlock(void) static int lockdep_initialized; unsigned long nr_list_entries; -struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; +static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; /* * All data structures here are protected by the global debug_lock. @@ -390,19 +391,6 @@ unsigned int nr_process_chains; unsigned int max_lockdep_depth; unsigned int max_recursion_depth; -static unsigned int lockdep_dependency_gen_id; - -static bool lockdep_dependency_visit(struct lock_class *source, - unsigned int depth) -{ - if (!depth) - lockdep_dependency_gen_id++; - if (source->dep_gen_id == lockdep_dependency_gen_id) - return true; - source->dep_gen_id = lockdep_dependency_gen_id; - return false; -} - #ifdef CONFIG_DEBUG_LOCKDEP /* * We cannot printk in early bootup code. Not even early_printk() @@ -551,88 +539,6 @@ static void lockdep_print_held_locks(struct task_struct *curr) } } -static void print_lock_class_header(struct lock_class *class, int depth) -{ - int bit; - - printk("%*s->", depth, ""); - print_lock_name(class); - printk(" ops: %lu", class->ops); - printk(" {\n"); - - for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { - if (class->usage_mask & (1 << bit)) { - int len = depth; - - len += printk("%*s %s", depth, "", usage_str[bit]); - len += printk(" at:\n"); - print_stack_trace(class->usage_traces + bit, len); - } - } - printk("%*s }\n", depth, ""); - - printk("%*s ... key at: ",depth,""); - print_ip_sym((unsigned long)class->key); -} - -/* - * printk the shortest lock dependencies from @start to @end in reverse order: - */ -static void __used -print_shortest_lock_dependencies(struct lock_list *leaf, - struct lock_list *root) -{ - struct lock_list *entry = leaf; - int depth; - - /*compute depth from generated tree by BFS*/ - depth = get_lock_depth(leaf); - - do { - print_lock_class_header(entry->class, depth); - printk("%*s ... acquired at:\n", depth, ""); - print_stack_trace(&entry->trace, 2); - printk("\n"); - - if (depth == 0 && (entry != root)) { - printk("lockdep:%s bad BFS generated tree\n", __func__); - break; - } - - entry = get_lock_parent(entry); - depth--; - } while (entry && (depth >= 0)); - - return; -} -/* - * printk all lock dependencies starting at : - */ -static void __used -print_lock_dependencies(struct lock_class *class, int depth) -{ - struct lock_list *entry; - - if (lockdep_dependency_visit(class, depth)) - return; - - if (DEBUG_LOCKS_WARN_ON(depth >= 20)) - return; - - print_lock_class_header(class, depth); - - list_for_each_entry(entry, &class->locks_after, entry) { - if (DEBUG_LOCKS_WARN_ON(!entry->class)) - return; - - print_lock_dependencies(entry->class, depth + 1); - - printk("%*s ... 
acquired at:\n",depth,""); - print_stack_trace(&entry->trace, 2); - printk("\n"); - } -} - static void print_kernel_version(void) { printk("%s %.*s\n", init_utsname()->release, @@ -927,14 +833,106 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this, return 1; } -unsigned long bfs_accessed[BITS_TO_LONGS(MAX_LOCKDEP_ENTRIES)]; -static struct circular_queue lock_cq; +/*For good efficiency of modular, we use power of 2*/ +#define MAX_CIRCULAR_QUEUE_SIZE 4096UL +#define CQ_MASK (MAX_CIRCULAR_QUEUE_SIZE-1) + +/* The circular_queue and helpers is used to implement the + * breadth-first search(BFS)algorithem, by which we can build + * the shortest path from the next lock to be acquired to the + * previous held lock if there is a circular between them. + * */ +struct circular_queue { + unsigned long element[MAX_CIRCULAR_QUEUE_SIZE]; + unsigned int front, rear; +}; + +static struct circular_queue lock_cq; +static unsigned long bfs_accessed[BITS_TO_LONGS(MAX_LOCKDEP_ENTRIES)]; + unsigned int max_bfs_queue_depth; + +static inline void __cq_init(struct circular_queue *cq) +{ + cq->front = cq->rear = 0; + bitmap_zero(bfs_accessed, MAX_LOCKDEP_ENTRIES); +} + +static inline int __cq_empty(struct circular_queue *cq) +{ + return (cq->front == cq->rear); +} + +static inline int __cq_full(struct circular_queue *cq) +{ + return ((cq->rear + 1) & CQ_MASK) == cq->front; +} + +static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) +{ + if (__cq_full(cq)) + return -1; + + cq->element[cq->rear] = elem; + cq->rear = (cq->rear + 1) & CQ_MASK; + return 0; +} + +static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) +{ + if (__cq_empty(cq)) + return -1; + + *elem = cq->element[cq->front]; + cq->front = (cq->front + 1) & CQ_MASK; + return 0; +} + +static inline unsigned int __cq_get_elem_count(struct circular_queue *cq) +{ + return (cq->rear - cq->front) & CQ_MASK; +} + +static inline void mark_lock_accessed(struct lock_list *lock, + struct lock_list *parent) +{ + unsigned long nr; + nr = lock - list_entries; + WARN_ON(nr >= nr_list_entries); + lock->parent = parent; + set_bit(nr, bfs_accessed); +} + +static inline unsigned long lock_accessed(struct lock_list *lock) +{ + unsigned long nr; + nr = lock - list_entries; + WARN_ON(nr >= nr_list_entries); + return test_bit(nr, bfs_accessed); +} + +static inline struct lock_list *get_lock_parent(struct lock_list *child) +{ + return child->parent; +} + +static inline int get_lock_depth(struct lock_list *child) +{ + int depth = 0; + struct lock_list *parent; + + while ((parent = get_lock_parent(child))) { + child = parent; + depth++; + } + return depth; +} + static int __bfs(struct lock_list *source_entry, - void *data, - int (*match)(struct lock_list *entry, void *data), - struct lock_list **target_entry, - int forward) + void *data, + int (*match)(struct lock_list *entry, void *data), + struct lock_list **target_entry, + int forward) { struct lock_list *entry; struct list_head *head; @@ -1202,14 +1200,6 @@ check_noncircular(struct lock_list *root, struct lock_class *target, * without creating any illegal irq-safe -> irq-unsafe lock dependency. 
*/ - -#define BFS_PROCESS_RET(ret) do { \ - if (ret < 0) \ - return print_bfs_bug(ret); \ - if (ret == 1) \ - return 1; \ - } while (0) - static inline int usage_match(struct lock_list *entry, void *bit) { return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit); @@ -1263,6 +1253,60 @@ find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit, return result; } +static void print_lock_class_header(struct lock_class *class, int depth) +{ + int bit; + + printk("%*s->", depth, ""); + print_lock_name(class); + printk(" ops: %lu", class->ops); + printk(" {\n"); + + for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { + if (class->usage_mask & (1 << bit)) { + int len = depth; + + len += printk("%*s %s", depth, "", usage_str[bit]); + len += printk(" at:\n"); + print_stack_trace(class->usage_traces + bit, len); + } + } + printk("%*s }\n", depth, ""); + + printk("%*s ... key at: ",depth,""); + print_ip_sym((unsigned long)class->key); +} + +/* + * printk the shortest lock dependencies from @start to @end in reverse order: + */ +static void __used +print_shortest_lock_dependencies(struct lock_list *leaf, + struct lock_list *root) +{ + struct lock_list *entry = leaf; + int depth; + + /*compute depth from generated tree by BFS*/ + depth = get_lock_depth(leaf); + + do { + print_lock_class_header(entry->class, depth); + printk("%*s ... acquired at:\n", depth, ""); + print_stack_trace(&entry->trace, 2); + printk("\n"); + + if (depth == 0 && (entry != root)) { + printk("lockdep:%s bad BFS generated tree\n", __func__); + break; + } + + entry = get_lock_parent(entry); + depth--; + } while (entry && (depth >= 0)); + + return; +} static int print_bad_irq_dependency(struct task_struct *curr, @@ -1349,12 +1393,18 @@ check_usage(struct task_struct *curr, struct held_lock *prev, this.class = hlock_class(prev); ret = find_usage_backwards(&this, bit_backwards, &target_entry); - BFS_PROCESS_RET(ret); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; that.parent = NULL; that.class = hlock_class(next); ret = find_usage_forwards(&that, bit_forwards, &target_entry1); - BFS_PROCESS_RET(ret); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; return print_bad_irq_dependency(curr, &this, &that, target_entry, target_entry1, @@ -2038,7 +2088,10 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this, root.parent = NULL; root.class = hlock_class(this); ret = find_usage_forwards(&root, bit, &target_entry); - BFS_PROCESS_RET(ret); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; return print_irq_inversion_bug(curr, &root, target_entry, this, 1, irqclass); @@ -2059,7 +2112,10 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this, root.parent = NULL; root.class = hlock_class(this); ret = find_usage_backwards(&root, bit, &target_entry); - BFS_PROCESS_RET(ret); + if (ret < 0) + return print_bfs_bug(ret); + if (ret == 1) + return ret; return print_irq_inversion_bug(curr, &root, target_entry, this, 1, irqclass); diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h index 6baa8807efdd..a2ee95ad1313 100644 --- a/kernel/lockdep_internals.h +++ b/kernel/lockdep_internals.h @@ -91,6 +91,8 @@ extern unsigned int nr_process_chains; extern unsigned int max_lockdep_depth; extern unsigned int max_recursion_depth; +extern unsigned int max_bfs_queue_depth; + #ifdef CONFIG_PROVE_LOCKING extern unsigned long lockdep_count_forward_deps(struct lock_class *); extern unsigned long lockdep_count_backward_deps(struct 
lock_class *); @@ -136,98 +138,3 @@ extern atomic_t nr_find_usage_backwards_recursions; # define debug_atomic_dec(ptr) do { } while (0) # define debug_atomic_read(ptr) 0 #endif - - -extern unsigned int max_bfs_queue_depth; -extern unsigned long nr_list_entries; -extern struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; -extern unsigned long bfs_accessed[]; - -/*For good efficiency of modular, we use power of 2*/ -#define MAX_CIRCULAR_QUE_SIZE 4096UL - -/* The circular_queue and helpers is used to implement the - * breadth-first search(BFS)algorithem, by which we can build - * the shortest path from the next lock to be acquired to the - * previous held lock if there is a circular between them. - * */ -struct circular_queue{ - unsigned long element[MAX_CIRCULAR_QUE_SIZE]; - unsigned int front, rear; -}; - -static inline void __cq_init(struct circular_queue *cq) -{ - cq->front = cq->rear = 0; - bitmap_zero(bfs_accessed, MAX_LOCKDEP_ENTRIES); -} - -static inline int __cq_empty(struct circular_queue *cq) -{ - return (cq->front == cq->rear); -} - -static inline int __cq_full(struct circular_queue *cq) -{ - return ((cq->rear + 1)&(MAX_CIRCULAR_QUE_SIZE-1)) == cq->front; -} - -static inline int __cq_enqueue(struct circular_queue *cq, unsigned long elem) -{ - if (__cq_full(cq)) - return -1; - - cq->element[cq->rear] = elem; - cq->rear = (cq->rear + 1)&(MAX_CIRCULAR_QUE_SIZE-1); - return 0; -} - -static inline int __cq_dequeue(struct circular_queue *cq, unsigned long *elem) -{ - if (__cq_empty(cq)) - return -1; - - *elem = cq->element[cq->front]; - cq->front = (cq->front + 1)&(MAX_CIRCULAR_QUE_SIZE-1); - return 0; -} - -static inline unsigned int __cq_get_elem_count(struct circular_queue *cq) -{ - return (cq->rear - cq->front)&(MAX_CIRCULAR_QUE_SIZE-1); -} - -static inline void mark_lock_accessed(struct lock_list *lock, - struct lock_list *parent) -{ - unsigned long nr; - nr = lock - list_entries; - WARN_ON(nr >= nr_list_entries); - lock->parent = parent; - set_bit(nr, bfs_accessed); -} - -static inline unsigned long lock_accessed(struct lock_list *lock) -{ - unsigned long nr; - nr = lock - list_entries; - WARN_ON(nr >= nr_list_entries); - return test_bit(nr, bfs_accessed); -} - -static inline struct lock_list *get_lock_parent(struct lock_list *child) -{ - return child->parent; -} - -static inline int get_lock_depth(struct lock_list *child) -{ - int depth = 0; - struct lock_list *parent; - - while ((parent = get_lock_parent(child))) { - child = parent; - depth++; - } - return depth; -} -- cgit v1.2.3 From 26e744b6b61066203fd57de0d3962353621e06f8 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Sun, 19 Jul 2009 05:52:58 -0300 Subject: V4L/DVB (12283): gspca - sn9c20x: New subdriver for sn9c201 and sn9c202 bridges. 
Signed-off-by: Brian Johnson Signed-off-by: Jean-Francois Moine Signed-off-by: Mauro Carvalho Chehab --- Documentation/video4linux/gspca.txt | 32 + drivers/media/video/gspca/Kconfig | 16 + drivers/media/video/gspca/Makefile | 2 + drivers/media/video/gspca/sn9c20x.c | 2433 +++++++++++++++++++++++++++++++++++ include/linux/videodev2.h | 1 + include/media/v4l2-chip-ident.h | 12 + 6 files changed, 2496 insertions(+) create mode 100644 drivers/media/video/gspca/sn9c20x.c (limited to 'include') diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 2bcf78896e22..573f95b58807 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -44,7 +44,9 @@ zc3xx 0458:7007 Genius VideoCam V2 zc3xx 0458:700c Genius VideoCam V3 zc3xx 0458:700f Genius VideoCam Web V2 sonixj 0458:7025 Genius Eye 311Q +sn9c20x 0458:7029 Genius Look 320s sonixj 0458:702e Genius Slim 310 NB +sn9c20x 045e:00f4 LifeCam VX-6000 (SN9C20x + OV9650) sonixj 045e:00f5 MicroSoft VX3000 sonixj 045e:00f7 MicroSoft VX1000 ov519 045e:028c Micro$oft xbox cam @@ -282,6 +284,28 @@ sonixj 0c45:613a Microdia Sonix PC Camera sonixj 0c45:613b Surfer SN-206 sonixj 0c45:613c Sonix Pccam168 sonixj 0c45:6143 Sonix Pccam168 +sn9c20x 0c45:6240 PC Camera (SN9C201 + MT9M001) +sn9c20x 0c45:6242 PC Camera (SN9C201 + MT9M111) +sn9c20x 0c45:6248 PC Camera (SN9C201 + OV9655) +sn9c20x 0c45:624e PC Camera (SN9C201 + SOI968) +sn9c20x 0c45:624f PC Camera (SN9C201 + OV9650) +sn9c20x 0c45:6251 PC Camera (SN9C201 + OV9650) +sn9c20x 0c45:6253 PC Camera (SN9C201 + OV9650) +sn9c20x 0c45:6260 PC Camera (SN9C201 + OV7670) +sn9c20x 0c45:6270 PC Camera (SN9C201 + MT9V011/MT9V111/MT9V112) +sn9c20x 0c45:627b PC Camera (SN9C201 + OV7660) +sn9c20x 0c45:627c PC Camera (SN9C201 + HV7131R) +sn9c20x 0c45:627f PC Camera (SN9C201 + OV9650) +sn9c20x 0c45:6280 PC Camera (SN9C202 + MT9M001) +sn9c20x 0c45:6282 PC Camera (SN9C202 + MT9M111) +sn9c20x 0c45:6288 PC Camera (SN9C202 + OV9655) +sn9c20x 0c45:628e PC Camera (SN9C202 + SOI968) +sn9c20x 0c45:628f PC Camera (SN9C202 + OV9650) +sn9c20x 0c45:62a0 PC Camera (SN9C202 + OV7670) +sn9c20x 0c45:62b0 PC Camera (SN9C202 + MT9V011/MT9V111/MT9V112) +sn9c20x 0c45:62b3 PC Camera (SN9C202 + OV9655) +sn9c20x 0c45:62bb PC Camera (SN9C202 + OV7660) +sn9c20x 0c45:62bc PC Camera (SN9C202 + HV7131R) sunplus 0d64:0303 Sunplus FashionCam DXG etoms 102c:6151 Qcam Sangha CIF etoms 102c:6251 Qcam xxxxxx VGA @@ -290,6 +314,7 @@ spca561 10fd:7e50 FlyCam Usb 100 zc3xx 10fd:8050 Typhoon Webshot II USB 300k ov534 1415:2000 Sony HD Eye for PS3 (SLEH 00201) pac207 145f:013a Trust WB-1300N +sn9c20x 145f:013d Trust WB-3600R vc032x 15b8:6001 HP 2.0 Megapixel vc032x 15b8:6002 HP 2.0 Megapixel rz406aa spca501 1776:501c Arowana 300K CMOS Camera @@ -300,4 +325,11 @@ spca500 2899:012c Toptro Industrial spca508 8086:0110 Intel Easy PC Camera spca500 8086:0630 Intel Pocket PC Camera spca506 99fa:8988 Grandtec V.cap +sn9c20x a168:0610 Dino-Lite Digital Microscope (SN9C201 + HV7131R) +sn9c20x a168:0611 Dino-Lite Digital Microscope (SN9C201 + HV7131R) +sn9c20x a168:0613 Dino-Lite Digital Microscope (SN9C201 + HV7131R) +sn9c20x a168:0618 Dino-Lite Digital Microscope (SN9C201 + HV7131R) +sn9c20x a168:0614 Dino-Lite Digital Microscope (SN9C201 + MT9M111) +sn9c20x a168:0615 Dino-Lite Digital Microscope (SN9C201 + MT9M111) +sn9c20x a168:0617 Dino-Lite Digital Microscope (SN9C201 + MT9M111) spca561 abcd:cdee Petcam diff --git a/drivers/media/video/gspca/Kconfig b/drivers/media/video/gspca/Kconfig index 
578dc4ffc965..34f46f2bc040 100644 --- a/drivers/media/video/gspca/Kconfig +++ b/drivers/media/video/gspca/Kconfig @@ -102,6 +102,22 @@ config USB_GSPCA_PAC7311 To compile this driver as a module, choose M here: the module will be called gspca_pac7311. +config USB_GSPCA_SN9C20X + tristate "SN9C20X USB Camera Driver" + depends on VIDEO_V4L2 && USB_GSPCA + help + Say Y here if you want support for cameras based on the + sn9c20x chips (SN9C201 and SN9C202). + + To compile this driver as a module, choose M here: the + module will be called gspca_sn9c20x. + +config USB_GSPCA_SN9C20X_EVDEV + bool "Enable evdev support" + depends on USB_GSPCA_SN9C20X + ---help--- + Say Y here in order to enable evdev support for sn9c20x webcam button. + config USB_GSPCA_SONIXB tristate "SONIX Bayer USB Camera Driver" depends on VIDEO_V4L2 && USB_GSPCA diff --git a/drivers/media/video/gspca/Makefile b/drivers/media/video/gspca/Makefile index 8a6643e8eb96..f6d3b86e9ad5 100644 --- a/drivers/media/video/gspca/Makefile +++ b/drivers/media/video/gspca/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_USB_GSPCA_OV519) += gspca_ov519.o obj-$(CONFIG_USB_GSPCA_OV534) += gspca_ov534.o obj-$(CONFIG_USB_GSPCA_PAC207) += gspca_pac207.o obj-$(CONFIG_USB_GSPCA_PAC7311) += gspca_pac7311.o +obj-$(CONFIG_USB_GSPCA_SN9C20X) += gspca_sn9c20x.o obj-$(CONFIG_USB_GSPCA_SONIXB) += gspca_sonixb.o obj-$(CONFIG_USB_GSPCA_SONIXJ) += gspca_sonixj.o obj-$(CONFIG_USB_GSPCA_SPCA500) += gspca_spca500.o @@ -35,6 +36,7 @@ gspca_ov519-objs := ov519.o gspca_ov534-objs := ov534.o gspca_pac207-objs := pac207.o gspca_pac7311-objs := pac7311.o +gspca_sn9c20x-objs := sn9c20x.o gspca_sonixb-objs := sonixb.o gspca_sonixj-objs := sonixj.o gspca_spca500-objs := spca500.o diff --git a/drivers/media/video/gspca/sn9c20x.c b/drivers/media/video/gspca/sn9c20x.c new file mode 100644 index 000000000000..78ab26ceb90e --- /dev/null +++ b/drivers/media/video/gspca/sn9c20x.c @@ -0,0 +1,2433 @@ +/* + * Sonix sn9c201 sn9c202 library + * Copyright (C) 2008-2009 microdia project + * Copyright (C) 2009 Brian Johnson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "gspca.h" +#include "jpeg.h" + +#include +#ifdef CONFIG_USB_GSPCA_SN9C20X_EVDEV +#include +#include +#include +#include +#endif + +MODULE_AUTHOR("Brian Johnson , " + "microdia project "); +MODULE_DESCRIPTION("GSPCA/SN9C20X USB Camera Driver"); +MODULE_LICENSE("GPL"); + +#define MODULE_NAME "sn9c20x" + +#define MODE_RAW 0x10 +#define MODE_JPEG 0x20 +#define MODE_SXGA 0x80 + +#define SENSOR_OV9650 0 +#define SENSOR_OV9655 1 +#define SENSOR_SOI968 2 +#define SENSOR_OV7660 3 +#define SENSOR_OV7670 4 +#define SENSOR_MT9V011 5 +#define SENSOR_MT9V111 6 +#define SENSOR_MT9V112 7 +#define SENSOR_MT9M001 8 +#define SENSOR_MT9M111 9 +#define SENSOR_HV7131R 10 +#define SENSOR_MT9VPRB 20 + +/* specific webcam descriptor */ +struct sd { + struct gspca_dev gspca_dev; + +#define MIN_AVG_LUM 80 +#define MAX_AVG_LUM 130 + atomic_t avg_lum; + u8 old_step; + u8 older_step; + u8 exposure_step; + + u8 brightness; + u8 contrast; + u8 saturation; + s16 hue; + u8 gamma; + u8 red; + u8 blue; + + u8 hflip; + u8 vflip; + u8 gain; + u16 exposure; + u8 auto_exposure; + + u8 i2c_addr; + u8 sensor; + u8 hstart; + u8 vstart; + + u8 *jpeg_hdr; + u8 quality; + +#ifdef CONFIG_USB_GSPCA_SN9C20X_EVDEV + struct input_dev *input_dev; + u8 input_gpio; + struct task_struct *input_task; +#endif +}; + +static int sd_setbrightness(struct gspca_dev *gspca_dev, s32 val); +static int sd_getbrightness(struct gspca_dev *gspca_dev, s32 *val); +static int sd_setcontrast(struct gspca_dev *gspca_dev, s32 val); +static int sd_getcontrast(struct gspca_dev *gspca_dev, s32 *val); +static int sd_setsaturation(struct gspca_dev *gspca_dev, s32 val); +static int sd_getsaturation(struct gspca_dev *gspca_dev, s32 *val); +static int sd_sethue(struct gspca_dev *gspca_dev, s32 val); +static int sd_gethue(struct gspca_dev *gspca_dev, s32 *val); +static int sd_setgamma(struct gspca_dev *gspca_dev, s32 val); +static int sd_getgamma(struct gspca_dev *gspca_dev, s32 *val); +static int sd_setredbalance(struct gspca_dev *gspca_dev, s32 val); +static int sd_getredbalance(struct gspca_dev *gspca_dev, s32 *val); +static int sd_setbluebalance(struct gspca_dev *gspca_dev, s32 val); +static int sd_getbluebalance(struct gspca_dev *gspca_dev, s32 *val); +static int sd_setvflip(struct gspca_dev *gspca_dev, s32 val); +static int sd_getvflip(struct gspca_dev *gspca_dev, s32 *val); +static int sd_sethflip(struct gspca_dev *gspca_dev, s32 val); +static int sd_gethflip(struct gspca_dev *gspca_dev, s32 *val); +static int sd_setgain(struct gspca_dev *gspca_dev, s32 val); +static int sd_getgain(struct gspca_dev *gspca_dev, s32 *val); +static int sd_setexposure(struct gspca_dev *gspca_dev, s32 val); +static int sd_getexposure(struct gspca_dev *gspca_dev, s32 *val); +static int sd_setautoexposure(struct gspca_dev *gspca_dev, s32 val); +static int sd_getautoexposure(struct gspca_dev *gspca_dev, s32 *val); + +static struct ctrl sd_ctrls[] = { + { +#define BRIGHTNESS_IDX 0 + { + .id = V4L2_CID_BRIGHTNESS, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Brightness", + .minimum = 0, + .maximum = 0xff, + .step = 1, +#define BRIGHTNESS_DEFAULT 0x7f + .default_value = BRIGHTNESS_DEFAULT, + }, + .set = sd_setbrightness, + .get = sd_getbrightness, + }, + { +#define CONTRAST_IDX 1 + { + .id = V4L2_CID_CONTRAST, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Contrast", 
+ .minimum = 0, + .maximum = 0xff, + .step = 1, +#define CONTRAST_DEFAULT 0x7f + .default_value = CONTRAST_DEFAULT, + }, + .set = sd_setcontrast, + .get = sd_getcontrast, + }, + { +#define SATURATION_IDX 2 + { + .id = V4L2_CID_SATURATION, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Saturation", + .minimum = 0, + .maximum = 0xff, + .step = 1, +#define SATURATION_DEFAULT 0x7f + .default_value = SATURATION_DEFAULT, + }, + .set = sd_setsaturation, + .get = sd_getsaturation, + }, + { +#define HUE_IDX 3 + { + .id = V4L2_CID_HUE, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Hue", + .minimum = -180, + .maximum = 180, + .step = 1, +#define HUE_DEFAULT 0 + .default_value = HUE_DEFAULT, + }, + .set = sd_sethue, + .get = sd_gethue, + }, + { +#define GAMMA_IDX 4 + { + .id = V4L2_CID_GAMMA, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Gamma", + .minimum = 0, + .maximum = 0xff, + .step = 1, +#define GAMMA_DEFAULT 0x10 + .default_value = GAMMA_DEFAULT, + }, + .set = sd_setgamma, + .get = sd_getgamma, + }, + { +#define BLUE_IDX 5 + { + .id = V4L2_CID_BLUE_BALANCE, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Blue Balance", + .minimum = 0, + .maximum = 0x7f, + .step = 1, +#define BLUE_DEFAULT 0x28 + .default_value = BLUE_DEFAULT, + }, + .set = sd_setbluebalance, + .get = sd_getbluebalance, + }, + { +#define RED_IDX 6 + { + .id = V4L2_CID_RED_BALANCE, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Red Balance", + .minimum = 0, + .maximum = 0x7f, + .step = 1, +#define RED_DEFAULT 0x28 + .default_value = RED_DEFAULT, + }, + .set = sd_setredbalance, + .get = sd_getredbalance, + }, + { +#define HFLIP_IDX 7 + { + .id = V4L2_CID_HFLIP, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "Horizontal Flip", + .minimum = 0, + .maximum = 1, + .step = 1, +#define HFLIP_DEFAULT 0 + .default_value = HFLIP_DEFAULT, + }, + .set = sd_sethflip, + .get = sd_gethflip, + }, + { +#define VFLIP_IDX 8 + { + .id = V4L2_CID_VFLIP, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "Vertical Flip", + .minimum = 0, + .maximum = 1, + .step = 1, +#define VFLIP_DEFAULT 0 + .default_value = VFLIP_DEFAULT, + }, + .set = sd_setvflip, + .get = sd_getvflip, + }, + { +#define EXPOSURE_IDX 9 + { + .id = V4L2_CID_EXPOSURE, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Exposure", + .minimum = 0, + .maximum = 0x1780, + .step = 1, +#define EXPOSURE_DEFAULT 0x33 + .default_value = EXPOSURE_DEFAULT, + }, + .set = sd_setexposure, + .get = sd_getexposure, + }, + { +#define GAIN_IDX 10 + { + .id = V4L2_CID_GAIN, + .type = V4L2_CTRL_TYPE_INTEGER, + .name = "Gain", + .minimum = 0, + .maximum = 28, + .step = 1, +#define GAIN_DEFAULT 0x00 + .default_value = GAIN_DEFAULT, + }, + .set = sd_setgain, + .get = sd_getgain, + }, + { +#define AUTOGAIN_IDX 11 + { + .id = V4L2_CID_AUTOGAIN, + .type = V4L2_CTRL_TYPE_BOOLEAN, + .name = "Auto Exposure", + .minimum = 0, + .maximum = 1, + .step = 1, +#define AUTO_EXPOSURE_DEFAULT 1 + .default_value = AUTO_EXPOSURE_DEFAULT, + }, + .set = sd_setautoexposure, + .get = sd_getautoexposure, + }, +}; + +static const struct v4l2_pix_format vga_mode[] = { + {160, 120, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, + .bytesperline = 240, + .sizeimage = 240 * 120, + .colorspace = V4L2_COLORSPACE_JPEG, + .priv = 0 | MODE_JPEG}, + {160, 120, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, + .bytesperline = 160, + .sizeimage = 160 * 120, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 0 | MODE_RAW}, + {160, 120, V4L2_PIX_FMT_SN9C20X_I420, V4L2_FIELD_NONE, + .bytesperline = 240, + .sizeimage = 240 * 120, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 0}, + {320, 240, 
V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, + .bytesperline = 480, + .sizeimage = 480 * 240 , + .colorspace = V4L2_COLORSPACE_JPEG, + .priv = 1 | MODE_JPEG}, + {320, 240, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, + .bytesperline = 320, + .sizeimage = 320 * 240 , + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 1 | MODE_RAW}, + {320, 240, V4L2_PIX_FMT_SN9C20X_I420, V4L2_FIELD_NONE, + .bytesperline = 480, + .sizeimage = 480 * 240 , + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 1}, + {640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, + .bytesperline = 960, + .sizeimage = 960 * 480, + .colorspace = V4L2_COLORSPACE_JPEG, + .priv = 2 | MODE_JPEG}, + {640, 480, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, + .bytesperline = 640, + .sizeimage = 640 * 480, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 2 | MODE_RAW}, + {640, 480, V4L2_PIX_FMT_SN9C20X_I420, V4L2_FIELD_NONE, + .bytesperline = 960, + .sizeimage = 960 * 480, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 2}, +}; + +static const struct v4l2_pix_format sxga_mode[] = { + {160, 120, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, + .bytesperline = 240, + .sizeimage = 240 * 120, + .colorspace = V4L2_COLORSPACE_JPEG, + .priv = 0 | MODE_JPEG}, + {160, 120, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, + .bytesperline = 160, + .sizeimage = 160 * 120, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 0 | MODE_RAW}, + {160, 120, V4L2_PIX_FMT_SN9C20X_I420, V4L2_FIELD_NONE, + .bytesperline = 240, + .sizeimage = 240 * 120, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 0}, + {320, 240, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, + .bytesperline = 480, + .sizeimage = 480 * 240 , + .colorspace = V4L2_COLORSPACE_JPEG, + .priv = 1 | MODE_JPEG}, + {320, 240, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, + .bytesperline = 320, + .sizeimage = 320 * 240 , + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 1 | MODE_RAW}, + {320, 240, V4L2_PIX_FMT_SN9C20X_I420, V4L2_FIELD_NONE, + .bytesperline = 480, + .sizeimage = 480 * 240 , + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 1}, + {640, 480, V4L2_PIX_FMT_JPEG, V4L2_FIELD_NONE, + .bytesperline = 960, + .sizeimage = 960 * 480, + .colorspace = V4L2_COLORSPACE_JPEG, + .priv = 2 | MODE_JPEG}, + {640, 480, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, + .bytesperline = 640, + .sizeimage = 640 * 480, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 2 | MODE_RAW}, + {640, 480, V4L2_PIX_FMT_SN9C20X_I420, V4L2_FIELD_NONE, + .bytesperline = 960, + .sizeimage = 960 * 480, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 2}, + {1280, 1024, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, + .bytesperline = 1280, + .sizeimage = (1280 * 1024) + 64, + .colorspace = V4L2_COLORSPACE_SRGB, + .priv = 3 | MODE_RAW | MODE_SXGA}, +}; + +static const int hsv_red_x[] = { + 41, 44, 46, 48, 50, 52, 54, 56, + 58, 60, 62, 64, 66, 68, 70, 72, + 74, 76, 78, 80, 81, 83, 85, 87, + 88, 90, 92, 93, 95, 97, 98, 100, + 101, 102, 104, 105, 107, 108, 109, 110, + 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 123, 124, 125, 125, + 126, 127, 127, 128, 128, 129, 129, 129, + 130, 130, 130, 130, 131, 131, 131, 131, + 131, 131, 131, 131, 130, 130, 130, 130, + 129, 129, 129, 128, 128, 127, 127, 126, + 125, 125, 124, 123, 122, 122, 121, 120, + 119, 118, 117, 116, 115, 114, 112, 111, + 110, 109, 107, 106, 105, 103, 102, 101, + 99, 98, 96, 94, 93, 91, 90, 88, + 86, 84, 83, 81, 79, 77, 75, 74, + 72, 70, 68, 66, 64, 62, 60, 58, + 56, 54, 52, 49, 47, 45, 43, 41, + 39, 36, 34, 32, 30, 28, 25, 23, + 21, 19, 16, 14, 12, 9, 7, 5, + 3, 0, -1, -3, -6, -8, -10, -12, + -15, -17, -19, -22, -24, -26, -28, -30, + -33, -35, -37, -39, -41, 
-44, -46, -48, + -50, -52, -54, -56, -58, -60, -62, -64, + -66, -68, -70, -72, -74, -76, -78, -80, + -81, -83, -85, -87, -88, -90, -92, -93, + -95, -97, -98, -100, -101, -102, -104, -105, + -107, -108, -109, -110, -112, -113, -114, -115, + -116, -117, -118, -119, -120, -121, -122, -123, + -123, -124, -125, -125, -126, -127, -127, -128, + -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, + -128, -127, -127, -126, -125, -125, -124, -123, + -122, -122, -121, -120, -119, -118, -117, -116, + -115, -114, -112, -111, -110, -109, -107, -106, + -105, -103, -102, -101, -99, -98, -96, -94, + -93, -91, -90, -88, -86, -84, -83, -81, + -79, -77, -75, -74, -72, -70, -68, -66, + -64, -62, -60, -58, -56, -54, -52, -49, + -47, -45, -43, -41, -39, -36, -34, -32, + -30, -28, -25, -23, -21, -19, -16, -14, + -12, -9, -7, -5, -3, 0, 1, 3, + 6, 8, 10, 12, 15, 17, 19, 22, + 24, 26, 28, 30, 33, 35, 37, 39, 41 +}; + +static const int hsv_red_y[] = { + 82, 80, 78, 76, 74, 73, 71, 69, + 67, 65, 63, 61, 58, 56, 54, 52, + 50, 48, 46, 44, 41, 39, 37, 35, + 32, 30, 28, 26, 23, 21, 19, 16, + 14, 12, 10, 7, 5, 3, 0, -1, + -3, -6, -8, -10, -13, -15, -17, -19, + -22, -24, -26, -29, -31, -33, -35, -38, + -40, -42, -44, -46, -48, -51, -53, -55, + -57, -59, -61, -63, -65, -67, -69, -71, + -73, -75, -77, -79, -81, -82, -84, -86, + -88, -89, -91, -93, -94, -96, -98, -99, + -101, -102, -104, -105, -106, -108, -109, -110, + -112, -113, -114, -115, -116, -117, -119, -120, + -120, -121, -122, -123, -124, -125, -126, -126, + -127, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, + -128, -128, -128, -128, -128, -128, -128, -128, + -127, -127, -126, -125, -125, -124, -123, -122, + -121, -120, -119, -118, -117, -116, -115, -114, + -113, -111, -110, -109, -107, -106, -105, -103, + -102, -100, -99, -97, -96, -94, -92, -91, + -89, -87, -85, -84, -82, -80, -78, -76, + -74, -73, -71, -69, -67, -65, -63, -61, + -58, -56, -54, -52, -50, -48, -46, -44, + -41, -39, -37, -35, -32, -30, -28, -26, + -23, -21, -19, -16, -14, -12, -10, -7, + -5, -3, 0, 1, 3, 6, 8, 10, + 13, 15, 17, 19, 22, 24, 26, 29, + 31, 33, 35, 38, 40, 42, 44, 46, + 48, 51, 53, 55, 57, 59, 61, 63, + 65, 67, 69, 71, 73, 75, 77, 79, + 81, 82, 84, 86, 88, 89, 91, 93, + 94, 96, 98, 99, 101, 102, 104, 105, + 106, 108, 109, 110, 112, 113, 114, 115, + 116, 117, 119, 120, 120, 121, 122, 123, + 124, 125, 126, 126, 127, 128, 128, 129, + 129, 130, 130, 131, 131, 131, 131, 132, + 132, 132, 132, 132, 132, 132, 132, 132, + 132, 132, 132, 131, 131, 131, 130, 130, + 130, 129, 129, 128, 127, 127, 126, 125, + 125, 124, 123, 122, 121, 120, 119, 118, + 117, 116, 115, 114, 113, 111, 110, 109, + 107, 106, 105, 103, 102, 100, 99, 97, + 96, 94, 92, 91, 89, 87, 85, 84, 82 +}; + +static const int hsv_green_x[] = { + -124, -124, -125, -125, -125, -125, -125, -125, + -125, -126, -126, -125, -125, -125, -125, -125, + -125, -124, -124, -124, -123, -123, -122, -122, + -121, -121, -120, -120, -119, -118, -117, -117, + -116, -115, -114, -113, -112, -111, -110, -109, + -108, -107, -105, -104, -103, -102, -100, -99, + -98, -96, -95, -93, -92, -91, -89, -87, + -86, -84, -83, -81, -79, -77, -76, -74, + -72, -70, -69, -67, -65, -63, -61, -59, + -57, -55, -53, -51, -49, -47, -45, -43, + -41, -39, -37, -35, -33, -30, -28, -26, + -24, -22, -20, -18, -15, -13, -11, -9, + -7, -4, -2, 0, 1, 3, 6, 8, + 10, 12, 14, 17, 19, 21, 23, 25, + 
27, 29, 32, 34, 36, 38, 40, 42, + 44, 46, 48, 50, 52, 54, 56, 58, + 60, 62, 64, 66, 68, 70, 71, 73, + 75, 77, 78, 80, 82, 83, 85, 87, + 88, 90, 91, 93, 94, 96, 97, 98, + 100, 101, 102, 104, 105, 106, 107, 108, + 109, 111, 112, 113, 113, 114, 115, 116, + 117, 118, 118, 119, 120, 120, 121, 122, + 122, 123, 123, 124, 124, 124, 125, 125, + 125, 125, 125, 125, 125, 126, 126, 125, + 125, 125, 125, 125, 125, 124, 124, 124, + 123, 123, 122, 122, 121, 121, 120, 120, + 119, 118, 117, 117, 116, 115, 114, 113, + 112, 111, 110, 109, 108, 107, 105, 104, + 103, 102, 100, 99, 98, 96, 95, 93, + 92, 91, 89, 87, 86, 84, 83, 81, + 79, 77, 76, 74, 72, 70, 69, 67, + 65, 63, 61, 59, 57, 55, 53, 51, + 49, 47, 45, 43, 41, 39, 37, 35, + 33, 30, 28, 26, 24, 22, 20, 18, + 15, 13, 11, 9, 7, 4, 2, 0, + -1, -3, -6, -8, -10, -12, -14, -17, + -19, -21, -23, -25, -27, -29, -32, -34, + -36, -38, -40, -42, -44, -46, -48, -50, + -52, -54, -56, -58, -60, -62, -64, -66, + -68, -70, -71, -73, -75, -77, -78, -80, + -82, -83, -85, -87, -88, -90, -91, -93, + -94, -96, -97, -98, -100, -101, -102, -104, + -105, -106, -107, -108, -109, -111, -112, -113, + -113, -114, -115, -116, -117, -118, -118, -119, + -120, -120, -121, -122, -122, -123, -123, -124, -124 +}; + +static const int hsv_green_y[] = { + -100, -99, -98, -97, -95, -94, -93, -91, + -90, -89, -87, -86, -84, -83, -81, -80, + -78, -76, -75, -73, -71, -70, -68, -66, + -64, -63, -61, -59, -57, -55, -53, -51, + -49, -48, -46, -44, -42, -40, -38, -36, + -34, -32, -30, -27, -25, -23, -21, -19, + -17, -15, -13, -11, -9, -7, -4, -2, + 0, 1, 3, 5, 7, 9, 11, 14, + 16, 18, 20, 22, 24, 26, 28, 30, + 32, 34, 36, 38, 40, 42, 44, 46, + 48, 50, 52, 54, 56, 58, 59, 61, + 63, 65, 67, 68, 70, 72, 74, 75, + 77, 78, 80, 82, 83, 85, 86, 88, + 89, 90, 92, 93, 95, 96, 97, 98, + 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 112, 113, 114, + 115, 115, 116, 116, 117, 117, 118, 118, + 119, 119, 119, 120, 120, 120, 120, 120, + 121, 121, 121, 121, 121, 121, 120, 120, + 120, 120, 120, 119, 119, 119, 118, 118, + 117, 117, 116, 116, 115, 114, 114, 113, + 112, 111, 111, 110, 109, 108, 107, 106, + 105, 104, 103, 102, 100, 99, 98, 97, + 95, 94, 93, 91, 90, 89, 87, 86, + 84, 83, 81, 80, 78, 76, 75, 73, + 71, 70, 68, 66, 64, 63, 61, 59, + 57, 55, 53, 51, 49, 48, 46, 44, + 42, 40, 38, 36, 34, 32, 30, 27, + 25, 23, 21, 19, 17, 15, 13, 11, + 9, 7, 4, 2, 0, -1, -3, -5, + -7, -9, -11, -14, -16, -18, -20, -22, + -24, -26, -28, -30, -32, -34, -36, -38, + -40, -42, -44, -46, -48, -50, -52, -54, + -56, -58, -59, -61, -63, -65, -67, -68, + -70, -72, -74, -75, -77, -78, -80, -82, + -83, -85, -86, -88, -89, -90, -92, -93, + -95, -96, -97, -98, -100, -101, -102, -103, + -104, -105, -106, -107, -108, -109, -110, -111, + -112, -112, -113, -114, -115, -115, -116, -116, + -117, -117, -118, -118, -119, -119, -119, -120, + -120, -120, -120, -120, -121, -121, -121, -121, + -121, -121, -120, -120, -120, -120, -120, -119, + -119, -119, -118, -118, -117, -117, -116, -116, + -115, -114, -114, -113, -112, -111, -111, -110, + -109, -108, -107, -106, -105, -104, -103, -102, -100 +}; + +static const int hsv_blue_x[] = { + 112, 113, 114, 114, 115, 116, 117, 117, + 118, 118, 119, 119, 120, 120, 120, 121, + 121, 121, 122, 122, 122, 122, 122, 122, + 122, 122, 122, 122, 122, 122, 121, 121, + 121, 120, 120, 120, 119, 119, 118, 118, + 117, 116, 116, 115, 114, 113, 113, 112, + 111, 110, 109, 108, 107, 106, 105, 104, + 103, 102, 100, 99, 98, 97, 95, 94, + 93, 91, 90, 88, 87, 85, 84, 82, + 80, 79, 77, 76, 74, 72, 70, 69, + 67, 65, 
63, 61, 60, 58, 56, 54, + 52, 50, 48, 46, 44, 42, 40, 38, + 36, 34, 32, 30, 28, 26, 24, 22, + 19, 17, 15, 13, 11, 9, 7, 5, + 2, 0, -1, -3, -5, -7, -9, -12, + -14, -16, -18, -20, -22, -24, -26, -28, + -31, -33, -35, -37, -39, -41, -43, -45, + -47, -49, -51, -53, -54, -56, -58, -60, + -62, -64, -66, -67, -69, -71, -73, -74, + -76, -78, -79, -81, -83, -84, -86, -87, + -89, -90, -92, -93, -94, -96, -97, -98, + -99, -101, -102, -103, -104, -105, -106, -107, + -108, -109, -110, -111, -112, -113, -114, -114, + -115, -116, -117, -117, -118, -118, -119, -119, + -120, -120, -120, -121, -121, -121, -122, -122, + -122, -122, -122, -122, -122, -122, -122, -122, + -122, -122, -121, -121, -121, -120, -120, -120, + -119, -119, -118, -118, -117, -116, -116, -115, + -114, -113, -113, -112, -111, -110, -109, -108, + -107, -106, -105, -104, -103, -102, -100, -99, + -98, -97, -95, -94, -93, -91, -90, -88, + -87, -85, -84, -82, -80, -79, -77, -76, + -74, -72, -70, -69, -67, -65, -63, -61, + -60, -58, -56, -54, -52, -50, -48, -46, + -44, -42, -40, -38, -36, -34, -32, -30, + -28, -26, -24, -22, -19, -17, -15, -13, + -11, -9, -7, -5, -2, 0, 1, 3, + 5, 7, 9, 12, 14, 16, 18, 20, + 22, 24, 26, 28, 31, 33, 35, 37, + 39, 41, 43, 45, 47, 49, 51, 53, + 54, 56, 58, 60, 62, 64, 66, 67, + 69, 71, 73, 74, 76, 78, 79, 81, + 83, 84, 86, 87, 89, 90, 92, 93, + 94, 96, 97, 98, 99, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, 112 +}; + +static const int hsv_blue_y[] = { + -11, -13, -15, -17, -19, -21, -23, -25, + -27, -29, -31, -33, -35, -37, -39, -41, + -43, -45, -46, -48, -50, -52, -54, -55, + -57, -59, -61, -62, -64, -66, -67, -69, + -71, -72, -74, -75, -77, -78, -80, -81, + -83, -84, -86, -87, -88, -90, -91, -92, + -93, -95, -96, -97, -98, -99, -100, -101, + -102, -103, -104, -105, -106, -106, -107, -108, + -109, -109, -110, -111, -111, -112, -112, -113, + -113, -114, -114, -114, -115, -115, -115, -115, + -116, -116, -116, -116, -116, -116, -116, -116, + -116, -115, -115, -115, -115, -114, -114, -114, + -113, -113, -112, -112, -111, -111, -110, -110, + -109, -108, -108, -107, -106, -105, -104, -103, + -102, -101, -100, -99, -98, -97, -96, -95, + -94, -93, -91, -90, -89, -88, -86, -85, + -84, -82, -81, -79, -78, -76, -75, -73, + -71, -70, -68, -67, -65, -63, -62, -60, + -58, -56, -55, -53, -51, -49, -47, -45, + -44, -42, -40, -38, -36, -34, -32, -30, + -28, -26, -24, -22, -20, -18, -16, -14, + -12, -10, -8, -6, -4, -2, 0, 1, + 3, 5, 7, 9, 11, 13, 15, 17, + 19, 21, 23, 25, 27, 29, 31, 33, + 35, 37, 39, 41, 43, 45, 46, 48, + 50, 52, 54, 55, 57, 59, 61, 62, + 64, 66, 67, 69, 71, 72, 74, 75, + 77, 78, 80, 81, 83, 84, 86, 87, + 88, 90, 91, 92, 93, 95, 96, 97, + 98, 99, 100, 101, 102, 103, 104, 105, + 106, 106, 107, 108, 109, 109, 110, 111, + 111, 112, 112, 113, 113, 114, 114, 114, + 115, 115, 115, 115, 116, 116, 116, 116, + 116, 116, 116, 116, 116, 115, 115, 115, + 115, 114, 114, 114, 113, 113, 112, 112, + 111, 111, 110, 110, 109, 108, 108, 107, + 106, 105, 104, 103, 102, 101, 100, 99, + 98, 97, 96, 95, 94, 93, 91, 90, + 89, 88, 86, 85, 84, 82, 81, 79, + 78, 76, 75, 73, 71, 70, 68, 67, + 65, 63, 62, 60, 58, 56, 55, 53, + 51, 49, 47, 45, 44, 42, 40, 38, + 36, 34, 32, 30, 28, 26, 24, 22, + 20, 18, 16, 14, 12, 10, 8, 6, + 4, 2, 0, -1, -3, -5, -7, -9, -11 +}; + +static u16 i2c_ident[] = { + V4L2_IDENT_OV9650, + V4L2_IDENT_OV9655, + V4L2_IDENT_SOI968, + V4L2_IDENT_OV7660, + V4L2_IDENT_OV7670, + V4L2_IDENT_MT9V011, + V4L2_IDENT_MT9V111, + V4L2_IDENT_MT9V112, + V4L2_IDENT_MT9M001C12ST, + V4L2_IDENT_MT9M111, + 
V4L2_IDENT_HV7131R, +}; + +static u16 bridge_init[][2] = { + {0x1000, 0x78}, {0x1001, 0x40}, {0x1002, 0x1c}, + {0x1020, 0x80}, {0x1061, 0x01}, {0x1067, 0x40}, + {0x1068, 0x30}, {0x1069, 0x20}, {0x106a, 0x10}, + {0x106b, 0x08}, {0x1188, 0x87}, {0x11a1, 0x00}, + {0x11a2, 0x00}, {0x11a3, 0x6a}, {0x11a4, 0x50}, + {0x11ab, 0x00}, {0x11ac, 0x00}, {0x11ad, 0x50}, + {0x11ae, 0x3c}, {0x118a, 0x04}, {0x0395, 0x04}, + {0x11b8, 0x3a}, {0x118b, 0x0e}, {0x10f7, 0x05}, + {0x10f8, 0x14}, {0x10fa, 0xff}, {0x10f9, 0x00}, + {0x11ba, 0x0a}, {0x11a5, 0x2d}, {0x11a6, 0x2d}, + {0x11a7, 0x3a}, {0x11a8, 0x05}, {0x11a9, 0x04}, + {0x11aa, 0x3f}, {0x11af, 0x28}, {0x11b0, 0xd8}, + {0x11b1, 0x14}, {0x11b2, 0xec}, {0x11b3, 0x32}, + {0x11b4, 0xdd}, {0x11b5, 0x32}, {0x11b6, 0xdd}, + {0x10e0, 0x2c}, {0x11bc, 0x40}, {0x11bd, 0x01}, + {0x11be, 0xf0}, {0x11bf, 0x00}, {0x118c, 0x1f}, + {0x118d, 0x1f}, {0x118e, 0x1f}, {0x118f, 0x1f}, + {0x1180, 0x01}, {0x1181, 0x00}, {0x1182, 0x01}, + {0x1183, 0x00}, {0x1184, 0x50}, {0x1185, 0x80} +}; + +/* Gain = (bit[3:0] / 16 + 1) * (bit[4] + 1) * (bit[5] + 1) * (bit[6] + 1) */ +static u8 ov_gain[] = { + 0x00 /* 1x */, 0x04 /* 1.25x */, 0x08 /* 1.5x */, 0x0c /* 1.75x */, + 0x10 /* 2x */, 0x12 /* 2.25x */, 0x14 /* 2.5x */, 0x16 /* 2.75x */, + 0x18 /* 3x */, 0x1a /* 3.25x */, 0x1c /* 3.5x */, 0x1e /* 3.75x */, + 0x30 /* 4x */, 0x31 /* 4.25x */, 0x32 /* 4.5x */, 0x33 /* 4.75x */, + 0x34 /* 5x */, 0x35 /* 5.25x */, 0x36 /* 5.5x */, 0x37 /* 5.75x */, + 0x38 /* 6x */, 0x39 /* 6.25x */, 0x3a /* 6.5x */, 0x3b /* 6.75x */, + 0x3c /* 7x */, 0x3d /* 7.25x */, 0x3e /* 7.5x */, 0x3f /* 7.75x */, + 0x70 /* 8x */ +}; + +/* Gain = (bit[8] + 1) * (bit[7] + 1) * (bit[6:0] * 0.03125) */ +static u16 micron1_gain[] = { + /* 1x 1.25x 1.5x 1.75x */ + 0x0020, 0x0028, 0x0030, 0x0038, + /* 2x 2.25x 2.5x 2.75x */ + 0x00a0, 0x00a4, 0x00a8, 0x00ac, + /* 3x 3.25x 3.5x 3.75x */ + 0x00b0, 0x00b4, 0x00b8, 0x00bc, + /* 4x 4.25x 4.5x 4.75x */ + 0x00c0, 0x00c4, 0x00c8, 0x00cc, + /* 5x 5.25x 5.5x 5.75x */ + 0x00d0, 0x00d4, 0x00d8, 0x00dc, + /* 6x 6.25x 6.5x 6.75x */ + 0x00e0, 0x00e4, 0x00e8, 0x00ec, + /* 7x 7.25x 7.5x 7.75x */ + 0x00f0, 0x00f4, 0x00f8, 0x00fc, + /* 8x */ + 0x01c0 +}; + +/* mt9m001 sensor uses a different gain formula then other micron sensors */ +/* Gain = (bit[6] + 1) * (bit[5-0] * 0.125) */ +static u16 micron2_gain[] = { + /* 1x 1.25x 1.5x 1.75x */ + 0x0008, 0x000a, 0x000c, 0x000e, + /* 2x 2.25x 2.5x 2.75x */ + 0x0010, 0x0012, 0x0014, 0x0016, + /* 3x 3.25x 3.5x 3.75x */ + 0x0018, 0x001a, 0x001c, 0x001e, + /* 4x 4.25x 4.5x 4.75x */ + 0x0020, 0x0051, 0x0052, 0x0053, + /* 5x 5.25x 5.5x 5.75x */ + 0x0054, 0x0055, 0x0056, 0x0057, + /* 6x 6.25x 6.5x 6.75x */ + 0x0058, 0x0059, 0x005a, 0x005b, + /* 7x 7.25x 7.5x 7.75x */ + 0x005c, 0x005d, 0x005e, 0x005f, + /* 8x */ + 0x0060 +}; + +/* Gain = .5 + bit[7:0] / 16 */ +static u8 hv7131r_gain[] = { + 0x08 /* 1x */, 0x0c /* 1.25x */, 0x10 /* 1.5x */, 0x14 /* 1.75x */, + 0x18 /* 2x */, 0x1c /* 2.25x */, 0x20 /* 2.5x */, 0x24 /* 2.75x */, + 0x28 /* 3x */, 0x2c /* 3.25x */, 0x30 /* 3.5x */, 0x34 /* 3.75x */, + 0x38 /* 4x */, 0x3c /* 4.25x */, 0x40 /* 4.5x */, 0x44 /* 4.75x */, + 0x48 /* 5x */, 0x4c /* 5.25x */, 0x50 /* 5.5x */, 0x54 /* 5.75x */, + 0x58 /* 6x */, 0x5c /* 6.25x */, 0x60 /* 6.5x */, 0x64 /* 6.75x */, + 0x68 /* 7x */, 0x6c /* 7.25x */, 0x70 /* 7.5x */, 0x74 /* 7.75x */, + 0x78 /* 8x */ +}; + +static u8 soi968_init[][2] = { + {0x12, 0x80}, {0x0c, 0x00}, {0x0f, 0x1f}, + {0x11, 0x80}, {0x38, 0x52}, {0x1e, 0x00}, + {0x33, 0x08}, {0x35, 0x8c}, {0x36, 0x0c}, + {0x37, 
0x04}, {0x45, 0x04}, {0x47, 0xff}, + {0x3e, 0x00}, {0x3f, 0x00}, {0x3b, 0x20}, + {0x3a, 0x96}, {0x3d, 0x0a}, {0x14, 0x8e}, + {0x13, 0x8a}, {0x12, 0x40}, {0x17, 0x13}, + {0x18, 0x63}, {0x19, 0x01}, {0x1a, 0x79}, + {0x32, 0x24}, {0x03, 0x00}, {0x11, 0x40}, + {0x2a, 0x10}, {0x2b, 0xe0}, {0x10, 0x32}, + {0x00, 0x00}, {0x01, 0x80}, {0x02, 0x80}, +}; + +static u8 ov7660_init[][2] = { + {0x0e, 0x80}, {0x0d, 0x08}, {0x0f, 0xc3}, + {0x04, 0xc3}, {0x10, 0x40}, {0x11, 0x40}, + {0x12, 0x05}, {0x13, 0xba}, {0x14, 0x2a}, + {0x37, 0x0f}, {0x38, 0x02}, {0x39, 0x43}, + {0x3a, 0x00}, {0x69, 0x90}, {0x2d, 0xf6}, + {0x2e, 0x0b}, {0x01, 0x78}, {0x02, 0x50}, +}; + +static u8 ov7670_init[][2] = { + {0x12, 0x80}, {0x11, 0x80}, {0x3a, 0x04}, {0x12, 0x01}, + {0x32, 0xb6}, {0x03, 0x0a}, {0x0c, 0x00}, {0x3e, 0x00}, + {0x70, 0x3a}, {0x71, 0x35}, {0x72, 0x11}, {0x73, 0xf0}, + {0xa2, 0x02}, {0x13, 0xe0}, {0x00, 0x00}, {0x10, 0x00}, + {0x0d, 0x40}, {0x14, 0x28}, {0xa5, 0x05}, {0xab, 0x07}, + {0x24, 0x95}, {0x25, 0x33}, {0x26, 0xe3}, {0x9f, 0x75}, + {0xa0, 0x65}, {0xa1, 0x0b}, {0xa6, 0xd8}, {0xa7, 0xd8}, + {0xa8, 0xf0}, {0xa9, 0x90}, {0xaa, 0x94}, {0x13, 0xe5}, + {0x0e, 0x61}, {0x0f, 0x4b}, {0x16, 0x02}, {0x1e, 0x27}, + {0x21, 0x02}, {0x22, 0x91}, {0x29, 0x07}, {0x33, 0x0b}, + {0x35, 0x0b}, {0x37, 0x1d}, {0x38, 0x71}, {0x39, 0x2a}, + {0x3c, 0x78}, {0x4d, 0x40}, {0x4e, 0x20}, {0x69, 0x00}, + {0x74, 0x19}, {0x8d, 0x4f}, {0x8e, 0x00}, {0x8f, 0x00}, + {0x90, 0x00}, {0x91, 0x00}, {0x96, 0x00}, {0x9a, 0x80}, + {0xb0, 0x84}, {0xb1, 0x0c}, {0xb2, 0x0e}, {0xb3, 0x82}, + {0xb8, 0x0a}, {0x43, 0x0a}, {0x44, 0xf0}, {0x45, 0x20}, + {0x46, 0x7d}, {0x47, 0x29}, {0x48, 0x4a}, {0x59, 0x8c}, + {0x5a, 0xa5}, {0x5b, 0xde}, {0x5c, 0x96}, {0x5d, 0x66}, + {0x5e, 0x10}, {0x6c, 0x0a}, {0x6d, 0x55}, {0x6e, 0x11}, + {0x6f, 0x9e}, {0x6a, 0x40}, {0x01, 0x40}, {0x02, 0x40}, + {0x13, 0xe7}, {0x4f, 0x6e}, {0x50, 0x70}, {0x51, 0x02}, + {0x52, 0x1d}, {0x53, 0x56}, {0x54, 0x73}, {0x55, 0x0a}, + {0x56, 0x55}, {0x57, 0x80}, {0x58, 0x9e}, {0x41, 0x08}, + {0x3f, 0x02}, {0x75, 0x03}, {0x76, 0x63}, {0x4c, 0x04}, + {0x77, 0x06}, {0x3d, 0x02}, {0x4b, 0x09}, {0xc9, 0x30}, + {0x41, 0x08}, {0x56, 0x48}, {0x34, 0x11}, {0xa4, 0x88}, + {0x96, 0x00}, {0x97, 0x30}, {0x98, 0x20}, {0x99, 0x30}, + {0x9a, 0x84}, {0x9b, 0x29}, {0x9c, 0x03}, {0x9d, 0x99}, + {0x9e, 0x7f}, {0x78, 0x04}, {0x79, 0x01}, {0xc8, 0xf0}, + {0x79, 0x0f}, {0xc8, 0x00}, {0x79, 0x10}, {0xc8, 0x7e}, + {0x79, 0x0a}, {0xc8, 0x80}, {0x79, 0x0b}, {0xc8, 0x01}, + {0x79, 0x0c}, {0xc8, 0x0f}, {0x79, 0x0d}, {0xc8, 0x20}, + {0x79, 0x09}, {0xc8, 0x80}, {0x79, 0x02}, {0xc8, 0xc0}, + {0x79, 0x03}, {0xc8, 0x40}, {0x79, 0x05}, {0xc8, 0x30}, + {0x79, 0x26}, {0x62, 0x20}, {0x63, 0x00}, {0x64, 0x06}, + {0x65, 0x00}, {0x66, 0x05}, {0x94, 0x05}, {0x95, 0x0a}, + {0x17, 0x13}, {0x18, 0x01}, {0x19, 0x02}, {0x1a, 0x7a}, + {0x46, 0x59}, {0x47, 0x30}, {0x58, 0x9a}, {0x59, 0x84}, + {0x5a, 0x91}, {0x5b, 0x57}, {0x5c, 0x75}, {0x5d, 0x6d}, + {0x5e, 0x13}, {0x64, 0x07}, {0x94, 0x07}, {0x95, 0x0d}, + {0xa6, 0xdf}, {0xa7, 0xdf}, {0x48, 0x4d}, {0x51, 0x00}, + {0x6b, 0x0a}, {0x11, 0x80}, {0x2a, 0x00}, {0x2b, 0x00}, + {0x92, 0x00}, {0x93, 0x00}, {0x55, 0x0a}, {0x56, 0x60}, + {0x4f, 0x6e}, {0x50, 0x70}, {0x51, 0x00}, {0x52, 0x1d}, + {0x53, 0x56}, {0x54, 0x73}, {0x58, 0x9a}, {0x4f, 0x6e}, + {0x50, 0x70}, {0x51, 0x00}, {0x52, 0x1d}, {0x53, 0x56}, + {0x54, 0x73}, {0x58, 0x9a}, {0x3f, 0x01}, {0x7b, 0x03}, + {0x7c, 0x09}, {0x7d, 0x16}, {0x7e, 0x38}, {0x7f, 0x47}, + {0x80, 0x53}, {0x81, 0x5e}, {0x82, 0x6a}, {0x83, 0x74}, + {0x84, 0x80}, {0x85, 0x8c}, 
{0x86, 0x9b}, {0x87, 0xb2}, + {0x88, 0xcc}, {0x89, 0xe5}, {0x7a, 0x24}, {0x3b, 0x00}, + {0x9f, 0x76}, {0xa0, 0x65}, {0x13, 0xe2}, {0x6b, 0x0a}, + {0x11, 0x80}, {0x2a, 0x00}, {0x2b, 0x00}, {0x92, 0x00}, + {0x93, 0x00}, +}; + +static u8 ov9650_init[][2] = { + {0x12, 0x80}, {0x00, 0x00}, {0x01, 0x78}, + {0x02, 0x78}, {0x03, 0x36}, {0x04, 0x03}, + {0x05, 0x00}, {0x06, 0x00}, {0x08, 0x00}, + {0x09, 0x01}, {0x0c, 0x00}, {0x0d, 0x00}, + {0x0e, 0xa0}, {0x0f, 0x52}, {0x10, 0x7c}, + {0x11, 0x80}, {0x12, 0x45}, {0x13, 0xc2}, + {0x14, 0x2e}, {0x15, 0x00}, {0x16, 0x07}, + {0x17, 0x24}, {0x18, 0xc5}, {0x19, 0x00}, + {0x1a, 0x3c}, {0x1b, 0x00}, {0x1e, 0x04}, + {0x1f, 0x00}, {0x24, 0x78}, {0x25, 0x68}, + {0x26, 0xd4}, {0x27, 0x80}, {0x28, 0x80}, + {0x29, 0x30}, {0x2a, 0x00}, {0x2b, 0x00}, + {0x2c, 0x80}, {0x2d, 0x00}, {0x2e, 0x00}, + {0x2f, 0x00}, {0x30, 0x08}, {0x31, 0x30}, + {0x32, 0x84}, {0x33, 0xe2}, {0x34, 0xbf}, + {0x35, 0x81}, {0x36, 0xf9}, {0x37, 0x00}, + {0x38, 0x93}, {0x39, 0x50}, {0x3a, 0x01}, + {0x3b, 0x01}, {0x3c, 0x73}, {0x3d, 0x19}, + {0x3e, 0x0b}, {0x3f, 0x80}, {0x40, 0xc1}, + {0x41, 0x00}, {0x42, 0x08}, {0x67, 0x80}, + {0x68, 0x80}, {0x69, 0x40}, {0x6a, 0x00}, + {0x6b, 0x0a}, {0x8b, 0x06}, {0x8c, 0x20}, + {0x8d, 0x00}, {0x8e, 0x00}, {0x8f, 0xdf}, + {0x92, 0x00}, {0x93, 0x00}, {0x94, 0x88}, + {0x95, 0x88}, {0x96, 0x04}, {0xa1, 0x00}, + {0xa5, 0x80}, {0xa8, 0x80}, {0xa9, 0xb8}, + {0xaa, 0x92}, {0xab, 0x0a}, +}; + +static u8 ov9655_init[][2] = { + {0x12, 0x80}, {0x12, 0x01}, {0x0d, 0x00}, {0x0e, 0x61}, + {0x11, 0x80}, {0x13, 0xba}, {0x14, 0x2e}, {0x16, 0x24}, + {0x1e, 0x04}, {0x1e, 0x04}, {0x1e, 0x04}, {0x27, 0x08}, + {0x28, 0x08}, {0x29, 0x15}, {0x2c, 0x08}, {0x32, 0xbf}, + {0x34, 0x3d}, {0x35, 0x00}, {0x36, 0xf8}, {0x38, 0x12}, + {0x39, 0x57}, {0x3a, 0x00}, {0x3b, 0xcc}, {0x3c, 0x0c}, + {0x3d, 0x19}, {0x3e, 0x0c}, {0x3f, 0x01}, {0x41, 0x40}, + {0x42, 0x80}, {0x45, 0x46}, {0x46, 0x62}, {0x47, 0x2a}, + {0x48, 0x3c}, {0x4a, 0xf0}, {0x4b, 0xdc}, {0x4c, 0xdc}, + {0x4d, 0xdc}, {0x4e, 0xdc}, {0x69, 0x02}, {0x6c, 0x04}, + {0x6f, 0x9e}, {0x70, 0x05}, {0x71, 0x78}, {0x77, 0x02}, + {0x8a, 0x23}, {0x8c, 0x0d}, {0x90, 0x7e}, {0x91, 0x7c}, + {0x9f, 0x6e}, {0xa0, 0x6e}, {0xa5, 0x68}, {0xa6, 0x60}, + {0xa8, 0xc1}, {0xa9, 0xfa}, {0xaa, 0x92}, {0xab, 0x04}, + {0xac, 0x80}, {0xad, 0x80}, {0xae, 0x80}, {0xaf, 0x80}, + {0xb2, 0xf2}, {0xb3, 0x20}, {0xb5, 0x00}, {0xb6, 0xaf}, + {0xbb, 0xae}, {0xbc, 0x44}, {0xbd, 0x44}, {0xbe, 0x3b}, + {0xbf, 0x3a}, {0xc0, 0xe2}, {0xc1, 0xc8}, {0xc2, 0x01}, + {0xc4, 0x00}, {0xc6, 0x85}, {0xc7, 0x81}, {0xc9, 0xe0}, + {0xca, 0xe8}, {0xcc, 0xd8}, {0xcd, 0x93}, {0x12, 0x61}, + {0x36, 0xfa}, {0x8c, 0x8d}, {0xc0, 0xaa}, {0x69, 0x0a}, + {0x03, 0x12}, {0x17, 0x14}, {0x18, 0x00}, {0x19, 0x01}, + {0x1a, 0x3d}, {0x32, 0xbf}, {0x11, 0x80}, {0x2a, 0x10}, + {0x2b, 0x0a}, {0x92, 0x00}, {0x93, 0x00}, {0x1e, 0x04}, + {0x1e, 0x04}, {0x10, 0x7c}, {0x04, 0x03}, {0xa1, 0x00}, + {0x2d, 0x00}, {0x2e, 0x00}, {0x00, 0x00}, {0x01, 0x80}, + {0x02, 0x80}, {0x12, 0x61}, {0x36, 0xfa}, {0x8c, 0x8d}, + {0xc0, 0xaa}, {0x69, 0x0a}, {0x03, 0x12}, {0x17, 0x14}, + {0x18, 0x00}, {0x19, 0x01}, {0x1a, 0x3d}, {0x32, 0xbf}, + {0x11, 0x80}, {0x2a, 0x10}, {0x2b, 0x0a}, {0x92, 0x00}, + {0x93, 0x00}, {0x04, 0x01}, {0x10, 0x1f}, {0xa1, 0x00}, + {0x00, 0x0a}, {0xa1, 0x00}, {0x10, 0x5d}, {0x04, 0x03}, + {0x00, 0x01}, {0xa1, 0x00}, {0x10, 0x7c}, {0x04, 0x03}, + {0x00, 0x03}, {0x00, 0x0a}, {0x00, 0x10}, {0x00, 0x13}, +}; + +static u16 mt9v112_init[][2] = { + {0xf0, 0x0000}, {0x0d, 0x0021}, {0x0d, 0x0020}, + {0x34, 0xc019}, {0x0a, 
0x0011}, {0x0b, 0x000b}, + {0x20, 0x0703}, {0x35, 0x2022}, {0xf0, 0x0001}, + {0x05, 0x0000}, {0x06, 0x340c}, {0x3b, 0x042a}, + {0x3c, 0x0400}, {0xf0, 0x0002}, {0x2e, 0x0c58}, + {0x5b, 0x0001}, {0xc8, 0x9f0b}, {0xf0, 0x0001}, + {0x9b, 0x5300}, {0xf0, 0x0000}, {0x2b, 0x0020}, + {0x2c, 0x002a}, {0x2d, 0x0032}, {0x2e, 0x0020}, + {0x09, 0x01dc}, {0x01, 0x000c}, {0x02, 0x0020}, + {0x03, 0x01e0}, {0x04, 0x0280}, {0x06, 0x000c}, + {0x05, 0x0098}, {0x20, 0x0703}, {0x09, 0x01f2}, + {0x2b, 0x00a0}, {0x2c, 0x00a0}, {0x2d, 0x00a0}, + {0x2e, 0x00a0}, {0x01, 0x000c}, {0x02, 0x0020}, + {0x03, 0x01e0}, {0x04, 0x0280}, {0x06, 0x000c}, + {0x05, 0x0098}, {0x09, 0x01c1}, {0x2b, 0x00ae}, + {0x2c, 0x00ae}, {0x2d, 0x00ae}, {0x2e, 0x00ae}, +}; + +static u16 mt9v111_init[][2] = { + {0x01, 0x0004}, {0x0d, 0x0001}, {0x0d, 0x0000}, + {0x01, 0x0001}, {0x02, 0x0016}, {0x03, 0x01e1}, + {0x04, 0x0281}, {0x05, 0x0004}, {0x07, 0x3002}, + {0x21, 0x0000}, {0x25, 0x4024}, {0x26, 0xff03}, + {0x27, 0xff10}, {0x2b, 0x7828}, {0x2c, 0xb43c}, + {0x2d, 0xf0a0}, {0x2e, 0x0c64}, {0x2f, 0x0064}, + {0x67, 0x4010}, {0x06, 0x301e}, {0x08, 0x0480}, + {0x01, 0x0004}, {0x02, 0x0016}, {0x03, 0x01e6}, + {0x04, 0x0286}, {0x05, 0x0004}, {0x06, 0x0000}, + {0x07, 0x3002}, {0x08, 0x0008}, {0x0c, 0x0000}, + {0x0d, 0x0000}, {0x0e, 0x0000}, {0x0f, 0x0000}, + {0x10, 0x0000}, {0x11, 0x0000}, {0x12, 0x00b0}, + {0x13, 0x007c}, {0x14, 0x0000}, {0x15, 0x0000}, + {0x16, 0x0000}, {0x17, 0x0000}, {0x18, 0x0000}, + {0x19, 0x0000}, {0x1a, 0x0000}, {0x1b, 0x0000}, + {0x1c, 0x0000}, {0x1d, 0x0000}, {0x30, 0x0000}, + {0x30, 0x0005}, {0x31, 0x0000}, {0x02, 0x0016}, + {0x03, 0x01e1}, {0x04, 0x0281}, {0x05, 0x0004}, + {0x06, 0x0000}, {0x07, 0x3002}, {0x06, 0x002d}, + {0x05, 0x0004}, {0x09, 0x0064}, {0x2b, 0x00a0}, + {0x2c, 0x00a0}, {0x2d, 0x00a0}, {0x2e, 0x00a0}, + {0x02, 0x0016}, {0x03, 0x01e1}, {0x04, 0x0281}, + {0x05, 0x0004}, {0x06, 0x002d}, {0x07, 0x3002}, + {0x0e, 0x0008}, {0x06, 0x002d}, {0x05, 0x0004}, +}; + +static u16 mt9v011_init[][2] = { + {0x07, 0x0002}, {0x0d, 0x0001}, {0x0d, 0x0000}, + {0x01, 0x0008}, {0x02, 0x0016}, {0x03, 0x01e1}, + {0x04, 0x0281}, {0x05, 0x0083}, {0x06, 0x0006}, + {0x0d, 0x0002}, {0x0a, 0x0000}, {0x0b, 0x0000}, + {0x0c, 0x0000}, {0x0d, 0x0000}, {0x0e, 0x0000}, + {0x0f, 0x0000}, {0x10, 0x0000}, {0x11, 0x0000}, + {0x12, 0x0000}, {0x13, 0x0000}, {0x14, 0x0000}, + {0x15, 0x0000}, {0x16, 0x0000}, {0x17, 0x0000}, + {0x18, 0x0000}, {0x19, 0x0000}, {0x1a, 0x0000}, + {0x1b, 0x0000}, {0x1c, 0x0000}, {0x1d, 0x0000}, + {0x32, 0x0000}, {0x20, 0x1101}, {0x21, 0x0000}, + {0x22, 0x0000}, {0x23, 0x0000}, {0x24, 0x0000}, + {0x25, 0x0000}, {0x26, 0x0000}, {0x27, 0x0024}, + {0x2f, 0xf7b0}, {0x30, 0x0005}, {0x31, 0x0000}, + {0x32, 0x0000}, {0x33, 0x0000}, {0x34, 0x0100}, + {0x3d, 0x068f}, {0x40, 0x01e0}, {0x41, 0x00d1}, + {0x44, 0x0082}, {0x5a, 0x0000}, {0x5b, 0x0000}, + {0x5c, 0x0000}, {0x5d, 0x0000}, {0x5e, 0x0000}, + {0x5f, 0xa31d}, {0x62, 0x0611}, {0x0a, 0x0000}, + {0x06, 0x0029}, {0x05, 0x0009}, {0x20, 0x1101}, + {0x20, 0x1101}, {0x09, 0x0064}, {0x07, 0x0003}, + {0x2b, 0x0033}, {0x2c, 0x00a0}, {0x2d, 0x00a0}, + {0x2e, 0x0033}, {0x07, 0x0002}, {0x06, 0x0000}, + {0x06, 0x0029}, {0x05, 0x0009}, +}; + +static u16 mt9m001_init[][2] = { + {0x0d, 0x0001}, {0x0d, 0x0000}, {0x01, 0x000e}, + {0x02, 0x0014}, {0x03, 0x03c1}, {0x04, 0x0501}, + {0x05, 0x0083}, {0x06, 0x0006}, {0x0d, 0x0002}, + {0x0a, 0x0000}, {0x0c, 0x0000}, {0x11, 0x0000}, + {0x1e, 0x8000}, {0x5f, 0x8904}, {0x60, 0x0000}, + {0x61, 0x0000}, {0x62, 0x0498}, {0x63, 0x0000}, + {0x64, 0x0000}, {0x20, 
0x111d}, {0x06, 0x00f2}, + {0x05, 0x0013}, {0x09, 0x10f2}, {0x07, 0x0003}, + {0x2b, 0x002a}, {0x2d, 0x002a}, {0x2c, 0x002a}, + {0x2e, 0x0029}, {0x07, 0x0002}, +}; + +static u16 mt9m111_init[][2] = { + {0xf0, 0x0000}, {0x0d, 0x0008}, {0x0d, 0x0009}, + {0x0d, 0x0008}, {0xf0, 0x0001}, {0x3a, 0x4300}, + {0x9b, 0x4300}, {0xa1, 0x0280}, {0xa4, 0x0200}, + {0x06, 0x308e}, {0xf0, 0x0000}, +}; + +static u8 hv7131r_init[][2] = { + {0x02, 0x08}, {0x02, 0x00}, {0x01, 0x08}, + {0x02, 0x00}, {0x20, 0x00}, {0x21, 0xd0}, + {0x22, 0x00}, {0x23, 0x09}, {0x01, 0x08}, + {0x01, 0x08}, {0x01, 0x08}, {0x25, 0x07}, + {0x26, 0xc3}, {0x27, 0x50}, {0x30, 0x62}, + {0x31, 0x10}, {0x32, 0x06}, {0x33, 0x10}, + {0x20, 0x00}, {0x21, 0xd0}, {0x22, 0x00}, + {0x23, 0x09}, {0x01, 0x08}, +}; + +int reg_r(struct gspca_dev *gspca_dev, u16 reg, u16 length) +{ + struct usb_device *dev = gspca_dev->dev; + int result; + result = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), + 0x00, + USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, + reg, + 0x00, + gspca_dev->usb_buf, + length, + 500); + if (unlikely(result < 0 || result != length)) { + err("Read register failed 0x%02X", reg); + return -EIO; + } + return 0; +} + +int reg_w(struct gspca_dev *gspca_dev, u16 reg, const u8 *buffer, int length) +{ + struct usb_device *dev = gspca_dev->dev; + int result; + memcpy(gspca_dev->usb_buf, buffer, length); + result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), + 0x08, + USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, + reg, + 0x00, + gspca_dev->usb_buf, + length, + 500); + if (unlikely(result < 0 || result != length)) { + err("Write register failed index 0x%02X", reg); + return -EIO; + } + return 0; +} + +int reg_w1(struct gspca_dev *gspca_dev, u16 reg, const u8 value) +{ + u8 data[1] = {value}; + return reg_w(gspca_dev, reg, data, 1); +} + +int i2c_w(struct gspca_dev *gspca_dev, const u8 *buffer) +{ + int i; + reg_w(gspca_dev, 0x10c0, buffer, 8); + for (i = 0; i < 5; i++) { + reg_r(gspca_dev, 0x10c0, 1); + if (gspca_dev->usb_buf[0] & 0x04) { + if (gspca_dev->usb_buf[0] & 0x08) + return -1; + return 0; + } + msleep(1); + } + return -1; +} + +int i2c_w1(struct gspca_dev *gspca_dev, u8 reg, u8 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + + u8 row[8]; + + /* + * from the point of view of the bridge, the length + * includes the address + */ + row[0] = 0x81 | (2 << 4); + row[1] = sd->i2c_addr; + row[2] = reg; + row[3] = val; + row[4] = 0x00; + row[5] = 0x00; + row[6] = 0x00; + row[7] = 0x10; + + return i2c_w(gspca_dev, row); +} + +int i2c_w2(struct gspca_dev *gspca_dev, u8 reg, u16 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + u8 row[8]; + + /* + * from the point of view of the bridge, the length + * includes the address + */ + row[0] = 0x81 | (3 << 4); + row[1] = sd->i2c_addr; + row[2] = reg; + row[3] = (val >> 8) & 0xff; + row[4] = val & 0xff; + row[5] = 0x00; + row[6] = 0x00; + row[7] = 0x10; + + return i2c_w(gspca_dev, row); +} + +int i2c_r1(struct gspca_dev *gspca_dev, u8 reg, u8 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + u8 row[8]; + + row[0] = 0x81 | 0x10; + row[1] = sd->i2c_addr; + row[2] = reg; + row[3] = 0; + row[4] = 0; + row[5] = 0; + row[6] = 0; + row[7] = 0x10; + reg_w(gspca_dev, 0x10c0, row, 8); + msleep(1); + row[0] = 0x81 | (2 << 4) | 0x02; + row[2] = 0; + reg_w(gspca_dev, 0x10c0, row, 8); + msleep(1); + reg_r(gspca_dev, 0x10c2, 5); + *val = gspca_dev->usb_buf[3]; + return 0; +} + +int i2c_r2(struct gspca_dev *gspca_dev, u8 reg, u16 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + u8 row[8]; + 
+ row[0] = 0x81 | 0x10; + row[1] = sd->i2c_addr; + row[2] = reg; + row[3] = 0; + row[4] = 0; + row[5] = 0; + row[6] = 0; + row[7] = 0x10; + reg_w(gspca_dev, 0x10c0, row, 8); + msleep(1); + row[0] = 0x81 | (3 << 4) | 0x02; + row[2] = 0; + reg_w(gspca_dev, 0x10c0, row, 8); + msleep(1); + reg_r(gspca_dev, 0x10c2, 5); + *val = (gspca_dev->usb_buf[2] << 8) | gspca_dev->usb_buf[3]; + return 0; +} + +static int ov9650_init_sensor(struct gspca_dev *gspca_dev) +{ + int i; + struct sd *sd = (struct sd *) gspca_dev; + + for (i = 0; i < ARRAY_SIZE(ov9650_init); i++) { + if (i2c_w1(gspca_dev, ov9650_init[i][0], + ov9650_init[i][1]) < 0) { + err("OV9650 sensor initialization failed"); + return -ENODEV; + } + } + sd->hstart = 1; + sd->vstart = 7; + return 0; +} + +static int ov9655_init_sensor(struct gspca_dev *gspca_dev) +{ + int i; + struct sd *sd = (struct sd *) gspca_dev; + + for (i = 0; i < ARRAY_SIZE(ov9655_init); i++) { + if (i2c_w1(gspca_dev, ov9655_init[i][0], + ov9655_init[i][1]) < 0) { + err("OV9655 sensor initialization failed"); + return -ENODEV; + } + } + /* disable hflip and vflip */ + gspca_dev->ctrl_dis = (1 << HFLIP_IDX) | (1 << VFLIP_IDX); + sd->hstart = 0; + sd->vstart = 7; + return 0; +} + +static int soi968_init_sensor(struct gspca_dev *gspca_dev) +{ + int i; + struct sd *sd = (struct sd *) gspca_dev; + + for (i = 0; i < ARRAY_SIZE(soi968_init); i++) { + if (i2c_w1(gspca_dev, soi968_init[i][0], + soi968_init[i][1]) < 0) { + err("SOI968 sensor initialization failed"); + return -ENODEV; + } + } + /* disable hflip and vflip */ + gspca_dev->ctrl_dis = (1 << HFLIP_IDX) | (1 << VFLIP_IDX); + sd->hstart = 60; + sd->vstart = 11; + return 0; +} + +static int ov7660_init_sensor(struct gspca_dev *gspca_dev) +{ + int i; + struct sd *sd = (struct sd *) gspca_dev; + + for (i = 0; i < ARRAY_SIZE(ov7660_init); i++) { + if (i2c_w1(gspca_dev, ov7660_init[i][0], + ov7660_init[i][1]) < 0) { + err("OV7660 sensor initialization failed"); + return -ENODEV; + } + } + /* disable hflip and vflip */ + gspca_dev->ctrl_dis = (1 << HFLIP_IDX) | (1 << VFLIP_IDX); + sd->hstart = 1; + sd->vstart = 1; + return 0; +} + +static int ov7670_init_sensor(struct gspca_dev *gspca_dev) +{ + int i; + struct sd *sd = (struct sd *) gspca_dev; + + for (i = 0; i < ARRAY_SIZE(ov7670_init); i++) { + if (i2c_w1(gspca_dev, ov7670_init[i][0], + ov7670_init[i][1]) < 0) { + err("OV7670 sensor initialization failed"); + return -ENODEV; + } + } + /* disable hflip and vflip */ + gspca_dev->ctrl_dis = (1 << HFLIP_IDX) | (1 << VFLIP_IDX); + sd->hstart = 0; + sd->vstart = 1; + return 0; +} + +static int mt9v_init_sensor(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + int i; + u16 value; + int ret; + + sd->i2c_addr = 0x5d; + ret = i2c_r2(gspca_dev, 0xff, &value); + if ((ret == 0) && (value == 0x8243)) { + for (i = 0; i < ARRAY_SIZE(mt9v011_init); i++) { + if (i2c_w2(gspca_dev, mt9v011_init[i][0], + mt9v011_init[i][1]) < 0) { + err("MT9V011 sensor initialization failed"); + return -ENODEV; + } + } + sd->hstart = 2; + sd->vstart = 2; + sd->sensor = SENSOR_MT9V011; + info("MT9V011 sensor detected"); + return 0; + } + + sd->i2c_addr = 0x5c; + i2c_w2(gspca_dev, 0x01, 0x0004); + ret = i2c_r2(gspca_dev, 0xff, &value); + if ((ret == 0) && (value == 0x823a)) { + for (i = 0; i < ARRAY_SIZE(mt9v111_init); i++) { + if (i2c_w2(gspca_dev, mt9v111_init[i][0], + mt9v111_init[i][1]) < 0) { + err("MT9V111 sensor initialization failed"); + return -ENODEV; + } + } + sd->hstart = 2; + sd->vstart = 2; + sd->sensor = SENSOR_MT9V111; + 
info("MT9V111 sensor detected"); + return 0; + } + + sd->i2c_addr = 0x5d; + ret = i2c_w2(gspca_dev, 0xf0, 0x0000); + if (ret < 0) { + sd->i2c_addr = 0x48; + i2c_w2(gspca_dev, 0xf0, 0x0000); + } + ret = i2c_r2(gspca_dev, 0x00, &value); + if ((ret == 0) && (value == 0x1229)) { + for (i = 0; i < ARRAY_SIZE(mt9v112_init); i++) { + if (i2c_w2(gspca_dev, mt9v112_init[i][0], + mt9v112_init[i][1]) < 0) { + err("MT9V112 sensor initialization failed"); + return -ENODEV; + } + } + sd->hstart = 6; + sd->vstart = 2; + sd->sensor = SENSOR_MT9V112; + info("MT9V112 sensor detected"); + return 0; + } + + return -ENODEV; +} + +static int mt9m111_init_sensor(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + int i; + for (i = 0; i < ARRAY_SIZE(mt9m111_init); i++) { + if (i2c_w2(gspca_dev, mt9m111_init[i][0], + mt9m111_init[i][1]) < 0) { + err("MT9M111 sensor initialization failed"); + return -ENODEV; + } + } + sd->hstart = 0; + sd->vstart = 2; + return 0; +} + +static int mt9m001_init_sensor(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + int i; + for (i = 0; i < ARRAY_SIZE(mt9m001_init); i++) { + if (i2c_w2(gspca_dev, mt9m001_init[i][0], + mt9m001_init[i][1]) < 0) { + err("MT9M001 sensor initialization failed"); + return -ENODEV; + } + } + /* disable hflip and vflip */ + gspca_dev->ctrl_dis = (1 << HFLIP_IDX) | (1 << VFLIP_IDX); + sd->hstart = 2; + sd->vstart = 2; + return 0; +} + +static int hv7131r_init_sensor(struct gspca_dev *gspca_dev) +{ + int i; + struct sd *sd = (struct sd *) gspca_dev; + + for (i = 0; i < ARRAY_SIZE(hv7131r_init); i++) { + if (i2c_w1(gspca_dev, hv7131r_init[i][0], + hv7131r_init[i][1]) < 0) { + err("HV7131R Sensor initialization failed"); + return -ENODEV; + } + } + sd->hstart = 0; + sd->vstart = 1; + return 0; +} + +#ifdef CONFIG_USB_GSPCA_SN9C20X_EVDEV +static int input_kthread(void *data) +{ + struct gspca_dev *gspca_dev = (struct gspca_dev *)data; + struct sd *sd = (struct sd *) gspca_dev; + + DECLARE_WAIT_QUEUE_HEAD(wait); + set_freezable(); + for (;;) { + if (kthread_should_stop()) + break; + + if (reg_r(gspca_dev, 0x1005, 1) < 0) + continue; + + input_report_key(sd->input_dev, + KEY_CAMERA, + gspca_dev->usb_buf[0] & sd->input_gpio); + input_sync(sd->input_dev); + + wait_event_freezable_timeout(wait, + kthread_should_stop(), + msecs_to_jiffies(100)); + } + return 0; +} + + +static int sn9c20x_input_init(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + if (sd->input_gpio == 0) + return 0; + + sd->input_dev = input_allocate_device(); + if (!sd->input_dev) + return -ENOMEM; + + sd->input_dev->name = "SN9C20X Webcam"; + + sd->input_dev->phys = kasprintf(GFP_KERNEL, "usb-%s-%s", + gspca_dev->dev->bus->bus_name, + gspca_dev->dev->devpath); + + if (!sd->input_dev->phys) + return -ENOMEM; + + usb_to_input_id(gspca_dev->dev, &sd->input_dev->id); + sd->input_dev->dev.parent = &gspca_dev->dev->dev; + + set_bit(EV_KEY, sd->input_dev->evbit); + set_bit(KEY_CAMERA, sd->input_dev->keybit); + + if (input_register_device(sd->input_dev)) + return -EINVAL; + + sd->input_task = kthread_run(input_kthread, gspca_dev, "sn9c20x/%d", + gspca_dev->vdev.minor); + + if (IS_ERR(sd->input_task)) + return -EINVAL; + + return 0; +} + +static void sn9c20x_input_cleanup(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + if (sd->input_task != NULL && !IS_ERR(sd->input_task)) + kthread_stop(sd->input_task); + + if (sd->input_dev != NULL) { + input_unregister_device(sd->input_dev); + 
kfree(sd->input_dev->phys); + input_free_device(sd->input_dev); + sd->input_dev = NULL; + } +} +#endif + +static int set_cmatrix(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + s32 hue_coord, hue_index = 180 + sd->hue; + u8 cmatrix[21]; + memset(cmatrix, 0, 21); + + cmatrix[2] = (sd->contrast * 0x25 / 0x100) + 0x26; + cmatrix[0] = 0x13 + (cmatrix[2] - 0x26) * 0x13 / 0x25; + cmatrix[4] = 0x07 + (cmatrix[2] - 0x26) * 0x07 / 0x25; + cmatrix[18] = sd->brightness - 0x80; + + hue_coord = (hsv_red_x[hue_index] * sd->saturation) >> 8; + cmatrix[6] = (unsigned char)(hue_coord & 0xff); + cmatrix[7] = (unsigned char)((hue_coord >> 8) & 0x0f); + + hue_coord = (hsv_red_y[hue_index] * sd->saturation) >> 8; + cmatrix[8] = (unsigned char)(hue_coord & 0xff); + cmatrix[9] = (unsigned char)((hue_coord >> 8) & 0x0f); + + hue_coord = (hsv_green_x[hue_index] * sd->saturation) >> 8; + cmatrix[10] = (unsigned char)(hue_coord & 0xff); + cmatrix[11] = (unsigned char)((hue_coord >> 8) & 0x0f); + + hue_coord = (hsv_green_y[hue_index] * sd->saturation) >> 8; + cmatrix[12] = (unsigned char)(hue_coord & 0xff); + cmatrix[13] = (unsigned char)((hue_coord >> 8) & 0x0f); + + hue_coord = (hsv_blue_x[hue_index] * sd->saturation) >> 8; + cmatrix[14] = (unsigned char)(hue_coord & 0xff); + cmatrix[15] = (unsigned char)((hue_coord >> 8) & 0x0f); + + hue_coord = (hsv_blue_y[hue_index] * sd->saturation) >> 8; + cmatrix[16] = (unsigned char)(hue_coord & 0xff); + cmatrix[17] = (unsigned char)((hue_coord >> 8) & 0x0f); + + return reg_w(gspca_dev, 0x10e1, cmatrix, 21); +} + +static int set_gamma(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + u8 gamma[17]; + u8 gval = sd->gamma * 0xb8 / 0x100; + + + gamma[0] = 0x0a; + gamma[1] = 0x13 + (gval * (0xcb - 0x13) / 0xb8); + gamma[2] = 0x25 + (gval * (0xee - 0x25) / 0xb8); + gamma[3] = 0x37 + (gval * (0xfa - 0x37) / 0xb8); + gamma[4] = 0x45 + (gval * (0xfc - 0x45) / 0xb8); + gamma[5] = 0x55 + (gval * (0xfb - 0x55) / 0xb8); + gamma[6] = 0x65 + (gval * (0xfc - 0x65) / 0xb8); + gamma[7] = 0x74 + (gval * (0xfd - 0x74) / 0xb8); + gamma[8] = 0x83 + (gval * (0xfe - 0x83) / 0xb8); + gamma[9] = 0x92 + (gval * (0xfc - 0x92) / 0xb8); + gamma[10] = 0xa1 + (gval * (0xfc - 0xa1) / 0xb8); + gamma[11] = 0xb0 + (gval * (0xfc - 0xb0) / 0xb8); + gamma[12] = 0xbf + (gval * (0xfb - 0xbf) / 0xb8); + gamma[13] = 0xce + (gval * (0xfb - 0xce) / 0xb8); + gamma[14] = 0xdf + (gval * (0xfd - 0xdf) / 0xb8); + gamma[15] = 0xea + (gval * (0xf9 - 0xea) / 0xb8); + gamma[16] = 0xf5; + + return reg_w(gspca_dev, 0x1190, gamma, 17); +} + +static int set_redblue(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + reg_w1(gspca_dev, 0x118c, sd->red); + reg_w1(gspca_dev, 0x118f, sd->blue); + return 0; +} + +static int set_hvflip(struct gspca_dev *gspca_dev) +{ + u8 value, tslb; + u16 value2; + struct sd *sd = (struct sd *) gspca_dev; + switch (sd->sensor) { + case SENSOR_OV9650: + i2c_r1(gspca_dev, 0x1e, &value); + value &= ~0x30; + tslb = 0x01; + if (sd->hflip) + value |= 0x20; + if (sd->vflip) { + value |= 0x10; + tslb = 0x49; + } + i2c_w1(gspca_dev, 0x1e, value); + i2c_w1(gspca_dev, 0x3a, tslb); + break; + case SENSOR_MT9V111: + case SENSOR_MT9V011: + i2c_r2(gspca_dev, 0x20, &value2); + value2 &= ~0xc0a0; + if (sd->hflip) + value2 |= 0x8080; + if (sd->vflip) + value2 |= 0x4020; + i2c_w2(gspca_dev, 0x20, value2); + break; + case SENSOR_MT9M111: + case SENSOR_MT9V112: + i2c_r2(gspca_dev, 0x20, &value2); + value2 &= ~0x0003; + if (sd->hflip) + value2 
|= 0x0002; + if (sd->vflip) + value2 |= 0x0001; + i2c_w2(gspca_dev, 0x20, value2); + break; + case SENSOR_HV7131R: + i2c_r1(gspca_dev, 0x01, &value); + value &= ~0x03; + if (sd->vflip) + value |= 0x01; + if (sd->hflip) + value |= 0x02; + i2c_w1(gspca_dev, 0x01, value); + break; + } + return 0; +} + +static int set_exposure(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + u8 exp[8] = {0x81, sd->i2c_addr, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e}; + switch (sd->sensor) { + case SENSOR_OV7660: + case SENSOR_OV7670: + case SENSOR_SOI968: + case SENSOR_OV9655: + case SENSOR_OV9650: + exp[0] |= (3 << 4); + exp[2] = 0x2d; + exp[3] = sd->exposure & 0xff; + exp[4] = sd->exposure >> 8; + break; + case SENSOR_MT9M001: + case SENSOR_MT9M111: + case SENSOR_MT9V112: + case SENSOR_MT9V111: + case SENSOR_MT9V011: + exp[0] |= (3 << 4); + exp[2] = 0x09; + exp[3] = sd->exposure >> 8; + exp[4] = sd->exposure & 0xff; + break; + case SENSOR_HV7131R: + exp[0] |= (4 << 4); + exp[2] = 0x25; + exp[3] = ((sd->exposure * 0xffffff) / 0xffff) >> 16; + exp[4] = ((sd->exposure * 0xffffff) / 0xffff) >> 8; + exp[5] = ((sd->exposure * 0xffffff) / 0xffff) & 0xff; + break; + } + i2c_w(gspca_dev, exp); + return 0; +} + +static int set_gain(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + u8 gain[8] = {0x81, sd->i2c_addr, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d}; + switch (sd->sensor) { + case SENSOR_OV7660: + case SENSOR_OV7670: + case SENSOR_SOI968: + case SENSOR_OV9655: + case SENSOR_OV9650: + gain[0] |= (2 << 4); + gain[3] = ov_gain[sd->gain]; + break; + case SENSOR_MT9V011: + case SENSOR_MT9V111: + gain[0] |= (3 << 4); + gain[2] = 0x35; + gain[3] = micron1_gain[sd->gain] >> 8; + gain[4] = micron1_gain[sd->gain] & 0xff; + break; + case SENSOR_MT9V112: + case SENSOR_MT9M111: + gain[0] |= (3 << 4); + gain[2] = 0x2f; + gain[3] = micron1_gain[sd->gain] >> 8; + gain[4] = micron1_gain[sd->gain] & 0xff; + break; + case SENSOR_MT9M001: + gain[0] |= (3 << 4); + gain[2] = 0x2f; + gain[3] = micron2_gain[sd->gain] >> 8; + gain[4] = micron2_gain[sd->gain] & 0xff; + break; + case SENSOR_HV7131R: + gain[0] |= (2 << 4); + gain[2] = 0x30; + gain[3] = hv7131r_gain[sd->gain]; + break; + } + i2c_w(gspca_dev, gain); + return 0; +} + +static int sd_setbrightness(struct gspca_dev *gspca_dev, s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + + sd->brightness = val; + if (gspca_dev->streaming) + return set_cmatrix(gspca_dev); + return 0; +} + +static int sd_getbrightness(struct gspca_dev *gspca_dev, s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + *val = sd->brightness; + return 0; +} + + +static int sd_setcontrast(struct gspca_dev *gspca_dev, s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + + sd->contrast = val; + if (gspca_dev->streaming) + return set_cmatrix(gspca_dev); + return 0; +} + +static int sd_getcontrast(struct gspca_dev *gspca_dev, s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + *val = sd->contrast; + return 0; +} + +static int sd_setsaturation(struct gspca_dev *gspca_dev, s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + + sd->saturation = val; + if (gspca_dev->streaming) + return set_cmatrix(gspca_dev); + return 0; +} + +static int sd_getsaturation(struct gspca_dev *gspca_dev, s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + *val = sd->saturation; + return 0; +} + +static int sd_sethue(struct gspca_dev *gspca_dev, s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + + sd->hue = val; + if (gspca_dev->streaming) + return 
set_cmatrix(gspca_dev); + return 0; +} + +static int sd_gethue(struct gspca_dev *gspca_dev, s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + *val = sd->hue; + return 0; +} + +static int sd_setgamma(struct gspca_dev *gspca_dev, s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + + sd->gamma = val; + if (gspca_dev->streaming) + return set_gamma(gspca_dev); + return 0; +} + +static int sd_getgamma(struct gspca_dev *gspca_dev, s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + *val = sd->gamma; + return 0; +} + +static int sd_setredbalance(struct gspca_dev *gspca_dev, s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + + sd->red = val; + if (gspca_dev->streaming) + return set_redblue(gspca_dev); + return 0; +} + +static int sd_getredbalance(struct gspca_dev *gspca_dev, s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + *val = sd->red; + return 0; +} + +static int sd_setbluebalance(struct gspca_dev *gspca_dev, s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + + sd->blue = val; + if (gspca_dev->streaming) + return set_redblue(gspca_dev); + return 0; +} + +static int sd_getbluebalance(struct gspca_dev *gspca_dev, s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + *val = sd->blue; + return 0; +} + +static int sd_sethflip(struct gspca_dev *gspca_dev, s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + + sd->hflip = val; + if (gspca_dev->streaming) + return set_hvflip(gspca_dev); + return 0; +} + +static int sd_gethflip(struct gspca_dev *gspca_dev, s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + *val = sd->hflip; + return 0; +} + +static int sd_setvflip(struct gspca_dev *gspca_dev, s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + + sd->vflip = val; + if (gspca_dev->streaming) + return set_hvflip(gspca_dev); + return 0; +} + +static int sd_getvflip(struct gspca_dev *gspca_dev, s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + *val = sd->vflip; + return 0; +} + +static int sd_setexposure(struct gspca_dev *gspca_dev, s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + + sd->exposure = val; + if (gspca_dev->streaming) + return set_exposure(gspca_dev); + return 0; +} + +static int sd_getexposure(struct gspca_dev *gspca_dev, s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + *val = sd->exposure; + return 0; +} + +static int sd_setgain(struct gspca_dev *gspca_dev, s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + + sd->gain = val; + if (gspca_dev->streaming) + return set_gain(gspca_dev); + return 0; +} + +static int sd_getgain(struct gspca_dev *gspca_dev, s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + *val = sd->gain; + return 0; +} + +static int sd_setautoexposure(struct gspca_dev *gspca_dev, s32 val) +{ + struct sd *sd = (struct sd *) gspca_dev; + sd->auto_exposure = val; + return 0; +} + +static int sd_getautoexposure(struct gspca_dev *gspca_dev, s32 *val) +{ + struct sd *sd = (struct sd *) gspca_dev; + *val = sd->auto_exposure; + return 0; +} + +#ifdef CONFIG_VIDEO_ADV_DEBUG +static int sd_dbg_g_register(struct gspca_dev *gspca_dev, + struct v4l2_dbg_register *reg) +{ + struct sd *sd = (struct sd *) gspca_dev; + switch (reg->match.type) { + case V4L2_CHIP_MATCH_HOST: + if (reg->match.addr != 0) + return -EINVAL; + if (reg->reg < 0x1000 || reg->reg > 0x11ff) + return -EINVAL; + if (reg_r(gspca_dev, reg->reg, 1) < 0) + return -EINVAL; + reg->val = gspca_dev->usb_buf[0]; + return 0; + case V4L2_CHIP_MATCH_I2C_ADDR: + if (reg->match.addr != sd->i2c_addr) + return -EINVAL; + if (sd->sensor >= 
SENSOR_MT9V011 && + sd->sensor <= SENSOR_MT9M111) { + if (i2c_r2(gspca_dev, reg->reg, (u16 *)&reg->val) < 0) + return -EINVAL; + } else { + if (i2c_r1(gspca_dev, reg->reg, (u8 *)&reg->val) < 0) + return -EINVAL; + } + return 0; + } + return -EINVAL; +} + +static int sd_dbg_s_register(struct gspca_dev *gspca_dev, + struct v4l2_dbg_register *reg) +{ + struct sd *sd = (struct sd *) gspca_dev; + switch (reg->match.type) { + case V4L2_CHIP_MATCH_HOST: + if (reg->match.addr != 0) + return -EINVAL; + if (reg->reg < 0x1000 || reg->reg > 0x11ff) + return -EINVAL; + if (reg_w1(gspca_dev, reg->reg, reg->val) < 0) + return -EINVAL; + return 0; + case V4L2_CHIP_MATCH_I2C_ADDR: + if (reg->match.addr != sd->i2c_addr) + return -EINVAL; + if (sd->sensor >= SENSOR_MT9V011 && + sd->sensor <= SENSOR_MT9M111) { + if (i2c_w2(gspca_dev, reg->reg, reg->val) < 0) + return -EINVAL; + } else { + if (i2c_w1(gspca_dev, reg->reg, reg->val) < 0) + return -EINVAL; + } + return 0; + } + return -EINVAL; +} +#endif + +static int sd_chip_ident(struct gspca_dev *gspca_dev, + struct v4l2_dbg_chip_ident *chip) +{ + struct sd *sd = (struct sd *) gspca_dev; + + switch (chip->match.type) { + case V4L2_CHIP_MATCH_HOST: + if (chip->match.addr != 0) + return -EINVAL; + chip->revision = 0; + chip->ident = V4L2_IDENT_SN9C20X; + return 0; + case V4L2_CHIP_MATCH_I2C_ADDR: + if (chip->match.addr != sd->i2c_addr) + return -EINVAL; + chip->revision = 0; + chip->ident = i2c_ident[sd->sensor]; + return 0; + } + return -EINVAL; +} + +static int sd_config(struct gspca_dev *gspca_dev, + const struct usb_device_id *id) +{ + struct sd *sd = (struct sd *) gspca_dev; + struct cam *cam; + + cam = &gspca_dev->cam; + + sd->sensor = (id->driver_info >> 8) & 0xff; + sd->i2c_addr = id->driver_info & 0xff; + + switch (sd->sensor) { + case SENSOR_OV9650: + cam->cam_mode = sxga_mode; + cam->nmodes = ARRAY_SIZE(sxga_mode); + break; + default: + cam->cam_mode = vga_mode; + cam->nmodes = ARRAY_SIZE(vga_mode); + } + + sd->old_step = 0; + sd->older_step = 0; + sd->exposure_step = 16; + + sd->brightness = BRIGHTNESS_DEFAULT; + sd->contrast = CONTRAST_DEFAULT; + sd->saturation = SATURATION_DEFAULT; + sd->hue = HUE_DEFAULT; + sd->gamma = GAMMA_DEFAULT; + sd->red = RED_DEFAULT; + sd->blue = BLUE_DEFAULT; + + sd->hflip = HFLIP_DEFAULT; + sd->vflip = VFLIP_DEFAULT; + sd->exposure = EXPOSURE_DEFAULT; + sd->gain = GAIN_DEFAULT; + sd->auto_exposure = AUTO_EXPOSURE_DEFAULT; + + sd->quality = 95; + +#ifdef CONFIG_USB_GSPCA_SN9C20X_EVDEV + sd->input_gpio = (id->driver_info >> 16) & 0xff; + if (sn9c20x_input_init(gspca_dev) < 0) + return -ENODEV; +#endif + return 0; +} + +static int sd_init(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + int i; + u8 value; + u8 i2c_init[9] = + {0x80, sd->i2c_addr, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}; + + for (i = 0; i < ARRAY_SIZE(bridge_init); i++) { + value = bridge_init[i][1]; + if (reg_w(gspca_dev, bridge_init[i][0], &value, 1) < 0) { + err("Device initialization failed"); + return -ENODEV; + } + } + + if (reg_w(gspca_dev, 0x10c0, i2c_init, 9) < 0) { + err("Device initialization failed"); + return -ENODEV; + } + + switch (sd->sensor) { + case SENSOR_OV9650: + if (ov9650_init_sensor(gspca_dev) < 0) + return -ENODEV; + info("OV9650 sensor detected"); + break; + case SENSOR_OV9655: + if (ov9655_init_sensor(gspca_dev) < 0) + return -ENODEV; + info("OV9655 sensor detected"); + break; + case SENSOR_SOI968: + if (soi968_init_sensor(gspca_dev) < 0) + return -ENODEV; + info("SOI968 sensor detected"); + break; + case 
SENSOR_OV7660: + if (ov7660_init_sensor(gspca_dev) < 0) + return -ENODEV; + info("OV7660 sensor detected"); + break; + case SENSOR_OV7670: + if (ov7670_init_sensor(gspca_dev) < 0) + return -ENODEV; + info("OV7670 sensor detected"); + break; + case SENSOR_MT9VPRB: + if (mt9v_init_sensor(gspca_dev) < 0) + return -ENODEV; + break; + case SENSOR_MT9M111: + if (mt9m111_init_sensor(gspca_dev) < 0) + return -ENODEV; + info("MT9M111 sensor detected"); + break; + case SENSOR_MT9M001: + if (mt9m001_init_sensor(gspca_dev) < 0) + return -ENODEV; + info("MT9M001 sensor detected"); + break; + case SENSOR_HV7131R: + if (hv7131r_init_sensor(gspca_dev) < 0) + return -ENODEV; + info("HV7131R sensor detected"); + break; + default: + info("Unsupported Sensor"); + return -ENODEV; + } + + return 0; +} + +static void configure_sensor_output(struct gspca_dev *gspca_dev, int mode) +{ + struct sd *sd = (struct sd *) gspca_dev; + u8 value; + switch (sd->sensor) { + case SENSOR_OV9650: + if (mode & MODE_SXGA) { + i2c_w1(gspca_dev, 0x17, 0x1b); + i2c_w1(gspca_dev, 0x18, 0xbc); + i2c_w1(gspca_dev, 0x19, 0x01); + i2c_w1(gspca_dev, 0x1a, 0x82); + i2c_r1(gspca_dev, 0x12, &value); + i2c_w1(gspca_dev, 0x12, value & 0x07); + } else { + i2c_w1(gspca_dev, 0x17, 0x24); + i2c_w1(gspca_dev, 0x18, 0xc5); + i2c_w1(gspca_dev, 0x19, 0x00); + i2c_w1(gspca_dev, 0x1a, 0x3c); + i2c_r1(gspca_dev, 0x12, &value); + i2c_w1(gspca_dev, 0x12, (value & 0x7) | 0x40); + } + break; + } +} + +#define HW_WIN(mode, hstart, vstart) \ +((const u8 []){hstart & 0xff, hstart >> 8, \ +vstart & 0xff, vstart >> 8, \ +(mode & MODE_SXGA ? 1280 >> 4 : 640 >> 4), \ +(mode & MODE_SXGA ? 1024 >> 3 : 480 >> 3)}) + +#define CLR_WIN(width, height) \ +((const u8 [])\ +{0, width >> 2, 0, height >> 1,\ +((width >> 10) & 0x01) | ((height >> 8) & 0x6)}) + +static int sd_start(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + int mode = gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv; + int width = gspca_dev->width; + int height = gspca_dev->height; + u8 fmt, scale = 0; + + sd->jpeg_hdr = kmalloc(JPEG_HDR_SZ, GFP_KERNEL); + if (sd->jpeg_hdr == NULL) + return -ENOMEM; + + jpeg_define(sd->jpeg_hdr, height, width, + 0x21); + jpeg_set_qual(sd->jpeg_hdr, sd->quality); + + if (mode & MODE_RAW) + fmt = 0x2d; + else if (mode & MODE_JPEG) + fmt = 0x2c; + else + fmt = 0x2f; + + switch (mode & 0x0f) { + case 3: + scale = 0xc0; + info("Set 1280x1024"); + break; + case 2: + scale = 0x80; + info("Set 640x480"); + break; + case 1: + scale = 0x90; + info("Set 320x240"); + break; + case 0: + scale = 0xa0; + info("Set 160x120"); + break; + } + + configure_sensor_output(gspca_dev, mode); + reg_w(gspca_dev, 0x1100, sd->jpeg_hdr + JPEG_QT0_OFFSET, 64); + reg_w(gspca_dev, 0x1140, sd->jpeg_hdr + JPEG_QT1_OFFSET, 64); + reg_w(gspca_dev, 0x10fb, CLR_WIN(width, height), 5); + reg_w(gspca_dev, 0x1180, HW_WIN(mode, sd->hstart, sd->vstart), 6); + reg_w1(gspca_dev, 0x1189, scale); + reg_w1(gspca_dev, 0x10e0, fmt); + + set_cmatrix(gspca_dev); + set_gamma(gspca_dev); + set_redblue(gspca_dev); + set_gain(gspca_dev); + set_exposure(gspca_dev); + set_hvflip(gspca_dev); + + reg_r(gspca_dev, 0x1061, 1); + reg_w1(gspca_dev, 0x1061, gspca_dev->usb_buf[0] | 0x02); + return 0; +} + +static void sd_stopN(struct gspca_dev *gspca_dev) +{ + reg_r(gspca_dev, 0x1061, 1); + reg_w1(gspca_dev, 0x1061, gspca_dev->usb_buf[0] & ~0x02); +} + +static void sd_stop0(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + kfree(sd->jpeg_hdr); +} + +static void 
do_autoexposure(struct gspca_dev *gspca_dev) +{ + struct sd *sd = (struct sd *) gspca_dev; + int avg_lum, new_exp; + + if (!sd->auto_exposure) + return; + + avg_lum = atomic_read(&sd->avg_lum); + + /* + * some hardcoded values are present + * like those for maximal/minimal exposure + * and exposure steps + */ + if (avg_lum < MIN_AVG_LUM) { + if (sd->exposure > 0x1770) + return; + + new_exp = sd->exposure + sd->exposure_step; + if (new_exp > 0x1770) + new_exp = 0x1770; + if (new_exp < 0x10) + new_exp = 0x10; + sd->exposure = new_exp; + set_exposure(gspca_dev); + + sd->older_step = sd->old_step; + sd->old_step = 1; + + if (sd->old_step ^ sd->older_step) + sd->exposure_step /= 2; + else + sd->exposure_step += 2; + } + if (avg_lum > MAX_AVG_LUM) { + if (sd->exposure < 0x10) + return; + new_exp = sd->exposure - sd->exposure_step; + if (new_exp > 0x1700) + new_exp = 0x1770; + if (new_exp < 0x10) + new_exp = 0x10; + sd->exposure = new_exp; + set_exposure(gspca_dev); + sd->older_step = sd->old_step; + sd->old_step = 0; + + if (sd->old_step ^ sd->older_step) + sd->exposure_step /= 2; + else + sd->exposure_step += 2; + } +} + +static void sd_pkt_scan(struct gspca_dev *gspca_dev, + struct gspca_frame *frame, /* target */ + u8 *data, /* isoc packet */ + int len) /* iso packet length */ +{ + struct sd *sd = (struct sd *) gspca_dev; + int avg_lum; + static unsigned char frame_header[] = + {0xff, 0xff, 0x00, 0xc4, 0xc4, 0x96}; + if (len == 64 && memcmp(data, frame_header, 6) == 0) { + avg_lum = ((data[35] >> 2) & 3) | + (data[20] << 2) | + (data[19] << 10); + avg_lum += ((data[35] >> 4) & 3) | + (data[22] << 2) | + (data[21] << 10); + avg_lum += ((data[35] >> 6) & 3) | + (data[24] << 2) | + (data[23] << 10); + avg_lum += (data[36] & 3) | + (data[26] << 2) | + (data[25] << 10); + avg_lum += ((data[36] >> 2) & 3) | + (data[28] << 2) | + (data[27] << 10); + avg_lum += ((data[36] >> 4) & 3) | + (data[30] << 2) | + (data[29] << 10); + avg_lum += ((data[36] >> 6) & 3) | + (data[32] << 2) | + (data[31] << 10); + avg_lum += ((data[44] >> 4) & 3) | + (data[34] << 2) | + (data[33] << 10); + avg_lum >>= 9; + atomic_set(&sd->avg_lum, avg_lum); + gspca_frame_add(gspca_dev, LAST_PACKET, + frame, data, len); + return; + } + if (gspca_dev->last_packet_type == LAST_PACKET) { + if (gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv + & MODE_JPEG) { + gspca_frame_add(gspca_dev, FIRST_PACKET, frame, + sd->jpeg_hdr, JPEG_HDR_SZ); + gspca_frame_add(gspca_dev, INTER_PACKET, frame, + data, len); + } else { + gspca_frame_add(gspca_dev, FIRST_PACKET, frame, + data, len); + } + } else { + gspca_frame_add(gspca_dev, INTER_PACKET, frame, data, len); + } +} + +/* sub-driver description */ +static const struct sd_desc sd_desc = { + .name = MODULE_NAME, + .ctrls = sd_ctrls, + .nctrls = ARRAY_SIZE(sd_ctrls), + .config = sd_config, + .init = sd_init, + .start = sd_start, + .stopN = sd_stopN, + .stop0 = sd_stop0, + .pkt_scan = sd_pkt_scan, + .dq_callback = do_autoexposure, +#ifdef CONFIG_VIDEO_ADV_DEBUG + .set_register = sd_dbg_s_register, + .get_register = sd_dbg_g_register, +#endif + .get_chip_ident = sd_chip_ident, +}; + +#define SN9C20X(sensor, i2c_addr, button_mask) \ + .driver_info = (button_mask << 16) \ + | (SENSOR_ ## sensor << 8) \ + | (i2c_addr) + +static const __devinitdata struct usb_device_id device_table[] = { + {USB_DEVICE(0x0c45, 0x6240), SN9C20X(MT9M001, 0x5d, 0)}, + {USB_DEVICE(0x0c45, 0x6242), SN9C20X(MT9M111, 0x5d, 0)}, + {USB_DEVICE(0x0c45, 0x6248), SN9C20X(OV9655, 0x30, 0)}, + {USB_DEVICE(0x0c45, 0x624e), 
SN9C20X(SOI968, 0x30, 0x10)}, + {USB_DEVICE(0x0c45, 0x624f), SN9C20X(OV9650, 0x30, 0)}, + {USB_DEVICE(0x0c45, 0x6251), SN9C20X(OV9650, 0x30, 0)}, + {USB_DEVICE(0x0c45, 0x6253), SN9C20X(OV9650, 0x30, 0)}, + {USB_DEVICE(0x0c45, 0x6260), SN9C20X(OV7670, 0x21, 0)}, + {USB_DEVICE(0x0c45, 0x6270), SN9C20X(MT9VPRB, 0x00, 0)}, + {USB_DEVICE(0x0c45, 0x627b), SN9C20X(OV7660, 0x21, 0)}, + {USB_DEVICE(0x0c45, 0x627c), SN9C20X(HV7131R, 0x11, 0)}, + {USB_DEVICE(0x0c45, 0x627f), SN9C20X(OV9650, 0x30, 0)}, + {USB_DEVICE(0x0c45, 0x6280), SN9C20X(MT9M001, 0x5d, 0)}, + {USB_DEVICE(0x0c45, 0x6282), SN9C20X(MT9M111, 0x5d, 0)}, + {USB_DEVICE(0x0c45, 0x6288), SN9C20X(OV9655, 0x30, 0)}, + {USB_DEVICE(0x0c45, 0x628e), SN9C20X(SOI968, 0x30, 0)}, + {USB_DEVICE(0x0c45, 0x628f), SN9C20X(OV9650, 0x30, 0)}, + {USB_DEVICE(0x0c45, 0x62a0), SN9C20X(OV7670, 0x21, 0)}, + {USB_DEVICE(0x0c45, 0x62b0), SN9C20X(MT9VPRB, 0x00, 0)}, + {USB_DEVICE(0x0c45, 0x62b3), SN9C20X(OV9655, 0x30, 0)}, + {USB_DEVICE(0x0c45, 0x62bb), SN9C20X(OV7660, 0x21, 0)}, + {USB_DEVICE(0x0c45, 0x62bc), SN9C20X(HV7131R, 0x11, 0)}, + {USB_DEVICE(0x045e, 0x00f4), SN9C20X(OV9650, 0x30, 0)}, + {USB_DEVICE(0x145f, 0x013d), SN9C20X(OV7660, 0x21, 0)}, + {USB_DEVICE(0x0458, 0x7029), SN9C20X(HV7131R, 0x11, 0)}, + {USB_DEVICE(0xa168, 0x0610), SN9C20X(HV7131R, 0x11, 0)}, + {USB_DEVICE(0xa168, 0x0611), SN9C20X(HV7131R, 0x11, 0)}, + {USB_DEVICE(0xa168, 0x0613), SN9C20X(HV7131R, 0x11, 0)}, + {USB_DEVICE(0xa168, 0x0618), SN9C20X(HV7131R, 0x11, 0)}, + {USB_DEVICE(0xa168, 0x0614), SN9C20X(MT9M111, 0x5d, 0)}, + {USB_DEVICE(0xa168, 0x0615), SN9C20X(MT9M111, 0x5d, 0)}, + {USB_DEVICE(0xa168, 0x0617), SN9C20X(MT9M111, 0x5d, 0)}, + {} +}; +MODULE_DEVICE_TABLE(usb, device_table); + +/* -- device connect -- */ +static int sd_probe(struct usb_interface *intf, + const struct usb_device_id *id) +{ + return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), + THIS_MODULE); +} + +static void sd_disconnect(struct usb_interface *intf) +{ +#ifdef CONFIG_USB_GSPCA_SN9C20X_EVDEV + struct gspca_dev *gspca_dev = usb_get_intfdata(intf); + + sn9c20x_input_cleanup(gspca_dev); +#endif + + gspca_disconnect(intf); +} + +static struct usb_driver sd_driver = { + .name = MODULE_NAME, + .id_table = device_table, + .probe = sd_probe, + .disconnect = sd_disconnect, +#ifdef CONFIG_PM + .suspend = gspca_suspend, + .resume = gspca_resume, + .reset_resume = gspca_resume, +#endif +}; + +/* -- module insert / remove -- */ +static int __init sd_mod_init(void) +{ + int ret; + ret = usb_register(&sd_driver); + if (ret < 0) + return ret; + info("registered"); + return 0; +} +static void __exit sd_mod_exit(void) +{ + usb_deregister(&sd_driver); + info("deregistered"); +} + +module_init(sd_mod_init); +module_exit(sd_mod_exit); diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 95846d988011..74f16876f38d 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -338,6 +338,7 @@ struct v4l2_pix_format { /* Vendor-specific formats */ #define V4L2_PIX_FMT_WNVA v4l2_fourcc('W', 'N', 'V', 'A') /* Winnov hw compress */ #define V4L2_PIX_FMT_SN9C10X v4l2_fourcc('S', '9', '1', '0') /* SN9C10x compression */ +#define V4L2_PIX_FMT_SN9C20X_I420 v4l2_fourcc('S', '9', '2', '0') /* SN9C20x YUV 4:2:0 */ #define V4L2_PIX_FMT_PWC1 v4l2_fourcc('P', 'W', 'C', '1') /* pwc older webcam */ #define V4L2_PIX_FMT_PWC2 v4l2_fourcc('P', 'W', 'C', '2') /* pwc newer webcam */ #define V4L2_PIX_FMT_ET61X251 v4l2_fourcc('E', '6', '2', '5') /* ET61X251 compression */ diff --git 
a/include/media/v4l2-chip-ident.h b/include/media/v4l2-chip-ident.h index 11a4a2d3e364..94e908c0d7a0 100644 --- a/include/media/v4l2-chip-ident.h +++ b/include/media/v4l2-chip-ident.h @@ -60,6 +60,10 @@ enum { V4L2_IDENT_OV7670 = 250, V4L2_IDENT_OV7720 = 251, V4L2_IDENT_OV7725 = 252, + V4L2_IDENT_OV7660 = 253, + V4L2_IDENT_OV9650 = 254, + V4L2_IDENT_OV9655 = 255, + V4L2_IDENT_SOI968 = 256, /* module saa7146: reserved range 300-309 */ V4L2_IDENT_SAA7146 = 300, @@ -161,6 +165,9 @@ enum { /* module tw9910: just ident 9910 */ V4L2_IDENT_TW9910 = 9910, + /* module sn9c20x: just ident 10000 */ + V4L2_IDENT_SN9C20X = 10000, + /* module msp3400: reserved range 34000-34999 and 44000-44999 */ V4L2_IDENT_MSPX4XX = 34000, /* generic MSPX4XX identifier, only use internally (tveeprom.c). */ @@ -237,6 +244,11 @@ enum { V4L2_IDENT_MT9V022IX7ATC = 45010, /* No way to detect "normal" I77ATx */ V4L2_IDENT_MT9V022IX7ATM = 45015, /* and "lead free" IA7ATx chips */ V4L2_IDENT_MT9T031 = 45020, + V4L2_IDENT_MT9V111 = 45031, + V4L2_IDENT_MT9V112 = 45032, + + /* HV7131R CMOS sensor: just ident 46000 */ + V4L2_IDENT_HV7131R = 46000, /* module cs53132a: just ident 53132 */ V4L2_IDENT_CS53l32A = 53132, -- cgit v1.2.3 From b9454e83cac42fcdc90bfbfba479132bd6629455 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 Jul 2009 13:29:08 +0200 Subject: nl80211: introduce new key attributes We will soon want to nest key attributes into some new attribute for configuring static WEP keys at connect() and ibss_join() time, so we need nested attributes for that. However, key attributes right now are 'global'. This patch thus introduces new nested attributes for the key settings and functions for parsing them. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 36 +++++++++ net/wireless/nl80211.c | 203 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 192 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e496a2daf7ef..48e0913c2209 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -567,6 +567,9 @@ enum nl80211_commands { * @NL80211_ATTR_PREV_BSSID: previous BSSID, to be used by in ASSOCIATE * commands to specify using a reassociate frame * + * @NL80211_ATTR_KEY: key information in a nested attribute with + * %NL80211_KEY_* sub-attributes + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -692,6 +695,8 @@ enum nl80211_attrs { NL80211_ATTR_PREV_BSSID, + NL80211_ATTR_KEY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -720,6 +725,7 @@ enum nl80211_attrs { #define NL80211_ATTR_CIPHER_SUITE_GROUP NL80211_ATTR_CIPHER_SUITE_GROUP #define NL80211_ATTR_WPA_VERSIONS NL80211_ATTR_WPA_VERSIONS #define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES +#define NL80211_ATTR_KEY NL80211_ATTR_KEY #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 @@ -1320,4 +1326,34 @@ enum nl80211_wpa_versions { NL80211_WPA_VERSION_2 = 1 << 1, }; +/** + * enum nl80211_key_attributes - key attributes + * @__NL80211_KEY_INVALID: invalid + * @NL80211_KEY_DATA: (temporal) key data; for TKIP this consists of + * 16 bytes encryption key followed by 8 bytes each for TX and RX MIC + * keys + * @NL80211_KEY_IDX: key ID (u8, 0-3) + * @NL80211_KEY_CIPHER: key cipher suite (u32, as defined by IEEE 802.11 + * section 7.3.2.25.1, e.g. 
0x000FAC04) + * @NL80211_KEY_SEQ: transmit key sequence number (IV/PN) for TKIP and + * CCMP keys, each six bytes in little endian + * @NL80211_KEY_DEFAULT: flag indicating default key + * @NL80211_KEY_DEFAULT_MGMT: flag indicating default management key + * @__NL80211_KEY_AFTER_LAST: internal + * @NL80211_KEY_MAX: highest key attribute + */ +enum nl80211_key_attributes { + __NL80211_KEY_INVALID, + NL80211_KEY_DATA, + NL80211_KEY_IDX, + NL80211_KEY_CIPHER, + NL80211_KEY_SEQ, + NL80211_KEY_DEFAULT, + NL80211_KEY_DEFAULT_MGMT, + + /* keep last */ + __NL80211_KEY_AFTER_LAST, + NL80211_KEY_MAX = __NL80211_KEY_AFTER_LAST - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a00fd644370b..50cf59316292 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -73,6 +73,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_MAC] = { .type = NLA_BINARY, .len = ETH_ALEN }, [NL80211_ATTR_PREV_BSSID] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [NL80211_ATTR_KEY] = { .type = NLA_NESTED, }, [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, [NL80211_ATTR_KEY_IDX] = { .type = NLA_U8 }, @@ -134,6 +135,18 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, }; +/* policy for the attributes */ +static struct nla_policy +nl80211_key_policy[NL80211_KEY_MAX + 1] __read_mostly = { + [NL80211_KEY_DATA] = { .type = NLA_BINARY, + .len = WLAN_MAX_KEY_LEN }, + [NL80211_KEY_IDX] = { .type = NLA_U8 }, + [NL80211_KEY_CIPHER] = { .type = NLA_U32 }, + [NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 }, + [NL80211_KEY_DEFAULT] = { .type = NLA_FLAG }, + [NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG }, +}; + /* IE validation */ static bool is_valid_ie_attr(const struct nlattr *attr) { @@ -198,6 +211,100 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, /* netlink command implementations */ +struct key_parse { + struct key_params p; + int idx; + bool def, defmgmt; +}; + +static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) +{ + struct nlattr *tb[NL80211_KEY_MAX + 1]; + int err = nla_parse_nested(tb, NL80211_KEY_MAX, key, + nl80211_key_policy); + if (err) + return err; + + k->def = !!tb[NL80211_KEY_DEFAULT]; + k->defmgmt = !!tb[NL80211_KEY_DEFAULT_MGMT]; + + if (tb[NL80211_KEY_IDX]) + k->idx = nla_get_u8(tb[NL80211_KEY_IDX]); + + if (tb[NL80211_KEY_DATA]) { + k->p.key = nla_data(tb[NL80211_KEY_DATA]); + k->p.key_len = nla_len(tb[NL80211_KEY_DATA]); + } + + if (tb[NL80211_KEY_SEQ]) { + k->p.seq = nla_data(tb[NL80211_KEY_SEQ]); + k->p.seq_len = nla_len(tb[NL80211_KEY_SEQ]); + } + + if (tb[NL80211_KEY_CIPHER]) + k->p.cipher = nla_get_u32(tb[NL80211_KEY_CIPHER]); + + return 0; +} + +static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k) +{ + if (info->attrs[NL80211_ATTR_KEY_DATA]) { + k->p.key = nla_data(info->attrs[NL80211_ATTR_KEY_DATA]); + k->p.key_len = nla_len(info->attrs[NL80211_ATTR_KEY_DATA]); + } + + if (info->attrs[NL80211_ATTR_KEY_SEQ]) { + k->p.seq = nla_data(info->attrs[NL80211_ATTR_KEY_SEQ]); + k->p.seq_len = nla_len(info->attrs[NL80211_ATTR_KEY_SEQ]); + } + + if (info->attrs[NL80211_ATTR_KEY_IDX]) + k->idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); + + if (info->attrs[NL80211_ATTR_KEY_CIPHER]) + k->p.cipher = nla_get_u32(info->attrs[NL80211_ATTR_KEY_CIPHER]); + + k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT]; + k->defmgmt = 
!!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]; + + return 0; +} + +static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) +{ + int err; + + memset(k, 0, sizeof(*k)); + k->idx = -1; + + if (info->attrs[NL80211_ATTR_KEY]) + err = nl80211_parse_key_new(info->attrs[NL80211_ATTR_KEY], k); + else + err = nl80211_parse_key_old(info, k); + + if (err) + return err; + + if (k->def && k->defmgmt) + return -EINVAL; + + if (k->idx != -1) { + if (k->defmgmt) { + if (k->idx < 4 || k->idx > 5) + return -EINVAL; + } else if (k->def) { + if (k->idx < 0 || k->idx > 3) + return -EINVAL; + } else { + if (k->idx < 0 || k->idx > 5) + return -EINVAL; + } + } + + return 0; +} + static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *dev) { @@ -943,10 +1050,12 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) struct get_key_cookie { struct sk_buff *msg; int error; + int idx; }; static void get_key_callback(void *c, struct key_params *params) { + struct nlattr *key; struct get_key_cookie *cookie = c; if (params->key) @@ -961,6 +1070,26 @@ static void get_key_callback(void *c, struct key_params *params) NLA_PUT_U32(cookie->msg, NL80211_ATTR_KEY_CIPHER, params->cipher); + key = nla_nest_start(cookie->msg, NL80211_ATTR_KEY); + if (!key) + goto nla_put_failure; + + if (params->key) + NLA_PUT(cookie->msg, NL80211_KEY_DATA, + params->key_len, params->key); + + if (params->seq) + NLA_PUT(cookie->msg, NL80211_KEY_SEQ, + params->seq_len, params->seq); + + if (params->cipher) + NLA_PUT_U32(cookie->msg, NL80211_KEY_CIPHER, + params->cipher); + + NLA_PUT_U8(cookie->msg, NL80211_ATTR_KEY_IDX, cookie->idx); + + nla_nest_end(cookie->msg, key); + return; nla_put_failure: cookie->error = 1; @@ -1014,6 +1143,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) } cookie.msg = msg; + cookie.idx = key_idx; NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); NLA_PUT_U8(msg, NL80211_ATTR_KEY_IDX, key_idx); @@ -1049,26 +1179,21 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; + struct key_parse key; int err; struct net_device *dev; - u8 key_idx; int (*func)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); - if (!info->attrs[NL80211_ATTR_KEY_IDX]) - return -EINVAL; - - key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); + err = nl80211_parse_key(info, &key); + if (err) + return err; - if (info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]) { - if (key_idx < 4 || key_idx > 5) - return -EINVAL; - } else if (key_idx > 3) + if (key.idx < 0) return -EINVAL; - /* currently only support setting default key */ - if (!info->attrs[NL80211_ATTR_KEY_DEFAULT] && - !info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]) + /* only support setting default key */ + if (!key.def && !key.defmgmt) return -EINVAL; rtnl_lock(); @@ -1077,7 +1202,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock_rtnl; - if (info->attrs[NL80211_ATTR_KEY_DEFAULT]) + if (key.def) func = rdev->ops->set_default_key; else func = rdev->ops->set_default_mgmt_key; @@ -1087,13 +1212,13 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = func(&rdev->wiphy, dev, key_idx); + err = func(&rdev->wiphy, dev, key.idx); #ifdef CONFIG_WIRELESS_EXT if (!err) { if (func == rdev->ops->set_default_key) - dev->ieee80211_ptr->wext.default_key = 
key_idx; + dev->ieee80211_ptr->wext.default_key = key.idx; else - dev->ieee80211_ptr->wext.default_mgmt_key = key_idx; + dev->ieee80211_ptr->wext.default_mgmt_key = key.idx; } #endif @@ -1112,34 +1237,20 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev; int err, i; struct net_device *dev; - struct key_params params; - u8 key_idx = 0; + struct key_parse key; u8 *mac_addr = NULL; - memset(¶ms, 0, sizeof(params)); + err = nl80211_parse_key(info, &key); + if (err) + return err; - if (!info->attrs[NL80211_ATTR_KEY_CIPHER]) + if (!key.p.key) return -EINVAL; - if (info->attrs[NL80211_ATTR_KEY_DATA]) { - params.key = nla_data(info->attrs[NL80211_ATTR_KEY_DATA]); - params.key_len = nla_len(info->attrs[NL80211_ATTR_KEY_DATA]); - } - - if (info->attrs[NL80211_ATTR_KEY_SEQ]) { - params.seq = nla_data(info->attrs[NL80211_ATTR_KEY_SEQ]); - params.seq_len = nla_len(info->attrs[NL80211_ATTR_KEY_SEQ]); - } - - if (info->attrs[NL80211_ATTR_KEY_IDX]) - key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); - - params.cipher = nla_get_u32(info->attrs[NL80211_ATTR_KEY_CIPHER]); - if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (cfg80211_validate_key_settings(¶ms, key_idx, mac_addr)) + if (cfg80211_validate_key_settings(&key.p, key.idx, mac_addr)) return -EINVAL; rtnl_lock(); @@ -1149,7 +1260,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) goto unlock_rtnl; for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) - if (params.cipher == rdev->wiphy.cipher_suites[i]) + if (key.p.cipher == rdev->wiphy.cipher_suites[i]) break; if (i == rdev->wiphy.n_cipher_suites) { err = -EINVAL; @@ -1161,7 +1272,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = rdev->ops->add_key(&rdev->wiphy, dev, key_idx, mac_addr, ¶ms); + err = rdev->ops->add_key(&rdev->wiphy, dev, key.idx, mac_addr, &key.p); out: cfg80211_unlock_rdev(rdev); @@ -1177,14 +1288,12 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev; int err; struct net_device *dev; - u8 key_idx = 0; u8 *mac_addr = NULL; + struct key_parse key; - if (info->attrs[NL80211_ATTR_KEY_IDX]) - key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); - - if (key_idx > 5) - return -EINVAL; + err = nl80211_parse_key(info, &key); + if (err) + return err; if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -1200,13 +1309,13 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = rdev->ops->del_key(&rdev->wiphy, dev, key_idx, mac_addr); + err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx, mac_addr); #ifdef CONFIG_WIRELESS_EXT if (!err) { - if (key_idx == dev->ieee80211_ptr->wext.default_key) + if (key.idx == dev->ieee80211_ptr->wext.default_key) dev->ieee80211_ptr->wext.default_key = -1; - else if (key_idx == dev->ieee80211_ptr->wext.default_mgmt_key) + else if (key.idx == dev->ieee80211_ptr->wext.default_mgmt_key) dev->ieee80211_ptr->wext.default_mgmt_key = -1; } #endif -- cgit v1.2.3 From fffd0934b9390f34bec45762192b7edd3b12b4b5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 8 Jul 2009 14:22:54 +0200 Subject: cfg80211: rework key operation This reworks the key operation in cfg80211, and now only allows, from userspace, configuring keys (via nl80211) after the connection has been established (in managed mode), the IBSS been joined (in IBSS mode), at any time (in 
AP[_VLAN] modes) or never for all the other modes. In order to do shared key authentication correctly, it is now possible to give a WEP key to the AUTH command. To configure static WEP keys, these are given to the CONNECT or IBSS_JOIN command directly, for a userspace SME it is assumed it will configure it properly after the connection has been established. Since mac80211 used to check the default key in IBSS mode to see whether or not the network is protected, it needs an update in that area, as well as an update to make use of the WEP key passed to auth() for shared key authentication. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 5 ++ include/net/cfg80211.h | 18 ++++- net/mac80211/ibss.c | 9 +-- net/mac80211/ieee80211_i.h | 8 ++- net/mac80211/mlme.c | 11 ++- net/mac80211/util.c | 16 +++-- net/mac80211/wep.c | 6 +- net/mac80211/wep.h | 3 + net/wireless/core.c | 11 ++- net/wireless/core.h | 32 +++++++-- net/wireless/ibss.c | 79 +++++++++++++++++---- net/wireless/mlme.c | 16 ++++- net/wireless/nl80211.c | 170 ++++++++++++++++++++++++++++++++++++++++----- net/wireless/sme.c | 97 +++++++++++++++++++------- net/wireless/util.c | 41 ++++++++++- net/wireless/wext-compat.c | 163 ++++++++++++++++++++++++++----------------- net/wireless/wext-sme.c | 30 ++++++-- 17 files changed, 554 insertions(+), 161 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 48e0913c2209..b043b78dd2c3 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -569,6 +569,9 @@ enum nl80211_commands { * * @NL80211_ATTR_KEY: key information in a nested attribute with * %NL80211_KEY_* sub-attributes + * @NL80211_ATTR_KEYS: array of keys for static WEP keys for connect() + * and join_ibss(), key information is in a nested attribute each + * with %NL80211_KEY_* sub-attributes * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -696,6 +699,7 @@ enum nl80211_attrs { NL80211_ATTR_PREV_BSSID, NL80211_ATTR_KEY, + NL80211_ATTR_KEYS, /* add attributes here, update the policy in nl80211.c */ @@ -726,6 +730,7 @@ enum nl80211_attrs { #define NL80211_ATTR_WPA_VERSIONS NL80211_ATTR_WPA_VERSIONS #define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES #define NL80211_ATTR_KEY NL80211_ATTR_KEY +#define NL80211_ATTR_KEYS NL80211_ATTR_KEYS #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_REG_RULES 32 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 83c2c727d71e..65a5cbcb5d14 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -647,12 +647,17 @@ struct cfg80211_crypto_settings { * @auth_type: Authentication type (algorithm) * @ie: Extra IEs to add to Authentication frame or %NULL * @ie_len: Length of ie buffer in octets + * @key_len: length of WEP key for shared key authentication + * @key_idx: index of WEP key for shared key authentication + * @key: WEP key for shared key authentication */ struct cfg80211_auth_request { struct cfg80211_bss *bss; const u8 *ie; size_t ie_len; enum nl80211_auth_type auth_type; + const u8 *key; + u8 key_len, key_idx; }; /** @@ -727,6 +732,8 @@ struct cfg80211_disassoc_request { * @ie: information element(s) to include in the beacon * @ie_len: length of that * @beacon_interval: beacon interval to use + * @privacy: this is a protected network, keys will be configured + * after joining */ struct cfg80211_ibss_params { u8 *ssid; @@ -736,6 +743,7 @@ struct cfg80211_ibss_params { u8 ssid_len, ie_len; u16 
beacon_interval; bool channel_fixed; + bool privacy; }; /** @@ -755,6 +763,9 @@ struct cfg80211_ibss_params { * @assoc_ie_len: Length of assoc_ie in octets * @privacy: indicates whether privacy-enabled APs should be used * @crypto: crypto settings + * @key_len: length of WEP key for shared key authentication + * @key_idx: index of WEP key for shared key authentication + * @key: WEP key for shared key authentication */ struct cfg80211_connect_params { struct ieee80211_channel *channel; @@ -766,6 +777,8 @@ struct cfg80211_connect_params { size_t ie_len; bool privacy; struct cfg80211_crypto_settings crypto; + const u8 *key; + u8 key_len, key_idx; }; /** @@ -1223,9 +1236,10 @@ extern void wiphy_unregister(struct wiphy *wiphy); */ extern void wiphy_free(struct wiphy *wiphy); -/* internal struct */ +/* internal structs */ struct cfg80211_conn; struct cfg80211_internal_bss; +struct cfg80211_cached_keys; #define MAX_AUTH_BSSES 4 @@ -1267,6 +1281,7 @@ struct wireless_dev { CFG80211_SME_CONNECTED, } sme_state; struct cfg80211_conn *conn; + struct cfg80211_cached_keys *connect_keys; struct list_head event_list; spinlock_t event_lock; @@ -1280,6 +1295,7 @@ struct wireless_dev { struct { struct cfg80211_ibss_params ibss; struct cfg80211_connect_params connect; + struct cfg80211_cached_keys *keys; u8 *ie; size_t ie_len; u8 bssid[ETH_ALEN]; diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 15d5a53b59a8..8e2220000e5c 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -57,7 +57,7 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, */ if (auth_alg == WLAN_AUTH_OPEN && auth_transaction == 1) ieee80211_send_auth(sdata, 2, WLAN_AUTH_OPEN, NULL, 0, - sdata->u.ibss.bssid, 0); + sdata->u.ibss.bssid, NULL, 0, 0); } static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, @@ -494,7 +494,7 @@ static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) capability = WLAN_CAPABILITY_IBSS; - if (sdata->default_key) + if (ifibss->privacy) capability |= WLAN_CAPABILITY_PRIVACY; else sdata->drop_unencrypted = 0; @@ -524,9 +524,8 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) return; capability = WLAN_CAPABILITY_IBSS; - if (sdata->default_key) + if (ifibss->privacy) capability |= WLAN_CAPABILITY_PRIVACY; - if (ifibss->fixed_bssid) bssid = ifibss->bssid; if (ifibss->fixed_channel) @@ -872,6 +871,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, } else sdata->u.ibss.fixed_bssid = false; + sdata->u.ibss.privacy = params->privacy; + sdata->vif.bss_conf.beacon_int = params->beacon_interval; sdata->u.ibss.channel = params->channel; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 327aabc07abe..06b3411530f2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -247,6 +247,9 @@ struct ieee80211_mgd_work { int tries; + u8 key[WLAN_KEY_LEN_WEP104]; + u8 key_len, key_idx; + /* must be last */ u8 ie[0]; /* for auth or assoc frame, not probe */ }; @@ -321,6 +324,7 @@ struct ieee80211_if_ibss { bool fixed_bssid; bool fixed_channel; + bool privacy; u8 bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; @@ -1093,8 +1097,8 @@ int ieee80211_add_pending_skbs(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, - u8 *extra, size_t extra_len, - const u8 *bssid, int encrypt); + u8 *extra, size_t extra_len, const u8 *bssid, + const u8 *key, u8 key_len, u8 key_idx); int ieee80211_build_preq_ies(struct 
ieee80211_local *local, u8 *buffer, const u8 *ie, size_t ie_len); void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c9db9646025b..8e4a60497bba 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -954,7 +954,7 @@ ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, sdata->dev->name, wk->bss->cbss.bssid, wk->tries); ieee80211_send_auth(sdata, 1, wk->auth_alg, wk->ie, wk->ie_len, - wk->bss->cbss.bssid, 0); + wk->bss->cbss.bssid, NULL, 0, 0); wk->auth_transaction = 2; wk->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; @@ -1176,7 +1176,8 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, return; ieee80211_send_auth(sdata, 3, wk->auth_alg, elems.challenge - 2, elems.challenge_len + 2, - wk->bss->cbss.bssid, 1); + wk->bss->cbss.bssid, + wk->key, wk->key_len, wk->key_idx); wk->auth_transaction = 4; } @@ -2175,6 +2176,12 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, wk->ie_len = req->ie_len; } + if (req->key && req->key_len) { + wk->key_len = req->key_len; + wk->key_idx = req->key_idx; + memcpy(wk->key, req->key, req->key_len); + } + ssid = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); memcpy(wk->ssid, ssid + 2, ssid[1]); wk->ssid_len = ssid[1]; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 915e77769312..dbf66b52d38c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -31,6 +31,7 @@ #include "mesh.h" #include "wme.h" #include "led.h" +#include "wep.h" /* privid for wiphys to determine whether they belong to us or not */ void *mac80211_wiphy_privid = &mac80211_wiphy_privid; @@ -804,12 +805,13 @@ u32 ieee80211_mandatory_rates(struct ieee80211_local *local, void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, - u8 *extra, size_t extra_len, - const u8 *bssid, int encrypt) + u8 *extra, size_t extra_len, const u8 *bssid, + const u8 *key, u8 key_len, u8 key_idx) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; + int err; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 6 + extra_len); @@ -824,8 +826,6 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, memset(mgmt, 0, 24 + 6); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH); - if (encrypt) - mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); memcpy(mgmt->da, bssid, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, bssid, ETH_ALEN); @@ -835,7 +835,13 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, if (extra) memcpy(skb_put(skb, extra_len), extra, extra_len); - ieee80211_tx_skb(sdata, skb, encrypt); + if (auth_alg == WLAN_AUTH_SHARED_KEY && transaction == 3) { + mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + err = ieee80211_wep_encrypt(local, skb, key, key_len, key_idx); + WARN_ON(err); + } + + ieee80211_tx_skb(sdata, skb, 0); } int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 4fafb2d27c84..8a980f136941 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -144,9 +144,9 @@ void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, * * WEP frame payload: IV + TX key idx, RC4(data), ICV = RC4(CRC32(data)) */ -static int ieee80211_wep_encrypt(struct ieee80211_local *local, - struct sk_buff *skb, - const u8 *key, int keylen, int keyidx) +int ieee80211_wep_encrypt(struct 
ieee80211_local *local, + struct sk_buff *skb, + const u8 *key, int keylen, int keyidx) { u8 *iv; size_t len; diff --git a/net/mac80211/wep.h b/net/mac80211/wep.h index 85219ded8703..fe29d7e5759f 100644 --- a/net/mac80211/wep.h +++ b/net/mac80211/wep.h @@ -20,6 +20,9 @@ int ieee80211_wep_init(struct ieee80211_local *local); void ieee80211_wep_free(struct ieee80211_local *local); void ieee80211_wep_encrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, size_t klen, u8 *data, size_t data_len); +int ieee80211_wep_encrypt(struct ieee80211_local *local, + struct sk_buff *skb, + const u8 *key, int keylen, int keyidx); int ieee80211_wep_decrypt_data(struct crypto_blkcipher *tfm, u8 *rc4key, size_t klen, u8 *data, size_t data_len); bool ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key); diff --git a/net/wireless/core.c b/net/wireless/core.c index 1a78b3c70cf2..97cc5968b7d6 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -666,14 +666,10 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev_lock(wdev); switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - if (wdev->wext.ibss.ssid_len) - __cfg80211_join_ibss(rdev, dev, - &wdev->wext.ibss); + cfg80211_ibss_wext_join(rdev, wdev); break; case NL80211_IFTYPE_STATION: - if (wdev->wext.connect.ssid_len) - __cfg80211_connect(rdev, dev, - &wdev->wext.connect); + cfg80211_mgd_wext_connect(rdev, wdev); break; default: break; @@ -690,6 +686,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, } mutex_unlock(&rdev->devlist_mtx); mutex_destroy(&wdev->mtx); +#ifdef CONFIG_WIRELESS_EXT + kfree(wdev->wext.keys); +#endif break; case NETDEV_PRE_UP: if (!(wdev->wiphy->interface_modes & BIT(wdev->iftype))) diff --git a/net/wireless/core.h b/net/wireless/core.h index e46cd6eb61d7..2ec8ddbe57de 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -238,6 +238,12 @@ struct cfg80211_event { }; }; +struct cfg80211_cached_keys { + struct key_params params[6]; + u8 data[6][WLAN_MAX_KEY_LEN]; + int def, defmgmt; +}; + /* free object */ extern void cfg80211_dev_free(struct cfg80211_registered_device *rdev); @@ -256,14 +262,18 @@ void cfg80211_bss_age(struct cfg80211_registered_device *dev, /* IBSS */ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_ibss_params *params); + struct cfg80211_ibss_params *params, + struct cfg80211_cached_keys *connkeys); int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_ibss_params *params); + struct cfg80211_ibss_params *params, + struct cfg80211_cached_keys *connkeys); void cfg80211_clear_ibss(struct net_device *dev, bool nowext); int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext); void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid); +int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); /* MLME */ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, @@ -272,12 +282,14 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, enum nl80211_auth_type auth_type, const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len); + const u8 *ie, int ie_len, + const u8 *key, int key_len, int key_idx); int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_auth_type auth_type, const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 
*ie, int ie_len); + const u8 *ie, int ie_len, + const u8 *key, int key_len, int key_idx); int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, @@ -310,10 +322,12 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_connect_params *connect); + struct cfg80211_connect_params *connect, + struct cfg80211_cached_keys *connkeys); int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_connect_params *connect); + struct cfg80211_connect_params *connect, + struct cfg80211_cached_keys *connkeys); int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, struct net_device *dev, u16 reason, bool wextev); @@ -323,11 +337,14 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len); +int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); void cfg80211_conn_work(struct work_struct *work); /* internal helpers */ -int cfg80211_validate_key_settings(struct key_params *params, int key_idx, +int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, + struct key_params *params, int key_idx, const u8 *mac_addr); void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap); @@ -335,5 +352,6 @@ void cfg80211_sme_scan_done(struct net_device *dev); void cfg80211_sme_rx_auth(struct net_device *dev, const u8 *buf, size_t len); void cfg80211_sme_disassoc(struct net_device *dev, int idx); void __cfg80211_scan_done(struct work_struct *wk); +void cfg80211_upload_connect_keys(struct wireless_dev *wdev); #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 99ef9364b7e8..9394e78cd11f 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -39,6 +39,8 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid) cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); + cfg80211_upload_connect_keys(wdev); + nl80211_send_ibss_bssid(wiphy_to_dev(wdev->wiphy), dev, bssid, GFP_KERNEL); #ifdef CONFIG_WIRELESS_EXT @@ -71,7 +73,8 @@ EXPORT_SYMBOL(cfg80211_ibss_joined); int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_ibss_params *params) + struct cfg80211_ibss_params *params, + struct cfg80211_cached_keys *connkeys) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -81,13 +84,18 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, if (wdev->ssid_len) return -EALREADY; + if (WARN_ON(wdev->connect_keys)) + kfree(wdev->connect_keys); + wdev->connect_keys = connkeys; + #ifdef CONFIG_WIRELESS_EXT wdev->wext.ibss.channel = params->channel; #endif err = rdev->ops->join_ibss(&rdev->wiphy, dev, params); - - if (err) + if (err) { + wdev->connect_keys = NULL; return err; + } memcpy(wdev->ssid, params->ssid, params->ssid_len); wdev->ssid_len = params->ssid_len; @@ -97,13 +105,14 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_ibss_params *params) + struct cfg80211_ibss_params *params, + struct cfg80211_cached_keys *connkeys) { struct 
wireless_dev *wdev = dev->ieee80211_ptr; int err; wdev_lock(wdev); - err = __cfg80211_join_ibss(rdev, dev, params); + err = __cfg80211_join_ibss(rdev, dev, params, connkeys); wdev_unlock(wdev); return err; @@ -112,9 +121,22 @@ int cfg80211_join_ibss(struct cfg80211_registered_device *rdev, static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + int i; ASSERT_WDEV_LOCK(wdev); + kfree(wdev->connect_keys); + wdev->connect_keys = NULL; + + /* + * Delete all the keys ... pairwise keys can't really + * exist any more anyway, but default keys might. + */ + if (rdev->ops->del_key) + for (i = 0; i < 6; i++) + rdev->ops->del_key(wdev->wiphy, dev, i, NULL); + if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(&wdev->current_bss->pub); @@ -172,11 +194,14 @@ int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, } #ifdef CONFIG_WIRELESS_EXT -static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) { + struct cfg80211_cached_keys *ck = NULL; enum ieee80211_band band; - int i; + int i, err; + + ASSERT_WDEV_LOCK(wdev); if (!wdev->wext.ibss.beacon_interval) wdev->wext.ibss.beacon_interval = 100; @@ -216,8 +241,24 @@ static int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return 0; - return cfg80211_join_ibss(wiphy_to_dev(wdev->wiphy), - wdev->netdev, &wdev->wext.ibss); + if (wdev->wext.keys) + wdev->wext.keys->def = wdev->wext.default_key; + + wdev->wext.ibss.privacy = wdev->wext.default_key != -1; + + if (wdev->wext.keys) { + ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); + if (!ck) + return -ENOMEM; + for (i = 0; i < 6; i++) + ck->params[i].key = ck->data[i]; + } + err = __cfg80211_join_ibss(rdev, wdev->netdev, + &wdev->wext.ibss, ck); + if (err) + kfree(ck); + + return err; } int cfg80211_ibss_wext_siwfreq(struct net_device *dev, @@ -265,7 +306,11 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, wdev->wext.ibss.channel_fixed = false; } - return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); + wdev_lock(wdev); + err = cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); + wdev_unlock(wdev); + + return err; } /* temporary symbol - mark GPL - in the future the handler won't be */ EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwfreq); @@ -333,7 +378,11 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, memcpy(wdev->wext.ibss.ssid, ssid, len); wdev->wext.ibss.ssid_len = len; - return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); + wdev_lock(wdev); + err = cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); + wdev_unlock(wdev); + + return err; } /* temporary symbol - mark GPL - in the future the handler won't be */ EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwessid); @@ -414,7 +463,11 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, } else wdev->wext.ibss.bssid = NULL; - return cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); + wdev_lock(wdev); + err = cfg80211_ibss_wext_join(wiphy_to_dev(wdev->wiphy), wdev); + wdev_unlock(wdev); + + return err; } /* temporary symbol - mark GPL - in the future the handler won't be */ EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwap); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 1b2ca1fea7a1..8e4ce2fdf862 100644 --- 
a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -328,7 +328,8 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, enum nl80211_auth_type auth_type, const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len) + const u8 *ie, int ie_len, + const u8 *key, int key_len, int key_idx) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_auth_request req; @@ -337,6 +338,10 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); + if (auth_type == NL80211_AUTHTYPE_SHARED_KEY) + if (!key || !key_len || key_idx < 0 || key_idx > 4) + return -EINVAL; + if (wdev->current_bss && memcmp(bssid, wdev->current_bss->pub.bssid, ETH_ALEN) == 0) return -EALREADY; @@ -359,6 +364,9 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, req.auth_type = auth_type; req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS); + req.key = key; + req.key_len = key_len; + req.key_idx = key_idx; if (!req.bss) return -ENOENT; @@ -396,13 +404,15 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_auth_type auth_type, const u8 *bssid, const u8 *ssid, int ssid_len, - const u8 *ie, int ie_len) + const u8 *ie, int ie_len, + const u8 *key, int key_len, int key_idx) { int err; wdev_lock(dev->ieee80211_ptr); err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, - ssid, ssid_len, ie, ie_len); + ssid, ssid_len, ie, ie_len, + key, key_len, key_idx); wdev_unlock(dev->ieee80211_ptr); return err; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 50cf59316292..45c5f9c8e51b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -138,8 +138,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { /* policy for the attributes */ static struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] __read_mostly = { - [NL80211_KEY_DATA] = { .type = NLA_BINARY, - .len = WLAN_MAX_KEY_LEN }, + [NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, [NL80211_KEY_IDX] = { .type = NLA_U8 }, [NL80211_KEY_CIPHER] = { .type = NLA_U32 }, [NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 }, @@ -305,6 +304,83 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) return 0; } +static struct cfg80211_cached_keys * +nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, + struct nlattr *keys) +{ + struct key_parse parse; + struct nlattr *key; + struct cfg80211_cached_keys *result; + int rem, err, def = 0; + + result = kzalloc(sizeof(*result), GFP_KERNEL); + if (!result) + return ERR_PTR(-ENOMEM); + + result->def = -1; + result->defmgmt = -1; + + nla_for_each_nested(key, keys, rem) { + memset(&parse, 0, sizeof(parse)); + parse.idx = -1; + + err = nl80211_parse_key_new(key, &parse); + if (err) + goto error; + err = -EINVAL; + if (!parse.p.key) + goto error; + if (parse.idx < 0 || parse.idx > 4) + goto error; + if (parse.def) { + if (def) + goto error; + def = 1; + result->def = parse.idx; + } else if (parse.defmgmt) + goto error; + err = cfg80211_validate_key_settings(rdev, &parse.p, + parse.idx, NULL); + if (err) + goto error; + result->params[parse.idx].cipher = parse.p.cipher; + result->params[parse.idx].key_len = parse.p.key_len; + result->params[parse.idx].key = result->data[parse.idx]; + memcpy(result->data[parse.idx], parse.p.key, parse.p.key_len); + } + + return result; + error: + kfree(result); + return 
ERR_PTR(err); +} + +static int nl80211_key_allowed(struct wireless_dev *wdev) +{ + ASSERT_WDEV_LOCK(wdev); + + if (!netif_running(wdev->netdev)) + return -ENETDOWN; + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + break; + case NL80211_IFTYPE_ADHOC: + if (!wdev->current_bss) + return -ENOLINK; + break; + case NL80211_IFTYPE_STATION: + if (wdev->sme_state != CFG80211_SME_CONNECTED) + return -ENOLINK; + break; + default: + return -EINVAL; + } + + return 0; +} + static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *dev) { @@ -1212,7 +1288,11 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = func(&rdev->wiphy, dev, key.idx); + wdev_lock(dev->ieee80211_ptr); + err = nl80211_key_allowed(dev->ieee80211_ptr); + if (!err) + err = func(&rdev->wiphy, dev, key.idx); + #ifdef CONFIG_WIRELESS_EXT if (!err) { if (func == rdev->ops->set_default_key) @@ -1221,6 +1301,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->wext.default_mgmt_key = key.idx; } #endif + wdev_unlock(dev->ieee80211_ptr); out: cfg80211_unlock_rdev(rdev); @@ -1235,7 +1316,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; - int err, i; + int err; struct net_device *dev; struct key_parse key; u8 *mac_addr = NULL; @@ -1250,29 +1331,28 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (cfg80211_validate_key_settings(&key.p, key.idx, mac_addr)) - return -EINVAL; - rtnl_lock(); err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); if (err) goto unlock_rtnl; - for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) - if (key.p.cipher == rdev->wiphy.cipher_suites[i]) - break; - if (i == rdev->wiphy.n_cipher_suites) { - err = -EINVAL; + if (!rdev->ops->add_key) { + err = -EOPNOTSUPP; goto out; } - if (!rdev->ops->add_key) { - err = -EOPNOTSUPP; + if (cfg80211_validate_key_settings(rdev, &key.p, key.idx, mac_addr)) { + err = -EINVAL; goto out; } - err = rdev->ops->add_key(&rdev->wiphy, dev, key.idx, mac_addr, &key.p); + wdev_lock(dev->ieee80211_ptr); + err = nl80211_key_allowed(dev->ieee80211_ptr); + if (!err) + err = rdev->ops->add_key(&rdev->wiphy, dev, key.idx, + mac_addr, &key.p); + wdev_unlock(dev->ieee80211_ptr); out: cfg80211_unlock_rdev(rdev); @@ -1309,7 +1389,10 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) goto out; } - err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx, mac_addr); + wdev_lock(dev->ieee80211_ptr); + err = nl80211_key_allowed(dev->ieee80211_ptr); + if (!err) + err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx, mac_addr); #ifdef CONFIG_WIRELESS_EXT if (!err) { @@ -1319,6 +1402,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->wext.default_mgmt_key = -1; } #endif + wdev_unlock(dev->ieee80211_ptr); out: cfg80211_unlock_rdev(rdev); @@ -3159,6 +3243,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) const u8 *bssid, *ssid, *ie = NULL; int err, ssid_len, ie_len = 0; enum nl80211_auth_type auth_type; + struct key_parse key; if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) return -EINVAL; @@ -3175,6 +3260,25 @@ static int nl80211_authenticate(struct sk_buff *skb, struct 
genl_info *info) if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) return -EINVAL; + err = nl80211_parse_key(info, &key); + if (err) + return err; + + if (key.idx >= 0) { + if (!key.p.key || !key.p.key_len) + return -EINVAL; + if ((key.p.cipher != WLAN_CIPHER_SUITE_WEP40 || + key.p.key_len != WLAN_KEY_LEN_WEP40) && + (key.p.cipher != WLAN_CIPHER_SUITE_WEP104 || + key.p.key_len != WLAN_KEY_LEN_WEP104)) + return -EINVAL; + if (key.idx > 4) + return -EINVAL; + } else { + key.p.key_len = 0; + key.p.key = NULL; + } + rtnl_lock(); err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); @@ -3219,7 +3323,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) } err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, - ssid, ssid_len, ie, ie_len); + ssid, ssid_len, ie, ie_len, + key.p.key, key.p.key_len, key.idx); out: cfg80211_unlock_rdev(rdev); @@ -3506,6 +3611,7 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) struct net_device *dev; struct cfg80211_ibss_params ibss; struct wiphy *wiphy; + struct cfg80211_cached_keys *connkeys = NULL; int err; memset(&ibss, 0, sizeof(ibss)); @@ -3570,13 +3676,26 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) } ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; + ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; + + if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) { + connkeys = nl80211_parse_connkeys(rdev, + info->attrs[NL80211_ATTR_KEYS]); + if (IS_ERR(connkeys)) { + err = PTR_ERR(connkeys); + connkeys = NULL; + goto out; + } + } - err = cfg80211_join_ibss(rdev, dev, &ibss); + err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys); out: cfg80211_unlock_rdev(rdev); dev_put(dev); unlock_rtnl: + if (err) + kfree(connkeys); rtnl_unlock(); return err; } @@ -3746,6 +3865,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) struct net_device *dev; struct cfg80211_connect_params connect; struct wiphy *wiphy; + struct cfg80211_cached_keys *connkeys = NULL; int err; memset(&connect, 0, sizeof(connect)); @@ -3810,12 +3930,24 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) } } - err = cfg80211_connect(rdev, dev, &connect); + if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) { + connkeys = nl80211_parse_connkeys(rdev, + info->attrs[NL80211_ATTR_KEYS]); + if (IS_ERR(connkeys)) { + err = PTR_ERR(connkeys); + connkeys = NULL; + goto out; + } + } + + err = cfg80211_connect(rdev, dev, &connect, connkeys); out: cfg80211_unlock_rdev(rdev); dev_put(dev); unlock_rtnl: + if (err) + kfree(connkeys); rtnl_unlock(); return err; } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 79ca56cbfd36..d635a99dba51 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -125,7 +125,9 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) params->channel, params->auth_type, params->bssid, params->ssid, params->ssid_len, - NULL, 0); + NULL, 0, + params->key, params->key_len, + params->key_idx); case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; @@ -279,8 +281,12 @@ void cfg80211_sme_rx_auth(struct net_device *dev, /* select automatically between only open, shared, leap */ switch (wdev->conn->params.auth_type) { case NL80211_AUTHTYPE_OPEN_SYSTEM: - wdev->conn->params.auth_type = - NL80211_AUTHTYPE_SHARED_KEY; + if (wdev->connect_keys) + wdev->conn->params.auth_type = + NL80211_AUTHTYPE_SHARED_KEY; + else + wdev->conn->params.auth_type = + NL80211_AUTHTYPE_NETWORK_EAP; 
break; case NL80211_AUTHTYPE_SHARED_KEY: wdev->conn->params.auth_type = @@ -353,10 +359,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, #endif if (status == WLAN_STATUS_SUCCESS && - wdev->sme_state == CFG80211_SME_IDLE) { - wdev->sme_state = CFG80211_SME_CONNECTED; - return; - } + wdev->sme_state == CFG80211_SME_IDLE) + goto success; if (wdev->sme_state != CFG80211_SME_CONNECTING) return; @@ -370,24 +374,29 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, if (wdev->conn) wdev->conn->state = CFG80211_CONN_IDLE; - if (status == WLAN_STATUS_SUCCESS) { - bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, - wdev->ssid, wdev->ssid_len, - WLAN_CAPABILITY_ESS, - WLAN_CAPABILITY_ESS); - - if (WARN_ON(!bss)) - return; - - cfg80211_hold_bss(bss_from_pub(bss)); - wdev->current_bss = bss_from_pub(bss); - - wdev->sme_state = CFG80211_SME_CONNECTED; - } else { + if (status != WLAN_STATUS_SUCCESS) { wdev->sme_state = CFG80211_SME_IDLE; kfree(wdev->conn); wdev->conn = NULL; + kfree(wdev->connect_keys); + wdev->connect_keys = NULL; + return; } + + bss = cfg80211_get_bss(wdev->wiphy, NULL, bssid, + wdev->ssid, wdev->ssid_len, + WLAN_CAPABILITY_ESS, + WLAN_CAPABILITY_ESS); + + if (WARN_ON(!bss)) + return; + + cfg80211_hold_bss(bss_from_pub(bss)); + wdev->current_bss = bss_from_pub(bss); + + success: + wdev->sme_state = CFG80211_SME_CONNECTED; + cfg80211_upload_connect_keys(wdev); } void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, @@ -516,6 +525,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + int i; #ifdef CONFIG_WIRELESS_EXT union iwreq_data wrqu; #endif @@ -543,8 +554,15 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wdev->conn = NULL; } - nl80211_send_disconnected(wiphy_to_dev(wdev->wiphy), dev, - reason, ie, ie_len, from_ap); + nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap); + + /* + * Delete all the keys ... pairwise keys can't really + * exist any more anyway, but default keys might. 
+ */ + if (rdev->ops->del_key) + for (i = 0; i < 6; i++) + rdev->ops->del_key(wdev->wiphy, dev, i, NULL); #ifdef CONFIG_WIRELESS_EXT memset(&wrqu, 0, sizeof(wrqu)); @@ -580,7 +598,8 @@ EXPORT_SYMBOL(cfg80211_disconnected); int __cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_connect_params *connect) + struct cfg80211_connect_params *connect, + struct cfg80211_cached_keys *connkeys) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -590,6 +609,24 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, if (wdev->sme_state != CFG80211_SME_IDLE) return -EALREADY; + if (WARN_ON(wdev->connect_keys)) { + kfree(wdev->connect_keys); + wdev->connect_keys = NULL; + } + + if (connkeys && connkeys->def >= 0) { + int idx; + + idx = connkeys->def; + /* If given a WEP key we may need it for shared key auth */ + if (connkeys->params[idx].cipher == WLAN_CIPHER_SUITE_WEP40 || + connkeys->params[idx].cipher == WLAN_CIPHER_SUITE_WEP104) { + connect->key_idx = idx; + connect->key = connkeys->params[idx].key; + connect->key_len = connkeys->params[idx].key_len; + } + } + if (!rdev->ops->connect) { if (!rdev->ops->auth || !rdev->ops->assoc) return -EOPNOTSUPP; @@ -640,6 +677,7 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, cfg80211_get_conn_bss(wdev); wdev->sme_state = CFG80211_SME_CONNECTING; + wdev->connect_keys = connkeys; /* we're good if we have both BSSID and channel */ if (wdev->conn->params.bssid && wdev->conn->params.channel) { @@ -662,13 +700,16 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, kfree(wdev->conn); wdev->conn = NULL; wdev->sme_state = CFG80211_SME_IDLE; + wdev->connect_keys = NULL; } return err; } else { wdev->sme_state = CFG80211_SME_CONNECTING; + wdev->connect_keys = connkeys; err = rdev->ops->connect(&rdev->wiphy, dev, connect); if (err) { + wdev->connect_keys = NULL; wdev->sme_state = CFG80211_SME_IDLE; return err; } @@ -682,12 +723,13 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct cfg80211_connect_params *connect) + struct cfg80211_connect_params *connect, + struct cfg80211_cached_keys *connkeys) { int err; wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_connect(rdev, dev, connect); + err = __cfg80211_connect(rdev, dev, connect, connkeys); wdev_unlock(dev->ieee80211_ptr); return err; @@ -704,6 +746,9 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev, if (wdev->sme_state == CFG80211_SME_IDLE) return -EINVAL; + kfree(wdev->connect_keys); + wdev->connect_keys = NULL; + if (!rdev->ops->disconnect) { if (!rdev->ops->deauth) return -EOPNOTSUPP; diff --git a/net/wireless/util.c b/net/wireless/util.c index 28f8f96801d4..4bab380a1204 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -141,9 +141,12 @@ void ieee80211_set_bitrate_flags(struct wiphy *wiphy) set_mandatory_flags_band(wiphy->bands[band], band); } -int cfg80211_validate_key_settings(struct key_params *params, int key_idx, +int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, + struct key_params *params, int key_idx, const u8 *mac_addr) { + int i; + if (key_idx > 5) return -EINVAL; @@ -197,6 +200,12 @@ int cfg80211_validate_key_settings(struct key_params *params, int key_idx, } } + for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) + if (params->cipher == rdev->wiphy.cipher_suites[i]) + break; + if (i == rdev->wiphy.n_cipher_suites) + return -EINVAL; + 
return 0; } @@ -523,3 +532,33 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie) return NULL; } EXPORT_SYMBOL(ieee80211_bss_get_ie); + +void cfg80211_upload_connect_keys(struct wireless_dev *wdev) +{ + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct net_device *dev = wdev->netdev; + int i; + + if (!wdev->connect_keys) + return; + + for (i = 0; i < 6; i++) { + if (!wdev->connect_keys->params[i].cipher) + continue; + if (rdev->ops->add_key(wdev->wiphy, dev, i, NULL, + &wdev->connect_keys->params[i])) + printk(KERN_ERR "%s: failed to set key %d\n", + dev->name, i); + if (wdev->connect_keys->def == i) + if (rdev->ops->set_default_key(wdev->wiphy, dev, i)) + printk(KERN_ERR "%s: failed to set defkey %d\n", + dev->name, i); + if (wdev->connect_keys->defmgmt == i) + if (rdev->ops->set_default_mgmt_key(wdev->wiphy, dev, i)) + printk(KERN_ERR "%s: failed to set mgtdef %d\n", + dev->name, i); + } + + kfree(wdev->connect_keys); + wdev->connect_keys = NULL; +} diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 5088d89a30fc..5d0176338539 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -453,15 +453,32 @@ int cfg80211_wext_giwretry(struct net_device *dev, } EXPORT_SYMBOL_GPL(cfg80211_wext_giwretry); -static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *addr, - bool remove, bool tx_key, int idx, - struct key_params *params) +static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *addr, + bool remove, bool tx_key, int idx, + struct key_params *params) { struct wireless_dev *wdev = dev->ieee80211_ptr; - int err; + int err, i; + + if (!wdev->wext.keys) { + wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys), + GFP_KERNEL); + if (!wdev->wext.keys) + return -ENOMEM; + for (i = 0; i < 6; i++) + wdev->wext.keys->params[i].key = + wdev->wext.keys->data[i]; + } + + if (wdev->iftype != NL80211_IFTYPE_ADHOC && + wdev->iftype != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { + if (!wdev->current_bss) + return -ENOLINK; + if (!rdev->ops->set_default_mgmt_key) return -EOPNOTSUPP; @@ -471,8 +488,14 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, return -EINVAL; if (remove) { - err = rdev->ops->del_key(&rdev->wiphy, dev, idx, addr); + err = 0; + if (wdev->current_bss) + err = rdev->ops->del_key(&rdev->wiphy, dev, idx, addr); if (!err) { + if (!addr) { + wdev->wext.keys->params[idx].key_len = 0; + wdev->wext.keys->params[idx].cipher = 0; + } if (idx == wdev->wext.default_key) wdev->wext.default_key = -1; else if (idx == wdev->wext.default_mgmt_key) @@ -486,36 +509,64 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, return 0; return err; - } else { - if (addr) - tx_key = false; + } - if (cfg80211_validate_key_settings(params, idx, addr)) - return -EINVAL; + if (addr) + tx_key = false; + if (cfg80211_validate_key_settings(rdev, params, idx, addr)) + return -EINVAL; + + err = 0; + if (wdev->current_bss) err = rdev->ops->add_key(&rdev->wiphy, dev, idx, addr, params); - if (err) - return err; + if (err) + return err; + + if (!addr) { + wdev->wext.keys->params[idx] = *params; + memcpy(wdev->wext.keys->data[idx], + params->key, params->key_len); + wdev->wext.keys->params[idx].key = + wdev->wext.keys->data[idx]; + } - if (tx_key || (!addr && wdev->wext.default_key == -1)) { + if (params->cipher != 
WLAN_CIPHER_SUITE_AES_CMAC && + (tx_key || (!addr && wdev->wext.default_key == -1))) { + if (wdev->current_bss) err = rdev->ops->set_default_key(&rdev->wiphy, dev, idx); - if (!err) - wdev->wext.default_key = idx; - return err; - } + if (!err) + wdev->wext.default_key = idx; + return err; + } - if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC && - (tx_key || (!addr && wdev->wext.default_mgmt_key == -1))) { + if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC && + (tx_key || (!addr && wdev->wext.default_mgmt_key == -1))) { + if (wdev->current_bss) err = rdev->ops->set_default_mgmt_key(&rdev->wiphy, dev, idx); - if (!err) - wdev->wext.default_mgmt_key = idx; - return err; - } - - return 0; + if (!err) + wdev->wext.default_mgmt_key = idx; + return err; } + + return 0; +} + +static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *addr, + bool remove, bool tx_key, int idx, + struct key_params *params) +{ + int err; + + wdev_lock(dev->ieee80211_ptr); + err = __cfg80211_set_encryption(rdev, dev, addr, remove, + tx_key, idx, params); + wdev_unlock(dev->ieee80211_ptr); + + return err; } int cfg80211_wext_siwencode(struct net_device *dev, @@ -528,6 +579,10 @@ int cfg80211_wext_siwencode(struct net_device *dev, bool remove = false; struct key_params params; + if (wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_ADHOC) + return -EOPNOTSUPP; + /* no use -- only MFP (set_default_mgmt_key) is optional */ if (!rdev->ops->del_key || !rdev->ops->add_key || @@ -548,9 +603,14 @@ int cfg80211_wext_siwencode(struct net_device *dev, remove = true; else if (erq->length == 0) { /* No key data - just set the default TX key index */ - err = rdev->ops->set_default_key(&rdev->wiphy, dev, idx); + err = 0; + wdev_lock(wdev); + if (wdev->current_bss) + err = rdev->ops->set_default_key(&rdev->wiphy, + dev, idx); if (!err) wdev->wext.default_key = idx; + wdev_unlock(wdev); return err; } @@ -583,6 +643,10 @@ int cfg80211_wext_siwencodeext(struct net_device *dev, struct key_params params; u32 cipher; + if (wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_ADHOC) + return -EOPNOTSUPP; + /* no use -- only MFP (set_default_mgmt_key) is optional */ if (!rdev->ops->del_key || !rdev->ops->add_key || @@ -656,37 +720,15 @@ int cfg80211_wext_siwencodeext(struct net_device *dev, } EXPORT_SYMBOL_GPL(cfg80211_wext_siwencodeext); -struct giwencode_cookie { - size_t buflen; - char *keybuf; -}; - -static void giwencode_get_key_cb(void *cookie, struct key_params *params) -{ - struct giwencode_cookie *data = cookie; - - if (!params->key) { - data->buflen = 0; - return; - } - - data->buflen = min_t(size_t, data->buflen, params->key_len); - memcpy(data->keybuf, params->key, data->buflen); -} - int cfg80211_wext_giwencode(struct net_device *dev, struct iw_request_info *info, struct iw_point *erq, char *keybuf) { struct wireless_dev *wdev = dev->ieee80211_ptr; - struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); - int idx, err; - struct giwencode_cookie data = { - .keybuf = keybuf, - .buflen = erq->length, - }; + int idx; - if (!rdev->ops->get_key) + if (wdev->iftype != NL80211_IFTYPE_STATION && + wdev->iftype != NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; idx = erq->flags & IW_ENCODE_INDEX; @@ -701,21 +743,18 @@ int cfg80211_wext_giwencode(struct net_device *dev, erq->flags = idx + 1; - err = rdev->ops->get_key(&rdev->wiphy, dev, idx, NULL, &data, - giwencode_get_key_cb); - if (!err) { - erq->length = data.buflen; - 
erq->flags |= IW_ENCODE_ENABLED; - return 0; - } - - if (err == -ENOENT) { + if (!wdev->wext.keys || !wdev->wext.keys->params[idx].cipher) { erq->flags |= IW_ENCODE_DISABLED; erq->length = 0; return 0; } - return err; + erq->length = min_t(size_t, erq->length, + wdev->wext.keys->params[idx].key_len); + memcpy(keybuf, wdev->wext.keys->params[idx].key, erq->length); + erq->flags |= IW_ENCODE_ENABLED; + + return 0; } EXPORT_SYMBOL_GPL(cfg80211_wext_giwencode); diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 6f75aaa7f795..c33ea9a5de78 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -10,10 +10,11 @@ #include #include "nl80211.h" -static int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev) +int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) { - int err; + struct cfg80211_cached_keys *ck = NULL; + int err, i; ASSERT_RDEV_LOCK(rdev); ASSERT_WDEV_LOCK(wdev); @@ -25,10 +26,25 @@ static int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, wdev->wext.connect.ie_len = wdev->wext.ie_len; wdev->wext.connect.privacy = wdev->wext.default_key != -1; - err = 0; - if (wdev->wext.connect.ssid_len != 0) - err = __cfg80211_connect(rdev, wdev->netdev, - &wdev->wext.connect); + if (wdev->wext.keys) { + wdev->wext.keys->def = wdev->wext.default_key; + wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key; + } + + if (!wdev->wext.connect.ssid_len) + return 0; + + if (wdev->wext.keys) { + ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); + if (!ck) + return -ENOMEM; + for (i = 0; i < 6; i++) + ck->params[i].key = ck->data[i]; + } + err = __cfg80211_connect(rdev, wdev->netdev, + &wdev->wext.connect, ck); + if (err) + kfree(ck); return err; } -- cgit v1.2.3 From ca3dbc20d47ae43c201c215259d078e227bfcf01 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Fri, 10 Jul 2009 14:54:58 +0200 Subject: cfg80211: update misleading comment In cfg80211_scan_request n_channels refers to the total number of channels to scan. Update the misleading comment accordingly. Signed-off-by: Helmut Schaa Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 65a5cbcb5d14..a981ca8a5701 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -538,7 +538,7 @@ struct cfg80211_ssid { * @ssids: SSIDs to scan for (active scan only) * @n_ssids: number of SSIDs * @channels: channels to scan on. - * @n_channels: number of channels for each band + * @n_channels: total number of channels to scan * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @wiphy: the wiphy this was for -- cgit v1.2.3 From 48ab905d1a81b7df33a33def04a890e4e0c51460 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 10 Jul 2009 18:42:31 +0200 Subject: nl80211: report BSS status When connected to a BSS, or joined to an IBSS, we'll want to know in userspace without using wireless extensions, so report the BSS status in the BSS list. Userspace can query the BSS list, display all the information and retrieve the station information as well. 
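A bare-bones sketch of how a userspace client could read the new NL80211_BSS_STATUS attribute out of a scan dump is shown here; it is only an illustration, not part of the patch, and it assumes libnl 2+ generic netlink helpers and a hypothetical interface name "wlan0", with all error handling omitted.

/* Minimal scan-dump reader: prints the BSSID and, when present, the new
 * NL80211_BSS_STATUS value for each BSS entry.  Illustration only. */
#include <stdio.h>
#include <net/if.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nl80211.h>

static int bss_handler(struct nl_msg *msg, void *arg)
{
        struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
        struct nlattr *tb[NL80211_ATTR_MAX + 1];
        struct nlattr *bss[NL80211_BSS_MAX + 1];

        nla_parse(tb, NL80211_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
                  genlmsg_attrlen(gnlh, 0), NULL);
        if (!tb[NL80211_ATTR_BSS] ||
            nla_parse_nested(bss, NL80211_BSS_MAX, tb[NL80211_ATTR_BSS], NULL))
                return NL_SKIP;

        if (bss[NL80211_BSS_BSSID]) {
                unsigned char *id = nla_data(bss[NL80211_BSS_BSSID]);
                printf("BSS %02x:%02x:%02x:%02x:%02x:%02x",
                       id[0], id[1], id[2], id[3], id[4], id[5]);
        }
        if (bss[NL80211_BSS_STATUS]) {
                switch (nla_get_u32(bss[NL80211_BSS_STATUS])) {
                case NL80211_BSS_STATUS_AUTHENTICATED:
                        printf(" -- authenticated");
                        break;
                case NL80211_BSS_STATUS_ASSOCIATED:
                        printf(" -- associated");
                        break;
                case NL80211_BSS_STATUS_IBSS_JOINED:
                        printf(" -- joined");
                        break;
                }
        }
        printf("\n");
        return NL_SKIP;
}

int main(void)
{
        struct nl_sock *sk = nl_socket_alloc();
        struct nl_msg *msg;
        int nl80211_id;

        genl_connect(sk);
        nl80211_id = genl_ctrl_resolve(sk, "nl80211");

        /* dump the scan results for the (assumed) interface wlan0 */
        msg = nlmsg_alloc();
        genlmsg_put(msg, 0, 0, nl80211_id, 0, NLM_F_DUMP,
                    NL80211_CMD_GET_SCAN, 0);
        nla_put_u32(msg, NL80211_ATTR_IFINDEX, if_nametoindex("wlan0"));

        nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, bss_handler, NULL);
        nl_send_auto_complete(sk, msg);
        nl_recvmsgs_default(sk);

        nlmsg_free(msg);
        nl_socket_free(sk);
        return 0;
}

iw performs the equivalent lookup (with proper error handling) when dumping scan results, which is where the output below comes from.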
For example (from hwsim): $ iw dev wlan1 scan dump BSS 02:00:00:00:00:00 (on wlan1) -- associated freq: 2462 beacon interval: 100 capability: ESS ShortSlotTime (0x0401) signal: -50.00 dBm SSID: j Supported rates: 1.0* 2.0* 5.5* 11.0* 6.0 9.0 12.0 18.0 DS Paramater set: channel 11 ERP: Extended supported rates: 24.0 36.0 48.0 54.0 Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 11 +++++++++ net/wireless/nl80211.c | 65 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 59 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index b043b78dd2c3..962e2232a074 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1260,6 +1260,7 @@ enum nl80211_channel_type { * in mBm (100 * dBm) (s32) * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon * in unspecified units, scaled to 0..100 (u8) + * @NL80211_BSS_STATUS: status, if this BSS is "used" * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -1273,12 +1274,22 @@ enum nl80211_bss { NL80211_BSS_INFORMATION_ELEMENTS, NL80211_BSS_SIGNAL_MBM, NL80211_BSS_SIGNAL_UNSPEC, + NL80211_BSS_STATUS, /* keep last */ __NL80211_BSS_AFTER_LAST, NL80211_BSS_MAX = __NL80211_BSS_AFTER_LAST - 1 }; +/** + * enum nl80211_bss_status - BSS "status" + */ +enum nl80211_bss_status { + NL80211_BSS_STATUS_AUTHENTICATED, + NL80211_BSS_STATUS_ASSOCIATED, + NL80211_BSS_STATUS_IBSS_JOINED, +}; + /** * enum nl80211_auth_type - AuthenticationType * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 45c5f9c8e51b..da450ef1fc7e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3094,11 +3094,15 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct cfg80211_bss *res) + struct wireless_dev *wdev, + struct cfg80211_internal_bss *intbss) { + struct cfg80211_bss *res = &intbss->pub; void *hdr; struct nlattr *bss; + int i; + + ASSERT_WDEV_LOCK(wdev); hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_SCAN_RESULTS); @@ -3107,7 +3111,7 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_SCAN_GENERATION, rdev->bss_generation); - NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex); bss = nla_nest_start(msg, NL80211_ATTR_BSS); if (!bss) @@ -3136,6 +3140,28 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, break; } + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + if (intbss == wdev->current_bss) + NLA_PUT_U32(msg, NL80211_BSS_STATUS, + NL80211_BSS_STATUS_ASSOCIATED); + else for (i = 0; i < MAX_AUTH_BSSES; i++) { + if (intbss != wdev->auth_bsses[i]) + continue; + NLA_PUT_U32(msg, NL80211_BSS_STATUS, + NL80211_BSS_STATUS_AUTHENTICATED); + break; + } + break; + case NL80211_IFTYPE_ADHOC: + if (intbss == wdev->current_bss) + NLA_PUT_U32(msg, NL80211_BSS_STATUS, + NL80211_BSS_STATUS_IBSS_JOINED); + break; + default: + break; + } + nla_nest_end(msg, bss); return genlmsg_end(msg, hdr); @@ -3148,9 +3174,10 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) { - struct cfg80211_registered_device *dev; - struct net_device *netdev; + 
struct cfg80211_registered_device *rdev; + struct net_device *dev; struct cfg80211_internal_bss *scan; + struct wireless_dev *wdev; int ifidx = cb->args[0]; int start = cb->args[1], idx = 0; int err; @@ -3171,39 +3198,43 @@ static int nl80211_dump_scan(struct sk_buff *skb, cb->args[0] = ifidx; } - netdev = dev_get_by_index(&init_net, ifidx); - if (!netdev) + dev = dev_get_by_index(&init_net, ifidx); + if (!dev) return -ENODEV; - dev = cfg80211_get_dev_from_ifindex(ifidx); - if (IS_ERR(dev)) { - err = PTR_ERR(dev); + rdev = cfg80211_get_dev_from_ifindex(ifidx); + if (IS_ERR(rdev)) { + err = PTR_ERR(rdev); goto out_put_netdev; } - spin_lock_bh(&dev->bss_lock); - cfg80211_bss_expire(dev); + wdev = dev->ieee80211_ptr; - list_for_each_entry(scan, &dev->bss_list, list) { + wdev_lock(wdev); + spin_lock_bh(&rdev->bss_lock); + cfg80211_bss_expire(rdev); + + list_for_each_entry(scan, &rdev->bss_list, list) { if (++idx <= start) continue; if (nl80211_send_bss(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev, netdev, &scan->pub) < 0) { + rdev, wdev, scan) < 0) { idx--; goto out; } } out: - spin_unlock_bh(&dev->bss_lock); + spin_unlock_bh(&rdev->bss_lock); + wdev_unlock(wdev); cb->args[1] = idx; err = skb->len; - cfg80211_unlock_rdev(dev); + cfg80211_unlock_rdev(rdev); out_put_netdev: - dev_put(netdev); + dev_put(dev); return err; } -- cgit v1.2.3 From b770b43e95a66587fbd8c1841de83da87fbf23ea Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 16 Jul 2009 10:15:09 -0700 Subject: mac80211: drop frames for sta with no valid rate When we're associated we should be able to send data to target sta. If we cannot we may be trying to use the incorrect band to talk to the sta. Lets catch any such cases, warn, and drop the frames to not invalidate assumptions being made on rate control algorithms when they have a valid sta to communicate with. Any such cases should be handled and fixed. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/net/mac80211.h | 11 +++++++++++ net/mac80211/tx.c | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ce7cb1b5d453..d98fac54577b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2110,6 +2110,17 @@ rate_lowest_index(struct ieee80211_supported_band *sband, return 0; } +static inline +bool rate_usable_index_exists(struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta) +{ + unsigned int i; + + for (i = 0; i < sband->n_bitrates; i++) + if (rate_supported(sta, sband->band, i)) + return true; + return false; +} int ieee80211_rate_control_register(struct rate_control_ops *ops); void ieee80211_rate_control_unregister(struct rate_control_ops *ops); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 60ae086995b1..f3efd4f16e91 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -512,6 +512,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) int i, len; bool inval = false, rts = false, short_preamble = false; struct ieee80211_tx_rate_control txrc; + u32 sta_flags; memset(&txrc, 0, sizeof(txrc)); @@ -544,7 +545,26 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) (tx->sta && test_sta_flags(tx->sta, WLAN_STA_SHORT_PREAMBLE)))) txrc.short_preamble = short_preamble = true; + sta_flags = tx->sta ? get_sta_flags(tx->sta) : 0; + + /* + * Lets not bother rate control if we're associated and cannot + * talk to the sta. This should not happen. 
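 * (Illustration, not part of the patch: rate_usable_index_exists(), added
 * to include/net/mac80211.h above, simply walks sband->n_bitrates and
 * returns true as soon as rate_supported() reports an index the target
 * station can use on this band.)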
+ */ + if (WARN((tx->local->sw_scanning) && + (sta_flags & WLAN_STA_ASSOC) && + !rate_usable_index_exists(sband, &tx->sta->sta), + "%s: Dropped data frame as no usable bitrate found while " + "scanning and associated. Target station: " + "%pM on %d GHz band\n", + tx->dev->name, hdr->addr1, + tx->channel->band ? 5 : 2)) + return TX_DROP; + /* + * If we're associated with the sta at this point we know we can at + * least send the frame at the lowest bit rate. + */ rate_control_get_rate(tx->sdata, tx->sta, &txrc); if (unlikely(info->control.rates[0].idx < 0)) -- cgit v1.2.3 From 4c6d4f5c33fbe19b134c1af43af166fee79eb986 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 16 Jul 2009 10:05:41 -0700 Subject: mac80211: add helper for management / no-ack frame rate decision All current rate control algorithms agree to send management and no-ack frames at the lowest rate. They also agree to do this when sta and the private rate control data is NULL. We add a helper to mac80211 for this and simplify the rate control algorithm code. Developers wishing to make enhancements to rate control algorithms for broadcast/multicast frames can opt to not use this in their get_rate() mac80211 callback. Cc: Zhu Yi Acked-by: Reinette Chatre Cc: ipw3945-devel@lists.sourceforge.net Cc: Gabor Juhos Acked-by: Felix Fietkau Cc: Derek Smithies Cc: Chittajit Mitra Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/rc.c | 14 +------------- drivers/net/wireless/iwlwifi/iwl-3945-rs.c | 13 +++---------- drivers/net/wireless/iwlwifi/iwl-agn-rs.c | 7 +------ include/net/mac80211.h | 23 +++++++++++++++++++++++ net/mac80211/rate.c | 29 +++++++++++++++++++++++++++++ net/mac80211/rc80211_minstrel.c | 22 +--------------------- net/mac80211/rc80211_pid_algo.c | 11 +---------- 7 files changed, 59 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath9k/rc.c b/drivers/net/wireless/ath/ath9k/rc.c index 2c72901adbee..630fcf46e0dd 100644 --- a/drivers/net/wireless/ath/ath9k/rc.c +++ b/drivers/net/wireless/ath/ath9k/rc.c @@ -1518,23 +1518,11 @@ exit: static void ath_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) { - struct ieee80211_supported_band *sband = txrc->sband; - struct sk_buff *skb = txrc->skb; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ath_softc *sc = priv; struct ath_rate_priv *ath_rc_priv = priv_sta; - __le16 fc = hdr->frame_control; - /* lowest rate for management and NO_ACK frames */ - if (!ieee80211_is_data(fc) || - tx_info->flags & IEEE80211_TX_CTL_NO_ACK || !sta) { - tx_info->control.rates[0].idx = rate_lowest_index(sband, sta); - tx_info->control.rates[0].count = - (tx_info->flags & IEEE80211_TX_CTL_NO_ACK) ? - 1 : ATH_MGT_TXMAXTRY; + if (rate_control_send_low(sta, priv_sta, txrc)) return; - } /* Find tx rate for unicast frames */ ath_rc_ratefind(sc, ath_rc_priv, txrc); diff --git a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c index 2b776924d5e7..a16bd4147eac 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945-rs.c @@ -673,7 +673,6 @@ static void rs_get_rate(void *priv_r, struct ieee80211_sta *sta, s8 scale_action = 0; unsigned long flags; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - __le16 fc; u16 rate_mask = sta ?
sta->supp_rates[sband->band] : 0; s8 max_rate_idx = -1; struct iwl_priv *priv = (struct iwl_priv *)priv_r; @@ -681,16 +680,10 @@ static void rs_get_rate(void *priv_r, struct ieee80211_sta *sta, IWL_DEBUG_RATE(priv, "enter\n"); - /* Send management frames and NO_ACK data using lowest rate. */ - fc = hdr->frame_control; - if (!ieee80211_is_data(fc) || info->flags & IEEE80211_TX_CTL_NO_ACK || - !sta || !priv_sta) { - IWL_DEBUG_RATE(priv, "leave: No STA priv data to update!\n"); - info->control.rates[0].idx = rate_lowest_index(sband, sta); - if (info->flags & IEEE80211_TX_CTL_NO_ACK) - info->control.rates[0].count = 1; + if (rate_control_send_low(sta, priv_sta, txrc)) return; - } + + rate_mask = sta->supp_rates[sband->band]; /* get user max rate if set */ max_rate_idx = txrc->max_rate_idx; diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c index 3fea027f35d1..63280411fd58 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-rs.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-rs.c @@ -2481,13 +2481,8 @@ static void rs_get_rate(void *priv_r, struct ieee80211_sta *sta, void *priv_sta, } /* Send management frames and NO_ACK data using lowest rate. */ - if (!ieee80211_is_data(hdr->frame_control) || - info->flags & IEEE80211_TX_CTL_NO_ACK || !sta || !lq_sta) { - info->control.rates[0].idx = rate_lowest_index(sband, sta); - if (info->flags & IEEE80211_TX_CTL_NO_ACK) - info->control.rates[0].count = 1; + if (rate_control_send_low(sta, priv_sta, txrc)) return; - } rate_idx = lq_sta->last_txrate_idx; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d98fac54577b..a861259c3050 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2094,6 +2094,29 @@ static inline int rate_supported(struct ieee80211_sta *sta, return (sta == NULL || sta->supp_rates[band] & BIT(index)); } +/** + * rate_control_send_low - helper for drivers for management/no-ack frames + * + * Rate control algorithms that agree to use the lowest rate to + * send management frames and NO_ACK data with the respective hw + * retries should use this in the beginning of their mac80211 get_rate + * callback. If true is returned the rate control can simply return. + * If false is returned we guarantee that sta and priv_sta are + * not null. + * + * Rate control algorithms wishing to do more intelligent selection of + * rate for multicast/broadcast frames may choose to not use this. + * + * @sta: &struct ieee80211_sta pointer to the target destination. Note + * that this may be null. + * @priv_sta: private rate control structure. This may be null. + * @txrc: rate control information we should populate for mac80211.
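 * (Illustration, not part of the patch: with this helper in place, a
 * rate control get_rate() implementation typically begins with
 *	if (rate_control_send_low(sta, priv_sta, txrc))
 *		return;
 * and only runs its own rate selection when false is returned, which is
 * exactly what the driver updates in this patch do.)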
+ */ +bool rate_control_send_low(struct ieee80211_sta *sta, + void *priv_sta, + struct ieee80211_tx_rate_control *txrc); + + static inline s8 rate_lowest_index(struct ieee80211_supported_band *sband, struct ieee80211_sta *sta) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 4641f00a1e5c..8ac7a984d886 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -198,6 +198,35 @@ static void rate_control_release(struct kref *kref) kfree(ctrl_ref); } +static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) +{ + struct sk_buff *skb = txrc->skb; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + __le16 fc; + + fc = hdr->frame_control; + + return ((info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc)); +} + +bool rate_control_send_low(struct ieee80211_sta *sta, + void *priv_sta, + struct ieee80211_tx_rate_control *txrc) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); + + if (!sta || !priv_sta || rc_no_data_or_no_ack(txrc)) { + info->control.rates[0].idx = rate_lowest_index(txrc->sband, sta); + info->control.rates[0].count = + (info->flags & IEEE80211_TX_CTL_NO_ACK) ? + 1 : txrc->hw->max_rate_tries; + return true; + } + return false; +} +EXPORT_SYMBOL(rate_control_send_low); + void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_tx_rate_control *txrc) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 5bdce0c951dd..7c5142988bbb 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -70,19 +70,6 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix) return i; } -static inline bool -use_low_rate(struct sk_buff *skb) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - __le16 fc; - - fc = hdr->frame_control; - - return ((info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc)); -} - - static void minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) { @@ -231,7 +218,6 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) { struct sk_buff *skb = txrc->skb; - struct ieee80211_supported_band *sband = txrc->sband; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct minstrel_sta_info *mi = priv_sta; struct minstrel_priv *mp = priv; @@ -244,14 +230,8 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, int mrr_ndx[3]; int sample_rate; - if (!sta || !mi || use_low_rate(skb)) { - ar[0].idx = rate_lowest_index(sband, sta); - if (info->flags & IEEE80211_TX_CTL_NO_ACK) - ar[0].count = 1; - else - ar[0].count = mp->max_retry; + if (rate_control_send_low(sta, priv_sta, txrc)) return; - } mrr = mp->has_mrr && !txrc->rts && !txrc->bss_conf->use_cts_prot; diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index 549607703bd8..8c053be9dc24 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -276,11 +276,9 @@ rate_control_pid_get_rate(void *priv, struct ieee80211_sta *sta, { struct sk_buff *skb = txrc->skb; struct ieee80211_supported_band *sband = txrc->sband; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct rc_pid_sta_info *spinfo = priv_sta; int rateidx; - __le16 fc; if (txrc->rts) info->control.rates[0].count = @@ -290,15 +288,8 @@ 
rate_control_pid_get_rate(void *priv, struct ieee80211_sta *sta, txrc->hw->conf.short_frame_max_tx_count; /* Send management frames and NO_ACK data using lowest rate. */ - fc = hdr->frame_control; - if (!sta || !spinfo || !ieee80211_is_data(fc) || - info->flags & IEEE80211_TX_CTL_NO_ACK) { - info->control.rates[0].idx = rate_lowest_index(sband, sta); - if (info->flags & IEEE80211_TX_CTL_NO_ACK) - info->control.rates[0].count = 1; - + if (rate_control_send_low(sta, priv_sta, txrc)) return; - } rateidx = spinfo->txrate_idx; -- cgit v1.2.3 From 3b8d81e020f77c9da8b85b0685c8cd2ca7c7b150 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 17 Jun 2009 17:43:56 +0200 Subject: mac80211: remove master netdev With the internal 'pending' queue system in place, we can simply put packets there instead of pushing them off to the master dev, getting rid of the master interface completely. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 + net/mac80211/agg-tx.c | 3 - net/mac80211/debugfs.c | 2 +- net/mac80211/ieee80211_i.h | 19 ++- net/mac80211/iface.c | 43 ++++--- net/mac80211/main.c | 120 +----------------- net/mac80211/rate.c | 2 +- net/mac80211/rx.c | 16 ++- net/mac80211/scan.c | 19 ++- net/mac80211/tx.c | 299 +++++++++++++++++++-------------------------- net/mac80211/util.c | 52 ++------ net/mac80211/wme.c | 6 +- net/mac80211/wme.h | 3 +- 13 files changed, 195 insertions(+), 392 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a861259c3050..7dd67a1ff4d5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -241,6 +241,8 @@ struct ieee80211_bss_conf { * it can be sent out. * @IEEE80211_TX_INTFL_RETRIED: completely internal to mac80211, * used to indicate that a frame was already retried due to PS + * @IEEE80211_TX_INTFL_DONT_ENCRYPT: completely internal to mac80211, + * used to indicate frame should not be encrypted */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -259,6 +261,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_INTFL_RCALGO = BIT(13), IEEE80211_TX_INTFL_NEED_TXPROCESSING = BIT(14), IEEE80211_TX_INTFL_RETRIED = BIT(15), + IEEE80211_TX_INTFL_DONT_ENCRYPT = BIT(16), }; /** diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 9e5762ad307d..1958c7c42cd9 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -383,9 +383,6 @@ static void ieee80211_agg_splice_packets(struct ieee80211_local *local, if (!skb_queue_empty(&sta->ampdu_mlme.tid_tx[tid]->pending)) { spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - /* mark queue as pending, it is stopped already */ - __set_bit(IEEE80211_QUEUE_STOP_REASON_PENDING, - &local->queue_stop_reasons[queue]); /* copy over remaining packets */ skb_queue_splice_tail_init( &sta->ampdu_mlme.tid_tx[tid]->pending, diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 6c439cd5ccea..96991b68f048 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -175,7 +175,7 @@ static ssize_t queues_read(struct file *file, char __user *user_buf, for (q = 0; q < local->hw.queues; q++) res += sprintf(buf + res, "%02d: %#.8lx/%d\n", q, local->queue_stop_reasons[q], - __netif_subqueue_stopped(local->mdev, q)); + skb_queue_len(&local->pending[q])); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); return simple_read_from_buffer(user_buf, count, ppos, buf, res); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a34bca2dc52f..6a0177137dd5 100644 --- 
a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -567,14 +567,9 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_CSA, IEEE80211_QUEUE_STOP_REASON_AGGREGATION, IEEE80211_QUEUE_STOP_REASON_SUSPEND, - IEEE80211_QUEUE_STOP_REASON_PENDING, IEEE80211_QUEUE_STOP_REASON_SKB_ADD, }; -struct ieee80211_master_priv { - struct ieee80211_local *local; -}; - struct ieee80211_local { /* embed the driver visible part. * don't cast (use the static inlines below), but we keep @@ -587,13 +582,20 @@ struct ieee80211_local { /* also used to protect ampdu_ac_queue and amdpu_ac_stop_refcnt */ spinlock_t queue_stop_reason_lock; - struct net_device *mdev; /* wmaster# - "master" 802.11 device */ int open_count; int monitors, cooked_mntrs; /* number of interfaces with corresponding FIF_ flags */ int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss; unsigned int filter_flags; /* FIF_* */ struct iw_statistics wstats; + + /* protects the aggregated multicast list and filter calls */ + spinlock_t filter_lock; + + /* aggregated multicast list */ + struct dev_addr_list *mc_list; + int mc_count; + bool tim_in_locked_section; /* see ieee80211_beacon_get() */ /* @@ -813,10 +815,6 @@ struct ieee80211_local { static inline struct ieee80211_sub_if_data * IEEE80211_DEV_TO_SUB_IF(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - BUG_ON(!local || local->mdev == dev); - return netdev_priv(dev); } @@ -996,7 +994,6 @@ void ieee80211_recalc_idle(struct ieee80211_local *local); /* tx handling */ void ieee80211_clear_tx_pending(struct ieee80211_local *local); void ieee80211_tx_pending(unsigned long data); -int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev); int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 090aa5a47182..2f797a86ced5 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -190,10 +190,6 @@ static int ieee80211_open(struct net_device *dev) ETH_ALEN); } - if (compare_ether_addr(null_addr, local->mdev->dev_addr) == 0) - memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr, - ETH_ALEN); - /* * Validate the MAC address for this device. 
*/ @@ -229,9 +225,9 @@ static int ieee80211_open(struct net_device *dev) if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) local->fif_other_bss++; - netif_addr_lock_bh(local->mdev); + spin_lock_bh(&local->filter_lock); ieee80211_configure_filter(local); - netif_addr_unlock_bh(local->mdev); + spin_unlock_bh(&local->filter_lock); break; default: conf.vif = &sdata->vif; @@ -243,9 +239,9 @@ static int ieee80211_open(struct net_device *dev) if (ieee80211_vif_is_mesh(&sdata->vif)) { local->fif_other_bss++; - netif_addr_lock_bh(local->mdev); + spin_lock_bh(&local->filter_lock); ieee80211_configure_filter(local); - netif_addr_unlock_bh(local->mdev); + spin_unlock_bh(&local->filter_lock); ieee80211_start_mesh(sdata); } @@ -279,10 +275,6 @@ static int ieee80211_open(struct net_device *dev) } if (local->open_count == 0) { - res = dev_open(local->mdev); - WARN_ON(res); - if (res) - goto err_del_interface; tasklet_enable(&local->tx_pending_tasklet); tasklet_enable(&local->tasklet); } @@ -393,7 +385,14 @@ static int ieee80211_stop(struct net_device *dev) if (sdata->flags & IEEE80211_SDATA_PROMISC) atomic_dec(&local->iff_promiscs); - dev_mc_unsync(local->mdev, dev); + netif_addr_lock_bh(dev); + spin_lock_bh(&local->filter_lock); + __dev_addr_unsync(&local->mc_list, &local->mc_count, + &dev->mc_list, &dev->mc_count); + ieee80211_configure_filter(local); + spin_unlock_bh(&local->filter_lock); + netif_addr_unlock_bh(dev); + del_timer_sync(&local->dynamic_ps_timer); cancel_work_sync(&local->dynamic_ps_enable_work); @@ -442,9 +441,9 @@ static int ieee80211_stop(struct net_device *dev) if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) local->fif_other_bss--; - netif_addr_lock_bh(local->mdev); + spin_lock_bh(&local->filter_lock); ieee80211_configure_filter(local); - netif_addr_unlock_bh(local->mdev); + spin_unlock_bh(&local->filter_lock); break; case NL80211_IFTYPE_STATION: del_timer_sync(&sdata->u.mgd.chswitch_timer); @@ -487,9 +486,9 @@ static int ieee80211_stop(struct net_device *dev) local->fif_other_bss--; atomic_dec(&local->iff_allmultis); - netif_addr_lock_bh(local->mdev); + spin_lock_bh(&local->filter_lock); ieee80211_configure_filter(local); - netif_addr_unlock_bh(local->mdev); + spin_unlock_bh(&local->filter_lock); ieee80211_stop_mesh(sdata); } @@ -535,9 +534,6 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_recalc_ps(local, -1); if (local->open_count == 0) { - if (netif_running(local->mdev)) - dev_close(local->mdev); - drv_stop(local); ieee80211_led_radio(local, false); @@ -584,8 +580,11 @@ static void ieee80211_set_multicast_list(struct net_device *dev) atomic_dec(&local->iff_promiscs); sdata->flags ^= IEEE80211_SDATA_PROMISC; } - - dev_mc_sync(local->mdev, dev); + spin_lock_bh(&local->filter_lock); + __dev_addr_sync(&local->mc_list, &local->mc_count, + &dev->mc_list, &dev->mc_count); + ieee80211_configure_filter(local); + spin_unlock_bh(&local->filter_lock); } /* diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5b69f5f07299..3234f3751d22 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -83,75 +83,14 @@ void ieee80211_configure_filter(struct ieee80211_local *local) new_flags |= (1<<31); drv_configure_filter(local, changed_flags, &new_flags, - local->mdev->mc_count, - local->mdev->mc_list); + local->mc_count, + local->mc_list); WARN_ON(new_flags & (1<<31)); local->filter_flags = new_flags & ~(1<<31); } -/* master interface */ - -static int header_parse_80211(const struct sk_buff *skb, unsigned char *haddr) -{ - memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); 
/* addr2 */ - return ETH_ALEN; -} - -static const struct header_ops ieee80211_header_ops = { - .create = eth_header, - .parse = header_parse_80211, - .rebuild = eth_rebuild_header, - .cache = eth_header_cache, - .cache_update = eth_header_cache_update, -}; - -static int ieee80211_master_open(struct net_device *dev) -{ - struct ieee80211_master_priv *mpriv = netdev_priv(dev); - struct ieee80211_local *local = mpriv->local; - struct ieee80211_sub_if_data *sdata; - int res = -EOPNOTSUPP; - - /* we hold the RTNL here so can safely walk the list */ - list_for_each_entry(sdata, &local->interfaces, list) { - if (netif_running(sdata->dev)) { - res = 0; - break; - } - } - - if (res) - return res; - - netif_tx_start_all_queues(local->mdev); - - return 0; -} - -static int ieee80211_master_stop(struct net_device *dev) -{ - struct ieee80211_master_priv *mpriv = netdev_priv(dev); - struct ieee80211_local *local = mpriv->local; - struct ieee80211_sub_if_data *sdata; - - /* we hold the RTNL here so can safely walk the list */ - list_for_each_entry(sdata, &local->interfaces, list) - if (netif_running(sdata->dev)) - dev_close(sdata->dev); - - return 0; -} - -static void ieee80211_master_set_multicast_list(struct net_device *dev) -{ - struct ieee80211_master_priv *mpriv = netdev_priv(dev); - struct ieee80211_local *local = mpriv->local; - - ieee80211_configure_filter(local); -} - int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) { struct ieee80211_channel *chan, *scan_chan; @@ -310,7 +249,6 @@ void ieee80211_tx_status_irqsafe(struct ieee80211_hw *hw, struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int tmp; - skb->dev = local->mdev; skb->pkt_type = IEEE80211_TX_STATUS_MSG; skb_queue_tail(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS ? &local->skb_queue : &local->skb_queue_unreliable, skb); @@ -716,7 +654,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, mutex_init(&local->scan_mtx); spin_lock_init(&local->key_lock); - + spin_lock_init(&local->filter_lock); spin_lock_init(&local->queue_stop_reason_lock); INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); @@ -752,30 +690,11 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, } EXPORT_SYMBOL(ieee80211_alloc_hw); -static const struct net_device_ops ieee80211_master_ops = { - .ndo_start_xmit = ieee80211_master_start_xmit, - .ndo_open = ieee80211_master_open, - .ndo_stop = ieee80211_master_stop, - .ndo_set_multicast_list = ieee80211_master_set_multicast_list, - .ndo_select_queue = ieee80211_select_queue, -}; - -static void ieee80211_master_setup(struct net_device *mdev) -{ - mdev->type = ARPHRD_IEEE80211; - mdev->netdev_ops = &ieee80211_master_ops; - mdev->header_ops = &ieee80211_header_ops; - mdev->tx_queue_len = 1000; - mdev->addr_len = ETH_ALEN; -} - int ieee80211_register_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); int result; enum ieee80211_band band; - struct net_device *mdev; - struct ieee80211_master_priv *mpriv; int channels, i, j, max_bitrates; bool supp_ht; static const u32 cipher_suites[] = { @@ -874,16 +793,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (hw->queues > IEEE80211_MAX_QUEUES) hw->queues = IEEE80211_MAX_QUEUES; - mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv), - "wmaster%d", ieee80211_master_setup, - hw->queues); - if (!mdev) - goto fail_mdev_alloc; - - mpriv = netdev_priv(mdev); - mpriv->local = local; - local->mdev = mdev; - local->hw.workqueue = create_singlethread_workqueue(wiphy_name(local->hw.wiphy)); if 
(!local->hw.workqueue) { @@ -918,17 +827,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } rtnl_lock(); - result = dev_alloc_name(local->mdev, local->mdev->name); - if (result < 0) - goto fail_dev; - - memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); - SET_NETDEV_DEV(local->mdev, wiphy_dev(local->hw.wiphy)); - local->mdev->features |= NETIF_F_NETNS_LOCAL; - - result = register_netdevice(local->mdev); - if (result < 0) - goto fail_dev; result = ieee80211_init_rate_ctrl_alg(local, hw->rate_control_algorithm); @@ -981,9 +879,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_led_exit(local); ieee80211_remove_interfaces(local); fail_rate: - unregister_netdevice(local->mdev); - local->mdev = NULL; - fail_dev: rtnl_unlock(); ieee80211_wep_free(local); fail_wep: @@ -992,9 +887,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) debugfs_hw_del(local); destroy_workqueue(local->hw.workqueue); fail_workqueue: - if (local->mdev) - free_netdev(local->mdev); - fail_mdev_alloc: wiphy_unregister(local->hw.wiphy); fail_wiphy_register: kfree(local->int_scan_req.channels); @@ -1019,13 +911,8 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) * because the driver cannot be handing us frames any * more and the tasklet is killed. */ - - /* First, we remove all virtual interfaces. */ ieee80211_remove_interfaces(local); - /* then, finally, remove the master interface */ - unregister_netdevice(local->mdev); - rtnl_unlock(); ieee80211_clear_tx_pending(local); @@ -1044,7 +931,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) wiphy_unregister(local->hw.wiphy); ieee80211_wep_free(local); ieee80211_led_exit(local); - free_netdev(local->mdev); kfree(local->int_scan_req.channels); } EXPORT_SYMBOL(ieee80211_unregister_hw); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 8ac7a984d886..b33efc4fc267 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -287,7 +287,7 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, struct rate_control_ref *ref, *old; ASSERT_RTNL(); - if (local->open_count || netif_running(local->mdev)) + if (local->open_count) return -EBUSY; ref = rate_control_alloc(name, local); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b513fb791153..7f33f775c5df 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1478,6 +1478,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) struct ieee80211s_hdr *mesh_hdr; unsigned int hdrlen; struct sk_buff *skb = rx->skb, *fwd_skb; + struct ieee80211_local *local = rx->local; hdr = (struct ieee80211_hdr *) skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -1520,6 +1521,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) dropped_frames_ttl); else { struct ieee80211_hdr *fwd_hdr; + struct ieee80211_tx_info *info; + fwd_skb = skb_copy(skb, GFP_ATOMIC); if (!fwd_skb && net_ratelimit()) @@ -1533,9 +1536,12 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) */ memcpy(fwd_hdr->addr1, fwd_hdr->addr2, ETH_ALEN); memcpy(fwd_hdr->addr2, rx->dev->dev_addr, ETH_ALEN); - fwd_skb->dev = rx->local->mdev; + info = IEEE80211_SKB_CB(fwd_skb); + memset(info, 0, sizeof(*info)); + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; fwd_skb->iif = rx->dev->ifindex; - dev_queue_xmit(fwd_skb); + ieee80211_select_queue(local, fwd_skb); + ieee80211_add_pending_skb(local, fwd_skb); } } @@ -1803,8 +1809,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; } -static void ieee80211_rx_michael_mic_report(struct net_device *dev, - 
struct ieee80211_hdr *hdr, +static void ieee80211_rx_michael_mic_report(struct ieee80211_hdr *hdr, struct ieee80211_rx_data *rx) { int keyidx; @@ -2114,7 +2119,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, } if ((status->flag & RX_FLAG_MMIC_ERROR)) { - ieee80211_rx_michael_mic_report(local->mdev, hdr, &rx); + ieee80211_rx_michael_mic_report(hdr, &rx); return; } @@ -2483,7 +2488,6 @@ void ieee80211_rx_irqsafe(struct ieee80211_hw *hw, struct sk_buff *skb) BUILD_BUG_ON(sizeof(struct ieee80211_rx_status) > sizeof(skb->cb)); - skb->dev = local->mdev; skb->pkt_type = IEEE80211_RX_MSG; skb_queue_tail(&local->skb_queue, skb); tasklet_schedule(&local->tasklet); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 5f4f7869d050..74820656dc89 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -294,16 +294,13 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) if (was_hw_scan) goto done; - netif_tx_lock_bh(local->mdev); - netif_addr_lock(local->mdev); + spin_lock_bh(&local->filter_lock); local->filter_flags &= ~FIF_BCN_PRBRESP_PROMISC; drv_configure_filter(local, FIF_BCN_PRBRESP_PROMISC, &local->filter_flags, - local->mdev->mc_count, - local->mdev->mc_list); - - netif_addr_unlock(local->mdev); - netif_tx_unlock_bh(local->mdev); + local->mc_count, + local->mc_list); + spin_unlock_bh(&local->filter_lock); drv_sw_scan_complete(local); @@ -382,13 +379,13 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) local->scan_state = SCAN_SET_CHANNEL; local->scan_channel_idx = 0; - netif_addr_lock_bh(local->mdev); + spin_lock_bh(&local->filter_lock); local->filter_flags |= FIF_BCN_PRBRESP_PROMISC; drv_configure_filter(local, FIF_BCN_PRBRESP_PROMISC, &local->filter_flags, - local->mdev->mc_count, - local->mdev->mc_list); - netif_addr_unlock_bh(local->mdev); + local->mc_count, + local->mc_list); + spin_unlock_bh(&local->filter_lock); /* TODO: start scan as soon as all nullfunc frames are ACKed */ queue_delayed_work(local->hw.workqueue, &local->scan_work, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f3efd4f16e91..7adaeb2c53e8 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -451,7 +451,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - if (unlikely(tx->skb->do_not_encrypt)) + if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) tx->key = NULL; else if (tx->sta && (key = rcu_dereference(tx->sta->key))) tx->key = key; @@ -497,7 +497,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) } if (!tx->key || !(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) - tx->skb->do_not_encrypt = 1; + info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; return TX_CONTINUE; } @@ -774,9 +774,7 @@ static int ieee80211_fragment(struct ieee80211_local *local, memcpy(tmp->cb, skb->cb, sizeof(tmp->cb)); skb_copy_queue_mapping(tmp, skb); tmp->priority = skb->priority; - tmp->do_not_encrypt = skb->do_not_encrypt; tmp->dev = skb->dev; - tmp->iif = skb->iif; /* copy header and data */ memcpy(skb_put(tmp, hdrlen), skb->data, hdrlen); @@ -804,7 +802,7 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) /* * Warn when submitting a fragmented A-MPDU frame and drop it. - * This scenario is handled in __ieee80211_tx_prepare but extra + * This scenario is handled in ieee80211_tx_prepare but extra * caution taken here as fragmented ampdu may cause Tx stop. 
*/ if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) @@ -943,11 +941,12 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, struct ieee80211_radiotap_header *rthdr = (struct ieee80211_radiotap_header *) skb->data; struct ieee80211_supported_band *sband; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int ret = ieee80211_radiotap_iterator_init(&iterator, rthdr, skb->len); sband = tx->local->hw.wiphy->bands[tx->channel->band]; - skb->do_not_encrypt = 1; + info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; tx->flags &= ~IEEE80211_TX_FRAGMENTED; /* @@ -985,7 +984,7 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, skb_trim(skb, skb->len - FCS_LEN); } if (*iterator.this_arg & IEEE80211_RADIOTAP_F_WEP) - tx->skb->do_not_encrypt = 0; + info->flags &= ~IEEE80211_TX_INTFL_DONT_ENCRYPT; if (*iterator.this_arg & IEEE80211_RADIOTAP_F_FRAG) tx->flags |= IEEE80211_TX_FRAGMENTED; break; @@ -1018,13 +1017,12 @@ static bool __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, * initialises @tx */ static ieee80211_tx_result -__ieee80211_tx_prepare(struct ieee80211_tx_data *tx, - struct sk_buff *skb, - struct net_device *dev) +ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, + struct ieee80211_tx_data *tx, + struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct ieee80211_hdr *hdr; - struct ieee80211_sub_if_data *sdata; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int hdrlen, tid; u8 *qc, *state; @@ -1032,9 +1030,9 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, memset(tx, 0, sizeof(*tx)); tx->skb = skb; - tx->dev = dev; /* use original interface */ + tx->dev = sdata->dev; /* use original interface */ tx->local = local; - tx->sdata = IEEE80211_DEV_TO_SUB_IF(dev); + tx->sdata = sdata; tx->channel = local->hw.conf.channel; /* * Set this flag (used below to indicate "automatic fragmentation"), @@ -1043,7 +1041,6 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, tx->flags |= IEEE80211_TX_FRAGMENTED; /* process and remove the injection radiotap header */ - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) { if (!__ieee80211_parse_tx_radiotap(tx, skb)) return TX_DROP; @@ -1139,50 +1136,28 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, return TX_CONTINUE; } -/* - * NB: @tx is uninitialised when passed in here - */ -static int ieee80211_tx_prepare(struct ieee80211_local *local, - struct ieee80211_tx_data *tx, - struct sk_buff *skb) -{ - struct net_device *dev; - - dev = dev_get_by_index(&init_net, skb->iif); - if (unlikely(dev && !is_ieee80211_device(local, dev))) { - dev_put(dev); - dev = NULL; - } - if (unlikely(!dev)) - return -ENODEV; - /* - * initialises tx with control - * - * return value is safe to ignore here because this function - * can only be invoked for multicast frames - * - * XXX: clean up - */ - __ieee80211_tx_prepare(tx, skb, dev); - dev_put(dev); - return 0; -} - static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff **skbp, - struct sta_info *sta) + struct sta_info *sta, + bool txpending) { struct sk_buff *skb = *skbp, *next; struct ieee80211_tx_info *info; + unsigned long flags; int ret, len; bool fragm = false; - local->mdev->trans_start = jiffies; - while (skb) { - if (ieee80211_queue_stopped(&local->hw, - skb_get_queue_mapping(skb))) - return IEEE80211_TX_PENDING; + int q = skb_get_queue_mapping(skb); + + 
spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + ret = IEEE80211_TX_OK; + if (local->queue_stop_reasons[q] || + (!txpending && !skb_queue_empty(&local->pending[q]))) + ret = IEEE80211_TX_PENDING; + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + if (ret != IEEE80211_TX_OK) + return ret; info = IEEE80211_SKB_CB(skb); @@ -1254,10 +1229,10 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) return 0; } -static void ieee80211_tx(struct net_device *dev, struct sk_buff *skb, - bool txpending) +static void ieee80211_tx(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, bool txpending) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct ieee80211_tx_data tx; ieee80211_tx_result res_prepare; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -1268,8 +1243,6 @@ static void ieee80211_tx(struct net_device *dev, struct sk_buff *skb, queue = skb_get_queue_mapping(skb); - WARN_ON(!txpending && !skb_queue_empty(&local->pending[queue])); - if (unlikely(skb->len < 10)) { dev_kfree_skb(skb); return; @@ -1278,7 +1251,7 @@ static void ieee80211_tx(struct net_device *dev, struct sk_buff *skb, rcu_read_lock(); /* initialises tx */ - res_prepare = __ieee80211_tx_prepare(&tx, skb, dev); + res_prepare = ieee80211_tx_prepare(sdata, &tx, skb); if (unlikely(res_prepare == TX_DROP)) { dev_kfree_skb(skb); @@ -1297,7 +1270,7 @@ static void ieee80211_tx(struct net_device *dev, struct sk_buff *skb, retries = 0; retry: - ret = __ieee80211_tx(local, &tx.skb, tx.sta); + ret = __ieee80211_tx(local, &tx.skb, tx.sta, txpending); switch (ret) { case IEEE80211_TX_OK: break; @@ -1315,34 +1288,35 @@ static void ieee80211_tx(struct net_device *dev, struct sk_buff *skb, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - if (__netif_subqueue_stopped(local->mdev, queue)) { + if (local->queue_stop_reasons[queue] || + !skb_queue_empty(&local->pending[queue])) { + /* + * if queue is stopped, queue up frames for later + * transmission from the tasklet + */ do { next = skb->next; skb->next = NULL; if (unlikely(txpending)) - skb_queue_head(&local->pending[queue], - skb); + __skb_queue_head(&local->pending[queue], + skb); else - skb_queue_tail(&local->pending[queue], - skb); + __skb_queue_tail(&local->pending[queue], + skb); } while ((skb = next)); - /* - * Make sure nobody will enable the queue on us - * (without going through the tasklet) nor disable the - * netdev queue underneath the pending handling code. 
- */ - __set_bit(IEEE80211_QUEUE_STOP_REASON_PENDING, - &local->queue_stop_reasons[queue]); - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } else { + /* + * otherwise retry, but this is a race condition or + * a driver bug (which we warn about if it persists) + */ spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); retries++; - if (WARN(retries > 10, "tx refused but queue active")) + if (WARN(retries > 10, "tx refused but queue active\n")) goto drop; goto retry; } @@ -1403,14 +1377,13 @@ static int ieee80211_skb_resize(struct ieee80211_local *local, return 0; } -int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) +static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) { - struct ieee80211_master_priv *mpriv = netdev_priv(dev); - struct ieee80211_local *local = mpriv->local; + struct ieee80211_local *local = sdata->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct net_device *odev = NULL; - struct ieee80211_sub_if_data *osdata; + struct ieee80211_sub_if_data *tmp_sdata; int headroom; bool may_encrypt; enum { @@ -1419,20 +1392,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) UNKNOWN_ADDRESS, } monitor_iface = NOT_MONITOR; - if (skb->iif) - odev = dev_get_by_index(&init_net, skb->iif); - if (unlikely(odev && !is_ieee80211_device(local, odev))) { - dev_put(odev); - odev = NULL; - } - if (unlikely(!odev)) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Discarded packet with nonexistent " - "originating device\n", dev->name); -#endif - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } + dev_hold(sdata->dev); if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && local->hw.conf.dynamic_ps_timeout > 0 && @@ -1448,26 +1408,21 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); } - memset(info, 0, sizeof(*info)); - info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; - osdata = IEEE80211_DEV_TO_SUB_IF(odev); - - if (ieee80211_vif_is_mesh(&osdata->vif) && + if (ieee80211_vif_is_mesh(&sdata->vif) && ieee80211_is_data(hdr->frame_control)) { if (is_multicast_ether_addr(hdr->addr3)) memcpy(hdr->addr1, hdr->addr3, ETH_ALEN); else - if (mesh_nexthop_lookup(skb, osdata)) { - dev_put(odev); - return NETDEV_TX_OK; + if (mesh_nexthop_lookup(skb, sdata)) { + dev_put(sdata->dev); + return; } - if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0) - IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh, - fwded_frames); - } else if (unlikely(osdata->vif.type == NL80211_IFTYPE_MONITOR)) { - struct ieee80211_sub_if_data *sdata; + if (memcmp(sdata->dev->dev_addr, hdr->addr4, ETH_ALEN) != 0) + IEEE80211_IFSTA_MESH_CTR_INC(&sdata->u.mesh, + fwded_frames); + } else if (unlikely(sdata->vif.type == NL80211_IFTYPE_MONITOR)) { int hdrlen; u16 len_rthdr; @@ -1491,19 +1446,17 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) */ rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, + list_for_each_entry_rcu(tmp_sdata, &local->interfaces, list) { - if (!netif_running(sdata->dev)) + if (!netif_running(tmp_sdata->dev)) continue; - if (sdata->vif.type != NL80211_IFTYPE_AP) + if (tmp_sdata->vif.type != NL80211_IFTYPE_AP) continue; - if (compare_ether_addr(sdata->dev->dev_addr, + if (compare_ether_addr(tmp_sdata->dev->dev_addr, hdr->addr2)) { - dev_hold(sdata->dev); - dev_put(odev); - osdata = sdata; - 
odev = osdata->dev; - skb->iif = sdata->dev->ifindex; + dev_hold(tmp_sdata->dev); + dev_put(sdata->dev); + sdata = tmp_sdata; monitor_iface = FOUND_SDATA; break; } @@ -1512,31 +1465,31 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) } } - may_encrypt = !skb->do_not_encrypt; + may_encrypt = !(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT); - headroom = osdata->local->tx_headroom; + headroom = local->tx_headroom; if (may_encrypt) headroom += IEEE80211_ENCRYPT_HEADROOM; headroom -= skb_headroom(skb); headroom = max_t(int, 0, headroom); - if (ieee80211_skb_resize(osdata->local, skb, headroom, may_encrypt)) { + if (ieee80211_skb_resize(local, skb, headroom, may_encrypt)) { dev_kfree_skb(skb); - dev_put(odev); - return NETDEV_TX_OK; + dev_put(sdata->dev); + return; } - if (osdata->vif.type == NL80211_IFTYPE_AP_VLAN) - osdata = container_of(osdata->bss, - struct ieee80211_sub_if_data, - u.ap); + tmp_sdata = sdata; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + tmp_sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); if (likely(monitor_iface != UNKNOWN_ADDRESS)) - info->control.vif = &osdata->vif; - - ieee80211_tx(odev, skb, false); - dev_put(odev); + info->control.vif = &tmp_sdata->vif; - return NETDEV_TX_OK; + ieee80211_select_queue(local, skb); + ieee80211_tx(sdata, skb, false); + dev_put(sdata->dev); } int ieee80211_monitor_start_xmit(struct sk_buff *skb, @@ -1546,6 +1499,7 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct ieee80211_channel *chan = local->hw.conf.channel; struct ieee80211_radiotap_header *prthdr = (struct ieee80211_radiotap_header *)skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); u16 len_rthdr; /* @@ -1583,15 +1537,9 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, if (unlikely(skb->len < len_rthdr)) goto fail; /* skb too short for claimed rt header extent */ - skb->dev = local->mdev; - /* needed because we set skb device to master */ skb->iif = dev->ifindex; - /* sometimes we do encrypt injected frames, will be fixed - * up in radiotap parser if not wanted */ - skb->do_not_encrypt = 0; - /* * fix up the pointers accounting for the radiotap * header still being in there. 
We are being given @@ -1606,8 +1554,10 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, skb_set_network_header(skb, len_rthdr); skb_set_transport_header(skb, len_rthdr); - /* pass the radiotap header up to the next stage intact */ - dev_queue_xmit(skb); + memset(info, 0, sizeof(*info)); + + /* pass the radiotap header up to xmit */ + ieee80211_xmit(IEEE80211_DEV_TO_SUB_IF(dev), skb); return NETDEV_TX_OK; fail: @@ -1635,6 +1585,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); int ret = NETDEV_TX_BUSY, head_need; u16 ethertype, hdrlen, meshhdrlen = 0; __le16 fc; @@ -1864,7 +1815,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, skb->iif = dev->ifindex; - skb->dev = local->mdev; dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; @@ -1875,8 +1825,10 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, skb_set_network_header(skb, nh_pos); skb_set_transport_header(skb, h_pos); + memset(info, 0, sizeof(*info)); + dev->trans_start = jiffies; - dev_queue_xmit(skb); + ieee80211_xmit(sdata, skb); return NETDEV_TX_OK; @@ -1918,7 +1870,6 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, return true; } - /* validate info->control.vif against skb->iif */ sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, @@ -1932,12 +1883,13 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, } if (info->flags & IEEE80211_TX_INTFL_NEED_TXPROCESSING) { - ieee80211_tx(dev, skb, true); + /* do not use sdata, it may have been changed above */ + ieee80211_tx(IEEE80211_DEV_TO_SUB_IF(dev), skb, true); } else { hdr = (struct ieee80211_hdr *)skb->data; sta = sta_info_get(local, hdr->addr1); - ret = __ieee80211_tx(local, &skb, sta); + ret = __ieee80211_tx(local, &skb, sta, true); if (ret != IEEE80211_TX_OK) result = false; } @@ -1949,59 +1901,43 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, } /* - * Transmit all pending packets. Called from tasklet, locks master device - * TX lock so that no new packets can come in. + * Transmit all pending packets. Called from tasklet. */ void ieee80211_tx_pending(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *)data; - struct net_device *dev = local->mdev; unsigned long flags; int i; - bool next; + bool txok; rcu_read_lock(); - netif_tx_lock_bh(dev); + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); for (i = 0; i < local->hw.queues; i++) { /* * If queue is stopped by something other than due to pending * frames, or we have no pending frames, proceed to next queue. 
*/ - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - next = false; - if (local->queue_stop_reasons[i] != - BIT(IEEE80211_QUEUE_STOP_REASON_PENDING) || + if (local->queue_stop_reasons[i] || skb_queue_empty(&local->pending[i])) - next = true; - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - - if (next) continue; - /* - * start the queue now to allow processing our packets, - * we're under the tx lock here anyway so nothing will - * happen as a result of this - */ - netif_start_subqueue(local->mdev, i); - while (!skb_queue_empty(&local->pending[i])) { - struct sk_buff *skb = skb_dequeue(&local->pending[i]); - - if (!ieee80211_tx_pending_skb(local, skb)) { - skb_queue_head(&local->pending[i], skb); + struct sk_buff *skb = __skb_dequeue(&local->pending[i]); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, + flags); + + txok = ieee80211_tx_pending_skb(local, skb); + if (!txok) + __skb_queue_head(&local->pending[i], skb); + spin_lock_irqsave(&local->queue_stop_reason_lock, + flags); + if (!txok) break; - } } - - /* Start regular packet processing again. */ - if (skb_queue_empty(&local->pending[i])) - ieee80211_wake_queue_by_reason(&local->hw, i, - IEEE80211_QUEUE_STOP_REASON_PENDING); } + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - netif_tx_unlock_bh(dev); rcu_read_unlock(); } @@ -2176,8 +2112,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, info = IEEE80211_SKB_CB(skb); - skb->do_not_encrypt = 1; - + info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; info->band = band; /* * XXX: For now, always use the lowest rate @@ -2248,9 +2183,6 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, sdata = vif_to_sdata(vif); bss = &sdata->u.ap; - if (!bss) - return NULL; - rcu_read_lock(); beacon = rcu_dereference(bss->beacon); @@ -2276,7 +2208,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, cpu_to_le16(IEEE80211_FCTL_MOREDATA); } - if (!ieee80211_tx_prepare(local, &tx, skb)) + if (!ieee80211_tx_prepare(sdata, &tx, skb)) break; dev_kfree_skb_any(skb); } @@ -2296,3 +2228,18 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, return skb; } EXPORT_SYMBOL(ieee80211_get_buffered_bc); + +void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + int encrypt) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, 0); + skb_set_transport_header(skb, 0); + + skb->iif = sdata->dev->ifindex; + if (!encrypt) + info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + + ieee80211_xmit(sdata, skb); +} diff --git a/net/mac80211/util.c b/net/mac80211/util.c index dbf66b52d38c..7fc55846d601 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -275,16 +275,12 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, __clear_bit(reason, &local->queue_stop_reasons[queue]); - if (!skb_queue_empty(&local->pending[queue]) && - local->queue_stop_reasons[queue] == - BIT(IEEE80211_QUEUE_STOP_REASON_PENDING)) - tasklet_schedule(&local->tx_pending_tasklet); - if (local->queue_stop_reasons[queue] != 0) /* someone still has this queue stopped */ return; - netif_wake_subqueue(local->mdev, queue); + if (!skb_queue_empty(&local->pending[queue])) + tasklet_schedule(&local->tx_pending_tasklet); } void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, @@ -313,14 +309,6 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, if (WARN_ON(queue >= hw->queues)) return; - /* - * Only stop if it was previously running, this is 
necessary - * for correct pending packets handling because there we may - * start (but not wake) the queue and rely on that. - */ - if (!local->queue_stop_reasons[queue]) - netif_stop_subqueue(local->mdev, queue); - __set_bit(reason, &local->queue_stop_reasons[queue]); } @@ -351,8 +339,7 @@ void ieee80211_add_pending_skb(struct ieee80211_local *local, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); __ieee80211_stop_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); - __ieee80211_stop_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_PENDING); - skb_queue_tail(&local->pending[queue], skb); + __skb_queue_tail(&local->pending[queue], skb); __ieee80211_wake_queue(hw, queue, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); } @@ -373,16 +360,12 @@ int ieee80211_add_pending_skbs(struct ieee80211_local *local, while ((skb = skb_dequeue(skbs))) { ret++; queue = skb_get_queue_mapping(skb); - skb_queue_tail(&local->pending[queue], skb); + __skb_queue_tail(&local->pending[queue], skb); } - for (i = 0; i < hw->queues; i++) { - if (ret) - __ieee80211_stop_queue(hw, i, - IEEE80211_QUEUE_STOP_REASON_PENDING); + for (i = 0; i < hw->queues; i++) __ieee80211_wake_queue(hw, i, IEEE80211_QUEUE_STOP_REASON_SKB_ADD); - } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); return ret; @@ -413,11 +396,16 @@ EXPORT_SYMBOL(ieee80211_stop_queues); int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) { struct ieee80211_local *local = hw_to_local(hw); + unsigned long flags; + int ret; if (WARN_ON(queue >= hw->queues)) return true; - return __netif_subqueue_stopped(local->mdev, queue); + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + ret = !!local->queue_stop_reasons[queue]; + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + return ret; } EXPORT_SYMBOL(ieee80211_queue_stopped); @@ -761,20 +749,6 @@ void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, ieee80211_set_wmm_default(sdata); } -void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - int encrypt) -{ - skb->dev = sdata->local->mdev; - skb_set_mac_header(skb, 0); - skb_set_network_header(skb, 0); - skb_set_transport_header(skb, 0); - - skb->iif = sdata->dev->ifindex; - skb->do_not_encrypt = !encrypt; - - dev_queue_xmit(skb); -} - u32 ieee80211_mandatory_rates(struct ieee80211_local *local, enum ieee80211_band band) { @@ -1049,9 +1023,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* reconfigure hardware */ ieee80211_hw_config(local, ~0); - netif_addr_lock_bh(local->mdev); + spin_lock_bh(&local->filter_lock); ieee80211_configure_filter(local); - netif_addr_unlock_bh(local->mdev); + spin_unlock_bh(&local->filter_lock); /* Finally also reconfigure all the BSS information */ list_for_each_entry(sdata, &local->interfaces, list) { diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 116a923b14d6..b19b7696f3a2 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -85,10 +85,8 @@ static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb) return ieee802_1d_to_ac[skb->priority]; } -u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) +void ieee80211_select_queue(struct ieee80211_local *local, struct sk_buff *skb) { - struct ieee80211_master_priv *mpriv = netdev_priv(dev); - struct ieee80211_local *local = mpriv->local; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; u16 queue; u8 tid; @@ -113,5 +111,5 @@ u16 ieee80211_select_queue(struct 
net_device *dev, struct sk_buff *skb) *p = 0; } - return queue; + skb_set_queue_mapping(skb, queue); } diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index 7520d2e014dc..d4fd87ca5118 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h @@ -20,6 +20,7 @@ extern const int ieee802_1d_to_ac[8]; -u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb); +void ieee80211_select_queue(struct ieee80211_local *local, + struct sk_buff *skb); #endif /* _WME_H */ -- cgit v1.2.3 From 72bce62775db0315511474e8d8f8e25d25b48366 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 17 Jun 2009 17:45:28 +0200 Subject: net: remove unused skb->do_not_encrypt mac80211 required this due to the master netdev, but now it can put all information into skb->cb and this can go. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/skbuff.h | 6 +----- net/core/skbuff.c | 3 --- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f2c69a2cca17..df7b23ac66e6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -304,7 +304,6 @@ typedef unsigned char *sk_buff_data_t; * @tc_index: Traffic control index * @tc_verd: traffic control verdict * @ndisc_nodetype: router type (from link layer) - * @do_not_encrypt: set to prevent encryption of this frame * @dma_cookie: a cookie to one of several possible DMA operations * done by skb DMA functions * @secmark: security marking @@ -379,13 +378,10 @@ struct sk_buff { kmemcheck_bitfield_begin(flags2); #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; -#endif -#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) - __u8 do_not_encrypt:1; #endif kmemcheck_bitfield_end(flags2); - /* 0/13/14 bit hole */ + /* 0/14 bit hole */ #ifdef CONFIG_NET_DMA dma_cookie_t dma_cookie; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 9e0597d189b0..80a96166df39 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -559,9 +559,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif #endif new->vlan_tci = old->vlan_tci; -#if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) - new->do_not_encrypt = old->do_not_encrypt; -#endif skb_copy_secmark(new, old); } -- cgit v1.2.3 From 8150f32b90f630ad3e460f026ce338cb81685bc9 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 24 Jul 2009 22:11:32 -0700 Subject: Driver Core: Make PM operations a const pointer They are not supposed to be modified by drivers, so make them const. Signed-off-by: Dmitry Torokhov Acked-by: Greg Kroah-Hartman Signed-off-by: Rafael J. 
Wysocki --- drivers/base/platform.c | 2 +- drivers/base/power/main.c | 8 +++++--- drivers/pci/pci-driver.c | 16 ++++++++-------- include/linux/device.h | 9 +++++---- 4 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 81cb01bfc356..c4b3fcee17b2 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -925,7 +925,7 @@ static int platform_pm_restore_noirq(struct device *dev) #endif /* !CONFIG_HIBERNATION */ -static struct dev_pm_ops platform_dev_pm_ops = { +static const struct dev_pm_ops platform_dev_pm_ops = { .prepare = platform_pm_prepare, .complete = platform_pm_complete, .suspend = platform_pm_suspend, diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 58a3e572f2c9..1b1a786b7dec 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -157,8 +157,9 @@ void device_pm_move_last(struct device *dev) * @ops: PM operations to choose from. * @state: PM transition of the system being carried out. */ -static int pm_op(struct device *dev, struct dev_pm_ops *ops, - pm_message_t state) +static int pm_op(struct device *dev, + const struct dev_pm_ops *ops, + pm_message_t state) { int error = 0; @@ -220,7 +221,8 @@ static int pm_op(struct device *dev, struct dev_pm_ops *ops, * The operation is executed with interrupts disabled by the only remaining * functional CPU in the system. */ -static int pm_noirq_op(struct device *dev, struct dev_pm_ops *ops, +static int pm_noirq_op(struct device *dev, + const struct dev_pm_ops *ops, pm_message_t state) { int error = 0; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index d76c4c85367e..0c2ea44ae5e1 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -575,7 +575,7 @@ static void pci_pm_complete(struct device *dev) static int pci_pm_suspend(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_SUSPEND); @@ -613,7 +613,7 @@ static int pci_pm_suspend(struct device *dev) static int pci_pm_suspend_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend_late(dev, PMSG_SUSPEND); @@ -672,7 +672,7 @@ static int pci_pm_resume_noirq(struct device *dev) static int pci_pm_resume(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; /* @@ -711,7 +711,7 @@ static int pci_pm_resume(struct device *dev) static int pci_pm_freeze(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_FREEZE); @@ -780,7 +780,7 @@ static int pci_pm_thaw_noirq(struct device *dev) static int pci_pm_thaw(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? 
dev->driver->pm : NULL; int error = 0; if (pci_has_legacy_pm_support(pci_dev)) @@ -799,7 +799,7 @@ static int pci_pm_thaw(struct device *dev) static int pci_pm_poweroff(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) return pci_legacy_suspend(dev, PMSG_HIBERNATE); @@ -872,7 +872,7 @@ static int pci_pm_restore_noirq(struct device *dev) static int pci_pm_restore(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int error = 0; /* @@ -910,7 +910,7 @@ static int pci_pm_restore(struct device *dev) #endif /* !CONFIG_HIBERNATION */ -struct dev_pm_ops pci_dev_pm_ops = { +const struct dev_pm_ops pci_dev_pm_ops = { .prepare = pci_pm_prepare, .complete = pci_pm_complete, .suspend = pci_pm_suspend, diff --git a/include/linux/device.h b/include/linux/device.h index aebb81036db2..a28642975053 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -62,7 +62,7 @@ struct bus_type { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; struct bus_type_private *p; }; @@ -132,7 +132,7 @@ struct device_driver { int (*resume) (struct device *dev); struct attribute_group **groups; - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; struct driver_private *p; }; @@ -200,7 +200,8 @@ struct class { int (*suspend)(struct device *dev, pm_message_t state); int (*resume)(struct device *dev); - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; + struct class_private *p; }; @@ -291,7 +292,7 @@ struct device_type { char *(*nodename)(struct device *dev); void (*release)(struct device *dev); - struct dev_pm_ops *pm; + const struct dev_pm_ops *pm; }; /* interface for exporting device attributes */ -- cgit v1.2.3 From cb3824bade2549d7ad059d5802da43312540fdee Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Wed, 8 Jul 2009 14:21:12 +0200 Subject: ISDN: Make isdnhdlc usable for other ISDN drivers isdnhdlc is useful for other ISDN drivers as well. Move the include file to a central location and the source to the central isdn location. 
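As a rough illustration of the intended reuse — the structure name, buffer size and pr_debug() reporting below are invented for this sketch; only the new header path and the isdnhdlc_rcv_init()/isdnhdlc_decode() calls and their return semantics come from this patch — a driver outside hisax could now pull in the decoder roughly like this:

	#include <linux/kernel.h>
	#include <linux/isdn/hdlc.h>

	struct my_bchan {
		struct isdnhdlc_vars hdlc_rx;	/* per-channel decoder state */
		unsigned char frame[2048];	/* reassembled HDLC frame     */
	};

	static void my_bchan_rx_init(struct my_bchan *bc)
	{
		/* plain 64k B channel, no 56k adaptation */
		isdnhdlc_rcv_init(&bc->hdlc_rx, 0);
	}

	/* Feed one chunk of raw B-channel data into the software decoder. */
	static void my_bchan_rx(struct my_bchan *bc,
				const unsigned char *buf, int len)
	{
		int used, status;

		while (len > 0) {
			status = isdnhdlc_decode(&bc->hdlc_rx, buf, len, &used,
						 bc->frame, sizeof(bc->frame));
			buf += used;
			len -= used;
			if (status > 0)
				pr_debug("frame complete, %d bytes\n", status);
			else if (status < 0)
				pr_debug("hdlc error %d\n", status);
			/* status == 0: frame still incomplete, wait for more */
		}
	}

All decoder state lives in struct isdnhdlc_vars, so nothing from the old hisax-private header is needed by such a user.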
Signed-off-by: Karsten Keil --- drivers/isdn/Kconfig | 6 +- drivers/isdn/hisax/Kconfig | 6 +- drivers/isdn/hisax/Makefile | 4 - drivers/isdn/hisax/isdnhdlc.c | 603 ------------------------------------------ drivers/isdn/hisax/isdnhdlc.h | 70 ----- drivers/isdn/hisax/st5481.h | 2 +- drivers/isdn/i4l/Kconfig | 11 + drivers/isdn/i4l/Makefile | 1 + drivers/isdn/i4l/isdnhdlc.c | 603 ++++++++++++++++++++++++++++++++++++++++++ include/linux/isdn/hdlc.h | 70 +++++ 10 files changed, 689 insertions(+), 687 deletions(-) delete mode 100644 drivers/isdn/hisax/isdnhdlc.c delete mode 100644 drivers/isdn/hisax/isdnhdlc.h create mode 100644 drivers/isdn/i4l/isdnhdlc.c create mode 100644 include/linux/isdn/hdlc.h (limited to 'include') diff --git a/drivers/isdn/Kconfig b/drivers/isdn/Kconfig index 02bdca6f95c3..022a19452953 100644 --- a/drivers/isdn/Kconfig +++ b/drivers/isdn/Kconfig @@ -21,8 +21,6 @@ menuconfig ISDN if ISDN -source "drivers/isdn/mISDN/Kconfig" - menuconfig ISDN_I4L tristate "Old ISDN4Linux (deprecated)" ---help--- @@ -41,9 +39,9 @@ menuconfig ISDN_I4L It is still available, though, for use with adapters that are not supported by the new CAPI subsystem yet. -if ISDN_I4L +source "drivers/isdn/mISDN/Kconfig" + source "drivers/isdn/i4l/Kconfig" -endif menuconfig ISDN_CAPI tristate "CAPI 2.0 subsystem" diff --git a/drivers/isdn/hisax/Kconfig b/drivers/isdn/hisax/Kconfig index 7832d8ba8e44..3464ebc4cdbc 100644 --- a/drivers/isdn/hisax/Kconfig +++ b/drivers/isdn/hisax/Kconfig @@ -391,6 +391,7 @@ comment "HiSax sub driver modules" config HISAX_ST5481 tristate "ST5481 USB ISDN modem (EXPERIMENTAL)" depends on USB && EXPERIMENTAL + select ISDN_HDLC select CRC_CCITT select BITREVERSE help @@ -418,11 +419,6 @@ config HISAX_FRITZ_PCIPNP (the latter also needs you to select "ISA Plug and Play support" from the menu "Plug and Play configuration") -config HISAX_HDLC - bool - depends on HISAX_ST5481 - default y - config HISAX_AVM_A1_PCMCIA bool depends on HISAX_AVM_A1_CS diff --git a/drivers/isdn/hisax/Makefile b/drivers/isdn/hisax/Makefile index c7a3794bdae4..ab638b083df9 100644 --- a/drivers/isdn/hisax/Makefile +++ b/drivers/isdn/hisax/Makefile @@ -16,10 +16,6 @@ obj-$(CONFIG_HISAX_HFCUSB) += hfc_usb.o obj-$(CONFIG_HISAX_HFC4S8S) += hfc4s8s_l1.o obj-$(CONFIG_HISAX_FRITZ_PCIPNP) += hisax_isac.o hisax_fcpcipnp.o -ifdef CONFIG_HISAX_HDLC -obj-$(CONFIG_ISDN_DRV_HISAX) += isdnhdlc.o -endif - # Multipart objects. hisax_st5481-y := st5481_init.o st5481_usb.o st5481_d.o \ diff --git a/drivers/isdn/hisax/isdnhdlc.c b/drivers/isdn/hisax/isdnhdlc.c deleted file mode 100644 index c69a77a80062..000000000000 --- a/drivers/isdn/hisax/isdnhdlc.c +++ /dev/null @@ -1,603 +0,0 @@ -/* - * isdnhdlc.c -- General purpose ISDN HDLC decoder. - * - *Copyright (C) 2002 Wolfgang Mües - * 2001 Frode Isaksen - * 2001 Kai Germaschewski - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ - -#include -#include -#include -#include "isdnhdlc.h" - -/*-------------------------------------------------------------------*/ - -MODULE_AUTHOR("Wolfgang Mües , " - "Frode Isaksen , " - "Kai Germaschewski "); -MODULE_DESCRIPTION("General purpose ISDN HDLC decoder"); -MODULE_LICENSE("GPL"); - -/*-------------------------------------------------------------------*/ - -enum { - HDLC_FAST_IDLE,HDLC_GET_FLAG_B0,HDLC_GETFLAG_B1A6,HDLC_GETFLAG_B7, - HDLC_GET_DATA,HDLC_FAST_FLAG -}; - -enum { - HDLC_SEND_DATA,HDLC_SEND_CRC1,HDLC_SEND_FAST_FLAG, - HDLC_SEND_FIRST_FLAG,HDLC_SEND_CRC2,HDLC_SEND_CLOSING_FLAG, - HDLC_SEND_IDLE1,HDLC_SEND_FAST_IDLE,HDLC_SENDFLAG_B0, - HDLC_SENDFLAG_B1A6,HDLC_SENDFLAG_B7,STOPPED -}; - -void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56) -{ - hdlc->bit_shift = 0; - hdlc->hdlc_bits1 = 0; - hdlc->data_bits = 0; - hdlc->ffbit_shift = 0; - hdlc->data_received = 0; - hdlc->state = HDLC_GET_DATA; - hdlc->do_adapt56 = do_adapt56; - hdlc->dchannel = 0; - hdlc->crc = 0; - hdlc->cbin = 0; - hdlc->shift_reg = 0; - hdlc->ffvalue = 0; - hdlc->dstpos = 0; -} - -void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc, int is_d_channel, int do_adapt56) -{ - hdlc->bit_shift = 0; - hdlc->hdlc_bits1 = 0; - hdlc->data_bits = 0; - hdlc->ffbit_shift = 0; - hdlc->data_received = 0; - hdlc->do_closing = 0; - hdlc->ffvalue = 0; - if (is_d_channel) { - hdlc->dchannel = 1; - hdlc->state = HDLC_SEND_FIRST_FLAG; - } else { - hdlc->dchannel = 0; - hdlc->state = HDLC_SEND_FAST_FLAG; - hdlc->ffvalue = 0x7e; - } - hdlc->cbin = 0x7e; - hdlc->bit_shift = 0; - if(do_adapt56){ - hdlc->do_adapt56 = 1; - hdlc->data_bits = 0; - hdlc->state = HDLC_SENDFLAG_B0; - } else { - hdlc->do_adapt56 = 0; - hdlc->data_bits = 8; - } - hdlc->shift_reg = 0; -} - -/* - isdnhdlc_decode - decodes HDLC frames from a transparent bit stream. - - The source buffer is scanned for valid HDLC frames looking for - flags (01111110) to indicate the start of a frame. If the start of - the frame is found, the bit stuffing is removed (0 after 5 1's). - When a new flag is found, the complete frame has been received - and the CRC is checked. - If a valid frame is found, the function returns the frame length - excluding the CRC with the bit HDLC_END_OF_FRAME set. - If the beginning of a valid frame is found, the function returns - the length. - If a framing error is found (too many 1s and not a flag) the function - returns the length with the bit HDLC_FRAMING_ERROR set. - If a CRC error is found the function returns the length with the - bit HDLC_CRC_ERROR set. - If the frame length exceeds the destination buffer size, the function - returns the length with the bit HDLC_LENGTH_ERROR set. - - src - source buffer - slen - source buffer length - count - number of bytes removed (decoded) from the source buffer - dst _ destination buffer - dsize - destination buffer size - returns - number of decoded bytes in the destination buffer and status - flag. 
- */ -int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, - int slen, int *count, unsigned char *dst, int dsize) -{ - int status=0; - - static const unsigned char fast_flag[]={ - 0x00,0x00,0x00,0x20,0x30,0x38,0x3c,0x3e,0x3f - }; - - static const unsigned char fast_flag_value[]={ - 0x00,0x7e,0xfc,0xf9,0xf3,0xe7,0xcf,0x9f,0x3f - }; - - static const unsigned char fast_abort[]={ - 0x00,0x00,0x80,0xc0,0xe0,0xf0,0xf8,0xfc,0xfe,0xff - }; - - *count = slen; - - while(slen > 0){ - if(hdlc->bit_shift==0){ - hdlc->cbin = *src++; - slen--; - hdlc->bit_shift = 8; - if(hdlc->do_adapt56){ - hdlc->bit_shift --; - } - } - - switch(hdlc->state){ - case STOPPED: - return 0; - case HDLC_FAST_IDLE: - if(hdlc->cbin == 0xff){ - hdlc->bit_shift = 0; - break; - } - hdlc->state = HDLC_GET_FLAG_B0; - hdlc->hdlc_bits1 = 0; - hdlc->bit_shift = 8; - break; - case HDLC_GET_FLAG_B0: - if(!(hdlc->cbin & 0x80)) { - hdlc->state = HDLC_GETFLAG_B1A6; - hdlc->hdlc_bits1 = 0; - } else { - if(!hdlc->do_adapt56){ - if(++hdlc->hdlc_bits1 >=8 ) if(hdlc->bit_shift==1) - hdlc->state = HDLC_FAST_IDLE; - } - } - hdlc->cbin<<=1; - hdlc->bit_shift --; - break; - case HDLC_GETFLAG_B1A6: - if(hdlc->cbin & 0x80){ - hdlc->hdlc_bits1++; - if(hdlc->hdlc_bits1==6){ - hdlc->state = HDLC_GETFLAG_B7; - } - } else { - hdlc->hdlc_bits1 = 0; - } - hdlc->cbin<<=1; - hdlc->bit_shift --; - break; - case HDLC_GETFLAG_B7: - if(hdlc->cbin & 0x80) { - hdlc->state = HDLC_GET_FLAG_B0; - } else { - hdlc->state = HDLC_GET_DATA; - hdlc->crc = 0xffff; - hdlc->shift_reg = 0; - hdlc->hdlc_bits1 = 0; - hdlc->data_bits = 0; - hdlc->data_received = 0; - } - hdlc->cbin<<=1; - hdlc->bit_shift --; - break; - case HDLC_GET_DATA: - if(hdlc->cbin & 0x80){ - hdlc->hdlc_bits1++; - switch(hdlc->hdlc_bits1){ - case 6: - break; - case 7: - if(hdlc->data_received) { - // bad frame - status = -HDLC_FRAMING_ERROR; - } - if(!hdlc->do_adapt56){ - if(hdlc->cbin==fast_abort[hdlc->bit_shift+1]){ - hdlc->state = HDLC_FAST_IDLE; - hdlc->bit_shift=1; - break; - } - } else { - hdlc->state = HDLC_GET_FLAG_B0; - } - break; - default: - hdlc->shift_reg>>=1; - hdlc->shift_reg |= 0x80; - hdlc->data_bits++; - break; - } - } else { - switch(hdlc->hdlc_bits1){ - case 5: - break; - case 6: - if(hdlc->data_received){ - if (hdlc->dstpos < 2) { - status = -HDLC_FRAMING_ERROR; - } else if (hdlc->crc != 0xf0b8){ - // crc error - status = -HDLC_CRC_ERROR; - } else { - // remove CRC - hdlc->dstpos -= 2; - // good frame - status = hdlc->dstpos; - } - } - hdlc->crc = 0xffff; - hdlc->shift_reg = 0; - hdlc->data_bits = 0; - if(!hdlc->do_adapt56){ - if(hdlc->cbin==fast_flag[hdlc->bit_shift]){ - hdlc->ffvalue = fast_flag_value[hdlc->bit_shift]; - hdlc->state = HDLC_FAST_FLAG; - hdlc->ffbit_shift = hdlc->bit_shift; - hdlc->bit_shift = 1; - } else { - hdlc->state = HDLC_GET_DATA; - hdlc->data_received = 0; - } - } else { - hdlc->state = HDLC_GET_DATA; - hdlc->data_received = 0; - } - break; - default: - hdlc->shift_reg>>=1; - hdlc->data_bits++; - break; - } - hdlc->hdlc_bits1 = 0; - } - if (status) { - hdlc->dstpos = 0; - *count -= slen; - hdlc->cbin <<= 1; - hdlc->bit_shift--; - return status; - } - if(hdlc->data_bits==8){ - hdlc->data_bits = 0; - hdlc->data_received = 1; - hdlc->crc = crc_ccitt_byte(hdlc->crc, hdlc->shift_reg); - - // good byte received - if (hdlc->dstpos < dsize) { - dst[hdlc->dstpos++] = hdlc->shift_reg; - } else { - // frame too long - status = -HDLC_LENGTH_ERROR; - hdlc->dstpos = 0; - } - } - hdlc->cbin <<= 1; - hdlc->bit_shift--; - break; - case HDLC_FAST_FLAG: - 
if(hdlc->cbin==hdlc->ffvalue){ - hdlc->bit_shift = 0; - break; - } else { - if(hdlc->cbin == 0xff){ - hdlc->state = HDLC_FAST_IDLE; - hdlc->bit_shift=0; - } else if(hdlc->ffbit_shift==8){ - hdlc->state = HDLC_GETFLAG_B7; - break; - } else { - hdlc->shift_reg = fast_abort[hdlc->ffbit_shift-1]; - hdlc->hdlc_bits1 = hdlc->ffbit_shift-2; - if(hdlc->hdlc_bits1<0)hdlc->hdlc_bits1 = 0; - hdlc->data_bits = hdlc->ffbit_shift-1; - hdlc->state = HDLC_GET_DATA; - hdlc->data_received = 0; - } - } - break; - default: - break; - } - } - *count -= slen; - return 0; -} - -/* - isdnhdlc_encode - encodes HDLC frames to a transparent bit stream. - - The bit stream starts with a beginning flag (01111110). After - that each byte is added to the bit stream with bit stuffing added - (0 after 5 1's). - When the last byte has been removed from the source buffer, the - CRC (2 bytes is added) and the frame terminates with the ending flag. - For the dchannel, the idle character (all 1's) is also added at the end. - If this function is called with empty source buffer (slen=0), flags or - idle character will be generated. - - src - source buffer - slen - source buffer length - count - number of bytes removed (encoded) from source buffer - dst _ destination buffer - dsize - destination buffer size - returns - number of encoded bytes in the destination buffer -*/ -int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, - unsigned short slen, int *count, - unsigned char *dst, int dsize) -{ - static const unsigned char xfast_flag_value[] = { - 0x7e,0x3f,0x9f,0xcf,0xe7,0xf3,0xf9,0xfc,0x7e - }; - - int len = 0; - - *count = slen; - - while (dsize > 0) { - if(hdlc->bit_shift==0){ - if(slen && !hdlc->do_closing){ - hdlc->shift_reg = *src++; - slen--; - if (slen == 0) - hdlc->do_closing = 1; /* closing sequence, CRC + flag(s) */ - hdlc->bit_shift = 8; - } else { - if(hdlc->state == HDLC_SEND_DATA){ - if(hdlc->data_received){ - hdlc->state = HDLC_SEND_CRC1; - hdlc->crc ^= 0xffff; - hdlc->bit_shift = 8; - hdlc->shift_reg = hdlc->crc & 0xff; - } else if(!hdlc->do_adapt56){ - hdlc->state = HDLC_SEND_FAST_FLAG; - } else { - hdlc->state = HDLC_SENDFLAG_B0; - } - } - - } - } - - switch(hdlc->state){ - case STOPPED: - while (dsize--) - *dst++ = 0xff; - - return dsize; - case HDLC_SEND_FAST_FLAG: - hdlc->do_closing = 0; - if(slen == 0){ - *dst++ = hdlc->ffvalue; - len++; - dsize--; - break; - } - if(hdlc->bit_shift==8){ - hdlc->cbin = hdlc->ffvalue>>(8-hdlc->data_bits); - hdlc->state = HDLC_SEND_DATA; - hdlc->crc = 0xffff; - hdlc->hdlc_bits1 = 0; - hdlc->data_received = 1; - } - break; - case HDLC_SENDFLAG_B0: - hdlc->do_closing = 0; - hdlc->cbin <<= 1; - hdlc->data_bits++; - hdlc->hdlc_bits1 = 0; - hdlc->state = HDLC_SENDFLAG_B1A6; - break; - case HDLC_SENDFLAG_B1A6: - hdlc->cbin <<= 1; - hdlc->data_bits++; - hdlc->cbin++; - if(++hdlc->hdlc_bits1 == 6) - hdlc->state = HDLC_SENDFLAG_B7; - break; - case HDLC_SENDFLAG_B7: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if(slen == 0){ - hdlc->state = HDLC_SENDFLAG_B0; - break; - } - if(hdlc->bit_shift==8){ - hdlc->state = HDLC_SEND_DATA; - hdlc->crc = 0xffff; - hdlc->hdlc_bits1 = 0; - hdlc->data_received = 1; - } - break; - case HDLC_SEND_FIRST_FLAG: - hdlc->data_received = 1; - if(hdlc->data_bits==8){ - hdlc->state = HDLC_SEND_DATA; - hdlc->crc = 0xffff; - hdlc->hdlc_bits1 = 0; - break; - } - hdlc->cbin <<= 1; - hdlc->data_bits++; - if(hdlc->shift_reg & 0x01) - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - if(hdlc->bit_shift==0){ - hdlc->state = HDLC_SEND_DATA; 
- hdlc->crc = 0xffff; - hdlc->hdlc_bits1 = 0; - } - break; - case HDLC_SEND_DATA: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ - hdlc->hdlc_bits1 = 0; - break; - } - if(hdlc->bit_shift==8){ - hdlc->crc = crc_ccitt_byte(hdlc->crc, hdlc->shift_reg); - } - if(hdlc->shift_reg & 0x01){ - hdlc->hdlc_bits1++; - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } else { - hdlc->hdlc_bits1 = 0; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } - break; - case HDLC_SEND_CRC1: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ - hdlc->hdlc_bits1 = 0; - break; - } - if(hdlc->shift_reg & 0x01){ - hdlc->hdlc_bits1++; - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } else { - hdlc->hdlc_bits1 = 0; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } - if(hdlc->bit_shift==0){ - hdlc->shift_reg = (hdlc->crc >> 8); - hdlc->state = HDLC_SEND_CRC2; - hdlc->bit_shift = 8; - } - break; - case HDLC_SEND_CRC2: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ - hdlc->hdlc_bits1 = 0; - break; - } - if(hdlc->shift_reg & 0x01){ - hdlc->hdlc_bits1++; - hdlc->cbin++; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } else { - hdlc->hdlc_bits1 = 0; - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - } - if(hdlc->bit_shift==0){ - hdlc->shift_reg = 0x7e; - hdlc->state = HDLC_SEND_CLOSING_FLAG; - hdlc->bit_shift = 8; - } - break; - case HDLC_SEND_CLOSING_FLAG: - hdlc->cbin <<= 1; - hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ - hdlc->hdlc_bits1 = 0; - break; - } - if(hdlc->shift_reg & 0x01){ - hdlc->cbin++; - } - hdlc->shift_reg >>= 1; - hdlc->bit_shift--; - if(hdlc->bit_shift==0){ - hdlc->ffvalue = xfast_flag_value[hdlc->data_bits]; - if(hdlc->dchannel){ - hdlc->ffvalue = 0x7e; - hdlc->state = HDLC_SEND_IDLE1; - hdlc->bit_shift = 8-hdlc->data_bits; - if(hdlc->bit_shift==0) - hdlc->state = HDLC_SEND_FAST_IDLE; - } else { - if(!hdlc->do_adapt56){ - hdlc->state = HDLC_SEND_FAST_FLAG; - hdlc->data_received = 0; - } else { - hdlc->state = HDLC_SENDFLAG_B0; - hdlc->data_received = 0; - } - // Finished with this frame, send flags - if (dsize > 1) dsize = 1; - } - } - break; - case HDLC_SEND_IDLE1: - hdlc->do_closing = 0; - hdlc->cbin <<= 1; - hdlc->cbin++; - hdlc->data_bits++; - hdlc->bit_shift--; - if(hdlc->bit_shift==0){ - hdlc->state = HDLC_SEND_FAST_IDLE; - hdlc->bit_shift = 0; - } - break; - case HDLC_SEND_FAST_IDLE: - hdlc->do_closing = 0; - hdlc->cbin = 0xff; - hdlc->data_bits = 8; - if(hdlc->bit_shift == 8){ - hdlc->cbin = 0x7e; - hdlc->state = HDLC_SEND_FIRST_FLAG; - } else { - *dst++ = hdlc->cbin; - hdlc->bit_shift = hdlc->data_bits = 0; - len++; - dsize = 0; - } - break; - default: - break; - } - if(hdlc->do_adapt56){ - if(hdlc->data_bits==7){ - hdlc->cbin <<= 1; - hdlc->cbin++; - hdlc->data_bits++; - } - } - if(hdlc->data_bits==8){ - *dst++ = hdlc->cbin; - hdlc->data_bits = 0; - len++; - dsize--; - } - } - *count -= slen; - - return len; -} - -EXPORT_SYMBOL(isdnhdlc_rcv_init); -EXPORT_SYMBOL(isdnhdlc_decode); -EXPORT_SYMBOL(isdnhdlc_out_init); -EXPORT_SYMBOL(isdnhdlc_encode); diff --git a/drivers/isdn/hisax/isdnhdlc.h b/drivers/isdn/hisax/isdnhdlc.h deleted file mode 100644 index cf0a95a24015..000000000000 --- a/drivers/isdn/hisax/isdnhdlc.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * isdnhdlc.h -- General purpose ISDN HDLC decoder. - * - * Implementation of a HDLC decoder/encoder in software. - * Neccessary because some ISDN devices don't have HDLC - * controllers. Also included: a bit reversal table. 
- * - *Copyright (C) 2002 Wolfgang Mües - * 2001 Frode Isaksen - * 2001 Kai Germaschewski - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __ISDNHDLC_H__ -#define __ISDNHDLC_H__ - -struct isdnhdlc_vars { - int bit_shift; - int hdlc_bits1; - int data_bits; - int ffbit_shift; // encoding only - int state; - int dstpos; - - unsigned short crc; - - unsigned char cbin; - unsigned char shift_reg; - unsigned char ffvalue; - - unsigned int data_received:1; // set if transferring data - unsigned int dchannel:1; // set if D channel (send idle instead of flags) - unsigned int do_adapt56:1; // set if 56K adaptation - unsigned int do_closing:1; // set if in closing phase (need to send CRC + flag -}; - - -/* - The return value from isdnhdlc_decode is - the frame length, 0 if no complete frame was decoded, - or a negative error number -*/ -#define HDLC_FRAMING_ERROR 1 -#define HDLC_CRC_ERROR 2 -#define HDLC_LENGTH_ERROR 3 - -extern void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56); - -extern int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, int slen,int *count, - unsigned char *dst, int dsize); - -extern void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc,int is_d_channel,int do_adapt56); - -extern int isdnhdlc_encode (struct isdnhdlc_vars *hdlc,const unsigned char *src,unsigned short slen,int *count, - unsigned char *dst,int dsize); - -#endif /* __ISDNHDLC_H__ */ diff --git a/drivers/isdn/hisax/st5481.h b/drivers/isdn/hisax/st5481.h index cff7a6354334..64f78a8c28c5 100644 --- a/drivers/isdn/hisax/st5481.h +++ b/drivers/isdn/hisax/st5481.h @@ -226,7 +226,7 @@ printk(KERN_WARNING "%s:%s: " format "\n" , __FILE__, __func__ , ## arg) #define INFO(format, arg...) \ printk(KERN_INFO "%s:%s: " format "\n" , __FILE__, __func__ , ## arg) -#include "isdnhdlc.h" +#include #include "fsm.h" #include "hisax_if.h" #include diff --git a/drivers/isdn/i4l/Kconfig b/drivers/isdn/i4l/Kconfig index ed3510f273d8..dd744ffd240b 100644 --- a/drivers/isdn/i4l/Kconfig +++ b/drivers/isdn/i4l/Kconfig @@ -2,6 +2,8 @@ # Old ISDN4Linux config # +if ISDN_I4L + config ISDN_PPP bool "Support synchronous PPP" depends on INET @@ -135,3 +137,12 @@ source "drivers/isdn/act2000/Kconfig" source "drivers/isdn/hysdn/Kconfig" endmenu +# end ISDN_I4L +endif + +config ISDN_HDLC + tristate + depends on HISAX_ST5481 + select CRC_CCITT + select BITREVERSE + diff --git a/drivers/isdn/i4l/Makefile b/drivers/isdn/i4l/Makefile index 49a06c0005dd..cb9d3bb9fae0 100644 --- a/drivers/isdn/i4l/Makefile +++ b/drivers/isdn/i4l/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_ISDN_I4L) += isdn.o obj-$(CONFIG_ISDN_PPP_BSDCOMP) += isdn_bsdcomp.o +obj-$(CONFIG_ISDN_HDLC) += isdnhdlc.o # Multipart objects. 
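With the ISDN_HDLC Kconfig symbol and the Makefile rule above in place, the transmit direction is driven the same way. The sketch below is purely illustrative — the fifo size and the push_fifo() callback are assumptions; only isdnhdlc_out_init()/isdnhdlc_encode() and their documented return values come from the code moved in this patch:

	#include <linux/isdn/hdlc.h>

	/*
	 * Encode one HDLC frame into fixed-size chunks for a hardware fifo.
	 * hdlc_tx must have been set up once with
	 * isdnhdlc_out_init(hdlc_tx, 0, 0) for a plain 64k B channel.
	 */
	static void my_bchan_xmit(struct isdnhdlc_vars *hdlc_tx,
				  const unsigned char *frame, unsigned short len,
				  void (*push_fifo)(const unsigned char *, int))
	{
		unsigned char fifo[32];
		int used, produced;

		while (len > 0) {
			produced = isdnhdlc_encode(hdlc_tx, frame, len, &used,
						   fifo, sizeof(fifo));
			frame += used;
			len -= used;
			if (produced > 0)
				push_fifo(fifo, produced);	/* hand bitstream to the chip */
		}
		/*
		 * Once the payload is consumed the encoder appends the CRC and
		 * closing flag(s) itself; calling it again with slen == 0
		 * (e.g. from the tx-complete interrupt) drains any remaining
		 * bits and then produces flag or idle fill.
		 */
	}
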
diff --git a/drivers/isdn/i4l/isdnhdlc.c b/drivers/isdn/i4l/isdnhdlc.c new file mode 100644 index 000000000000..44ec7418496b --- /dev/null +++ b/drivers/isdn/i4l/isdnhdlc.c @@ -0,0 +1,603 @@ +/* + * isdnhdlc.c -- General purpose ISDN HDLC decoder. + * + *Copyright (C) 2002 Wolfgang Mües + * 2001 Frode Isaksen + * 2001 Kai Germaschewski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include + +/*-------------------------------------------------------------------*/ + +MODULE_AUTHOR("Wolfgang Mües , " + "Frode Isaksen , " + "Kai Germaschewski "); +MODULE_DESCRIPTION("General purpose ISDN HDLC decoder"); +MODULE_LICENSE("GPL"); + +/*-------------------------------------------------------------------*/ + +enum { + HDLC_FAST_IDLE,HDLC_GET_FLAG_B0,HDLC_GETFLAG_B1A6,HDLC_GETFLAG_B7, + HDLC_GET_DATA,HDLC_FAST_FLAG +}; + +enum { + HDLC_SEND_DATA,HDLC_SEND_CRC1,HDLC_SEND_FAST_FLAG, + HDLC_SEND_FIRST_FLAG,HDLC_SEND_CRC2,HDLC_SEND_CLOSING_FLAG, + HDLC_SEND_IDLE1,HDLC_SEND_FAST_IDLE,HDLC_SENDFLAG_B0, + HDLC_SENDFLAG_B1A6,HDLC_SENDFLAG_B7,STOPPED +}; + +void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56) +{ + hdlc->bit_shift = 0; + hdlc->hdlc_bits1 = 0; + hdlc->data_bits = 0; + hdlc->ffbit_shift = 0; + hdlc->data_received = 0; + hdlc->state = HDLC_GET_DATA; + hdlc->do_adapt56 = do_adapt56; + hdlc->dchannel = 0; + hdlc->crc = 0; + hdlc->cbin = 0; + hdlc->shift_reg = 0; + hdlc->ffvalue = 0; + hdlc->dstpos = 0; +} + +void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc, int is_d_channel, int do_adapt56) +{ + hdlc->bit_shift = 0; + hdlc->hdlc_bits1 = 0; + hdlc->data_bits = 0; + hdlc->ffbit_shift = 0; + hdlc->data_received = 0; + hdlc->do_closing = 0; + hdlc->ffvalue = 0; + if (is_d_channel) { + hdlc->dchannel = 1; + hdlc->state = HDLC_SEND_FIRST_FLAG; + } else { + hdlc->dchannel = 0; + hdlc->state = HDLC_SEND_FAST_FLAG; + hdlc->ffvalue = 0x7e; + } + hdlc->cbin = 0x7e; + hdlc->bit_shift = 0; + if(do_adapt56){ + hdlc->do_adapt56 = 1; + hdlc->data_bits = 0; + hdlc->state = HDLC_SENDFLAG_B0; + } else { + hdlc->do_adapt56 = 0; + hdlc->data_bits = 8; + } + hdlc->shift_reg = 0; +} + +/* + isdnhdlc_decode - decodes HDLC frames from a transparent bit stream. + + The source buffer is scanned for valid HDLC frames looking for + flags (01111110) to indicate the start of a frame. If the start of + the frame is found, the bit stuffing is removed (0 after 5 1's). + When a new flag is found, the complete frame has been received + and the CRC is checked. + If a valid frame is found, the function returns the frame length + excluding the CRC with the bit HDLC_END_OF_FRAME set. + If the beginning of a valid frame is found, the function returns + the length. + If a framing error is found (too many 1s and not a flag) the function + returns the length with the bit HDLC_FRAMING_ERROR set. 
+ If a CRC error is found the function returns the length with the + bit HDLC_CRC_ERROR set. + If the frame length exceeds the destination buffer size, the function + returns the length with the bit HDLC_LENGTH_ERROR set. + + src - source buffer + slen - source buffer length + count - number of bytes removed (decoded) from the source buffer + dst _ destination buffer + dsize - destination buffer size + returns - number of decoded bytes in the destination buffer and status + flag. + */ +int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, + int slen, int *count, unsigned char *dst, int dsize) +{ + int status=0; + + static const unsigned char fast_flag[]={ + 0x00,0x00,0x00,0x20,0x30,0x38,0x3c,0x3e,0x3f + }; + + static const unsigned char fast_flag_value[]={ + 0x00,0x7e,0xfc,0xf9,0xf3,0xe7,0xcf,0x9f,0x3f + }; + + static const unsigned char fast_abort[]={ + 0x00,0x00,0x80,0xc0,0xe0,0xf0,0xf8,0xfc,0xfe,0xff + }; + + *count = slen; + + while(slen > 0){ + if(hdlc->bit_shift==0){ + hdlc->cbin = *src++; + slen--; + hdlc->bit_shift = 8; + if(hdlc->do_adapt56){ + hdlc->bit_shift --; + } + } + + switch(hdlc->state){ + case STOPPED: + return 0; + case HDLC_FAST_IDLE: + if(hdlc->cbin == 0xff){ + hdlc->bit_shift = 0; + break; + } + hdlc->state = HDLC_GET_FLAG_B0; + hdlc->hdlc_bits1 = 0; + hdlc->bit_shift = 8; + break; + case HDLC_GET_FLAG_B0: + if(!(hdlc->cbin & 0x80)) { + hdlc->state = HDLC_GETFLAG_B1A6; + hdlc->hdlc_bits1 = 0; + } else { + if(!hdlc->do_adapt56){ + if(++hdlc->hdlc_bits1 >=8 ) if(hdlc->bit_shift==1) + hdlc->state = HDLC_FAST_IDLE; + } + } + hdlc->cbin<<=1; + hdlc->bit_shift --; + break; + case HDLC_GETFLAG_B1A6: + if(hdlc->cbin & 0x80){ + hdlc->hdlc_bits1++; + if(hdlc->hdlc_bits1==6){ + hdlc->state = HDLC_GETFLAG_B7; + } + } else { + hdlc->hdlc_bits1 = 0; + } + hdlc->cbin<<=1; + hdlc->bit_shift --; + break; + case HDLC_GETFLAG_B7: + if(hdlc->cbin & 0x80) { + hdlc->state = HDLC_GET_FLAG_B0; + } else { + hdlc->state = HDLC_GET_DATA; + hdlc->crc = 0xffff; + hdlc->shift_reg = 0; + hdlc->hdlc_bits1 = 0; + hdlc->data_bits = 0; + hdlc->data_received = 0; + } + hdlc->cbin<<=1; + hdlc->bit_shift --; + break; + case HDLC_GET_DATA: + if(hdlc->cbin & 0x80){ + hdlc->hdlc_bits1++; + switch(hdlc->hdlc_bits1){ + case 6: + break; + case 7: + if(hdlc->data_received) { + // bad frame + status = -HDLC_FRAMING_ERROR; + } + if(!hdlc->do_adapt56){ + if(hdlc->cbin==fast_abort[hdlc->bit_shift+1]){ + hdlc->state = HDLC_FAST_IDLE; + hdlc->bit_shift=1; + break; + } + } else { + hdlc->state = HDLC_GET_FLAG_B0; + } + break; + default: + hdlc->shift_reg>>=1; + hdlc->shift_reg |= 0x80; + hdlc->data_bits++; + break; + } + } else { + switch(hdlc->hdlc_bits1){ + case 5: + break; + case 6: + if(hdlc->data_received){ + if (hdlc->dstpos < 2) { + status = -HDLC_FRAMING_ERROR; + } else if (hdlc->crc != 0xf0b8){ + // crc error + status = -HDLC_CRC_ERROR; + } else { + // remove CRC + hdlc->dstpos -= 2; + // good frame + status = hdlc->dstpos; + } + } + hdlc->crc = 0xffff; + hdlc->shift_reg = 0; + hdlc->data_bits = 0; + if(!hdlc->do_adapt56){ + if(hdlc->cbin==fast_flag[hdlc->bit_shift]){ + hdlc->ffvalue = fast_flag_value[hdlc->bit_shift]; + hdlc->state = HDLC_FAST_FLAG; + hdlc->ffbit_shift = hdlc->bit_shift; + hdlc->bit_shift = 1; + } else { + hdlc->state = HDLC_GET_DATA; + hdlc->data_received = 0; + } + } else { + hdlc->state = HDLC_GET_DATA; + hdlc->data_received = 0; + } + break; + default: + hdlc->shift_reg>>=1; + hdlc->data_bits++; + break; + } + hdlc->hdlc_bits1 = 0; + } + if (status) { + hdlc->dstpos 
= 0; + *count -= slen; + hdlc->cbin <<= 1; + hdlc->bit_shift--; + return status; + } + if(hdlc->data_bits==8){ + hdlc->data_bits = 0; + hdlc->data_received = 1; + hdlc->crc = crc_ccitt_byte(hdlc->crc, hdlc->shift_reg); + + // good byte received + if (hdlc->dstpos < dsize) { + dst[hdlc->dstpos++] = hdlc->shift_reg; + } else { + // frame too long + status = -HDLC_LENGTH_ERROR; + hdlc->dstpos = 0; + } + } + hdlc->cbin <<= 1; + hdlc->bit_shift--; + break; + case HDLC_FAST_FLAG: + if(hdlc->cbin==hdlc->ffvalue){ + hdlc->bit_shift = 0; + break; + } else { + if(hdlc->cbin == 0xff){ + hdlc->state = HDLC_FAST_IDLE; + hdlc->bit_shift=0; + } else if(hdlc->ffbit_shift==8){ + hdlc->state = HDLC_GETFLAG_B7; + break; + } else { + hdlc->shift_reg = fast_abort[hdlc->ffbit_shift-1]; + hdlc->hdlc_bits1 = hdlc->ffbit_shift-2; + if(hdlc->hdlc_bits1<0)hdlc->hdlc_bits1 = 0; + hdlc->data_bits = hdlc->ffbit_shift-1; + hdlc->state = HDLC_GET_DATA; + hdlc->data_received = 0; + } + } + break; + default: + break; + } + } + *count -= slen; + return 0; +} + +/* + isdnhdlc_encode - encodes HDLC frames to a transparent bit stream. + + The bit stream starts with a beginning flag (01111110). After + that each byte is added to the bit stream with bit stuffing added + (0 after 5 1's). + When the last byte has been removed from the source buffer, the + CRC (2 bytes is added) and the frame terminates with the ending flag. + For the dchannel, the idle character (all 1's) is also added at the end. + If this function is called with empty source buffer (slen=0), flags or + idle character will be generated. + + src - source buffer + slen - source buffer length + count - number of bytes removed (encoded) from source buffer + dst _ destination buffer + dsize - destination buffer size + returns - number of encoded bytes in the destination buffer +*/ +int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, + unsigned short slen, int *count, + unsigned char *dst, int dsize) +{ + static const unsigned char xfast_flag_value[] = { + 0x7e,0x3f,0x9f,0xcf,0xe7,0xf3,0xf9,0xfc,0x7e + }; + + int len = 0; + + *count = slen; + + while (dsize > 0) { + if(hdlc->bit_shift==0){ + if(slen && !hdlc->do_closing){ + hdlc->shift_reg = *src++; + slen--; + if (slen == 0) + hdlc->do_closing = 1; /* closing sequence, CRC + flag(s) */ + hdlc->bit_shift = 8; + } else { + if(hdlc->state == HDLC_SEND_DATA){ + if(hdlc->data_received){ + hdlc->state = HDLC_SEND_CRC1; + hdlc->crc ^= 0xffff; + hdlc->bit_shift = 8; + hdlc->shift_reg = hdlc->crc & 0xff; + } else if(!hdlc->do_adapt56){ + hdlc->state = HDLC_SEND_FAST_FLAG; + } else { + hdlc->state = HDLC_SENDFLAG_B0; + } + } + + } + } + + switch(hdlc->state){ + case STOPPED: + while (dsize--) + *dst++ = 0xff; + + return dsize; + case HDLC_SEND_FAST_FLAG: + hdlc->do_closing = 0; + if(slen == 0){ + *dst++ = hdlc->ffvalue; + len++; + dsize--; + break; + } + if(hdlc->bit_shift==8){ + hdlc->cbin = hdlc->ffvalue>>(8-hdlc->data_bits); + hdlc->state = HDLC_SEND_DATA; + hdlc->crc = 0xffff; + hdlc->hdlc_bits1 = 0; + hdlc->data_received = 1; + } + break; + case HDLC_SENDFLAG_B0: + hdlc->do_closing = 0; + hdlc->cbin <<= 1; + hdlc->data_bits++; + hdlc->hdlc_bits1 = 0; + hdlc->state = HDLC_SENDFLAG_B1A6; + break; + case HDLC_SENDFLAG_B1A6: + hdlc->cbin <<= 1; + hdlc->data_bits++; + hdlc->cbin++; + if(++hdlc->hdlc_bits1 == 6) + hdlc->state = HDLC_SENDFLAG_B7; + break; + case HDLC_SENDFLAG_B7: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if(slen == 0){ + hdlc->state = HDLC_SENDFLAG_B0; + break; + } + if(hdlc->bit_shift==8){ 
+ hdlc->state = HDLC_SEND_DATA; + hdlc->crc = 0xffff; + hdlc->hdlc_bits1 = 0; + hdlc->data_received = 1; + } + break; + case HDLC_SEND_FIRST_FLAG: + hdlc->data_received = 1; + if(hdlc->data_bits==8){ + hdlc->state = HDLC_SEND_DATA; + hdlc->crc = 0xffff; + hdlc->hdlc_bits1 = 0; + break; + } + hdlc->cbin <<= 1; + hdlc->data_bits++; + if(hdlc->shift_reg & 0x01) + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + if(hdlc->bit_shift==0){ + hdlc->state = HDLC_SEND_DATA; + hdlc->crc = 0xffff; + hdlc->hdlc_bits1 = 0; + } + break; + case HDLC_SEND_DATA: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if(hdlc->hdlc_bits1 == 5){ + hdlc->hdlc_bits1 = 0; + break; + } + if(hdlc->bit_shift==8){ + hdlc->crc = crc_ccitt_byte(hdlc->crc, hdlc->shift_reg); + } + if(hdlc->shift_reg & 0x01){ + hdlc->hdlc_bits1++; + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } else { + hdlc->hdlc_bits1 = 0; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } + break; + case HDLC_SEND_CRC1: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if(hdlc->hdlc_bits1 == 5){ + hdlc->hdlc_bits1 = 0; + break; + } + if(hdlc->shift_reg & 0x01){ + hdlc->hdlc_bits1++; + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } else { + hdlc->hdlc_bits1 = 0; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } + if(hdlc->bit_shift==0){ + hdlc->shift_reg = (hdlc->crc >> 8); + hdlc->state = HDLC_SEND_CRC2; + hdlc->bit_shift = 8; + } + break; + case HDLC_SEND_CRC2: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if(hdlc->hdlc_bits1 == 5){ + hdlc->hdlc_bits1 = 0; + break; + } + if(hdlc->shift_reg & 0x01){ + hdlc->hdlc_bits1++; + hdlc->cbin++; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } else { + hdlc->hdlc_bits1 = 0; + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + } + if(hdlc->bit_shift==0){ + hdlc->shift_reg = 0x7e; + hdlc->state = HDLC_SEND_CLOSING_FLAG; + hdlc->bit_shift = 8; + } + break; + case HDLC_SEND_CLOSING_FLAG: + hdlc->cbin <<= 1; + hdlc->data_bits++; + if(hdlc->hdlc_bits1 == 5){ + hdlc->hdlc_bits1 = 0; + break; + } + if(hdlc->shift_reg & 0x01){ + hdlc->cbin++; + } + hdlc->shift_reg >>= 1; + hdlc->bit_shift--; + if(hdlc->bit_shift==0){ + hdlc->ffvalue = xfast_flag_value[hdlc->data_bits]; + if(hdlc->dchannel){ + hdlc->ffvalue = 0x7e; + hdlc->state = HDLC_SEND_IDLE1; + hdlc->bit_shift = 8-hdlc->data_bits; + if(hdlc->bit_shift==0) + hdlc->state = HDLC_SEND_FAST_IDLE; + } else { + if(!hdlc->do_adapt56){ + hdlc->state = HDLC_SEND_FAST_FLAG; + hdlc->data_received = 0; + } else { + hdlc->state = HDLC_SENDFLAG_B0; + hdlc->data_received = 0; + } + // Finished with this frame, send flags + if (dsize > 1) dsize = 1; + } + } + break; + case HDLC_SEND_IDLE1: + hdlc->do_closing = 0; + hdlc->cbin <<= 1; + hdlc->cbin++; + hdlc->data_bits++; + hdlc->bit_shift--; + if(hdlc->bit_shift==0){ + hdlc->state = HDLC_SEND_FAST_IDLE; + hdlc->bit_shift = 0; + } + break; + case HDLC_SEND_FAST_IDLE: + hdlc->do_closing = 0; + hdlc->cbin = 0xff; + hdlc->data_bits = 8; + if(hdlc->bit_shift == 8){ + hdlc->cbin = 0x7e; + hdlc->state = HDLC_SEND_FIRST_FLAG; + } else { + *dst++ = hdlc->cbin; + hdlc->bit_shift = hdlc->data_bits = 0; + len++; + dsize = 0; + } + break; + default: + break; + } + if(hdlc->do_adapt56){ + if(hdlc->data_bits==7){ + hdlc->cbin <<= 1; + hdlc->cbin++; + hdlc->data_bits++; + } + } + if(hdlc->data_bits==8){ + *dst++ = hdlc->cbin; + hdlc->data_bits = 0; + len++; + dsize--; + } + } + *count -= slen; + + return len; +} + +EXPORT_SYMBOL(isdnhdlc_rcv_init); +EXPORT_SYMBOL(isdnhdlc_decode); +EXPORT_SYMBOL(isdnhdlc_out_init); 
+EXPORT_SYMBOL(isdnhdlc_encode); diff --git a/include/linux/isdn/hdlc.h b/include/linux/isdn/hdlc.h new file mode 100644 index 000000000000..cf0a95a24015 --- /dev/null +++ b/include/linux/isdn/hdlc.h @@ -0,0 +1,70 @@ +/* + * isdnhdlc.h -- General purpose ISDN HDLC decoder. + * + * Implementation of a HDLC decoder/encoder in software. + * Neccessary because some ISDN devices don't have HDLC + * controllers. Also included: a bit reversal table. + * + *Copyright (C) 2002 Wolfgang Mües + * 2001 Frode Isaksen + * 2001 Kai Germaschewski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#ifndef __ISDNHDLC_H__ +#define __ISDNHDLC_H__ + +struct isdnhdlc_vars { + int bit_shift; + int hdlc_bits1; + int data_bits; + int ffbit_shift; // encoding only + int state; + int dstpos; + + unsigned short crc; + + unsigned char cbin; + unsigned char shift_reg; + unsigned char ffvalue; + + unsigned int data_received:1; // set if transferring data + unsigned int dchannel:1; // set if D channel (send idle instead of flags) + unsigned int do_adapt56:1; // set if 56K adaptation + unsigned int do_closing:1; // set if in closing phase (need to send CRC + flag +}; + + +/* + The return value from isdnhdlc_decode is + the frame length, 0 if no complete frame was decoded, + or a negative error number +*/ +#define HDLC_FRAMING_ERROR 1 +#define HDLC_CRC_ERROR 2 +#define HDLC_LENGTH_ERROR 3 + +extern void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56); + +extern int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, int slen,int *count, + unsigned char *dst, int dsize); + +extern void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc,int is_d_channel,int do_adapt56); + +extern int isdnhdlc_encode (struct isdnhdlc_vars *hdlc,const unsigned char *src,unsigned short slen,int *count, + unsigned char *dst,int dsize); + +#endif /* __ISDNHDLC_H__ */ -- cgit v1.2.3 From 6bd4bcd3cd8affc09eaee7efbc037f65f4a71501 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Wed, 8 Jul 2009 19:11:09 +0200 Subject: ISDN: Clean up isdnhdlc code Clean up isdnhdlc to meet current code standard. Remove hint to already removed bit reversal table. Signed-off-by: Karsten Keil --- drivers/isdn/i4l/isdnhdlc.c | 366 +++++++++++++++++++++++--------------------- include/linux/isdn/hdlc.h | 68 ++++---- 2 files changed, 231 insertions(+), 203 deletions(-) (limited to 'include') diff --git a/drivers/isdn/i4l/isdnhdlc.c b/drivers/isdn/i4l/isdnhdlc.c index 44ec7418496b..b80e55ab8914 100644 --- a/drivers/isdn/i4l/isdnhdlc.c +++ b/drivers/isdn/i4l/isdnhdlc.c @@ -1,23 +1,24 @@ /* * isdnhdlc.c -- General purpose ISDN HDLC decoder. 
* - *Copyright (C) 2002 Wolfgang Mües - * 2001 Frode Isaksen - * 2001 Kai Germaschewski + * Copyright (C) + * 2002 Wolfgang Mües + * 2001 Frode Isaksen + * 2001 Kai Germaschewski * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include @@ -36,20 +37,20 @@ MODULE_LICENSE("GPL"); /*-------------------------------------------------------------------*/ enum { - HDLC_FAST_IDLE,HDLC_GET_FLAG_B0,HDLC_GETFLAG_B1A6,HDLC_GETFLAG_B7, - HDLC_GET_DATA,HDLC_FAST_FLAG + HDLC_FAST_IDLE, HDLC_GET_FLAG_B0, HDLC_GETFLAG_B1A6, HDLC_GETFLAG_B7, + HDLC_GET_DATA, HDLC_FAST_FLAG }; enum { - HDLC_SEND_DATA,HDLC_SEND_CRC1,HDLC_SEND_FAST_FLAG, - HDLC_SEND_FIRST_FLAG,HDLC_SEND_CRC2,HDLC_SEND_CLOSING_FLAG, - HDLC_SEND_IDLE1,HDLC_SEND_FAST_IDLE,HDLC_SENDFLAG_B0, - HDLC_SENDFLAG_B1A6,HDLC_SENDFLAG_B7,STOPPED + HDLC_SEND_DATA, HDLC_SEND_CRC1, HDLC_SEND_FAST_FLAG, + HDLC_SEND_FIRST_FLAG, HDLC_SEND_CRC2, HDLC_SEND_CLOSING_FLAG, + HDLC_SEND_IDLE1, HDLC_SEND_FAST_IDLE, HDLC_SENDFLAG_B0, + HDLC_SENDFLAG_B1A6, HDLC_SENDFLAG_B7, STOPPED }; -void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56) +void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, int do_adapt56) { - hdlc->bit_shift = 0; + hdlc->bit_shift = 0; hdlc->hdlc_bits1 = 0; hdlc->data_bits = 0; hdlc->ffbit_shift = 0; @@ -63,10 +64,12 @@ void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56) hdlc->ffvalue = 0; hdlc->dstpos = 0; } +EXPORT_SYMBOL(isdnhdlc_out_init); -void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc, int is_d_channel, int do_adapt56) +void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, int is_d_channel, + int do_adapt56) { - hdlc->bit_shift = 0; + hdlc->bit_shift = 0; hdlc->hdlc_bits1 = 0; hdlc->data_bits = 0; hdlc->ffbit_shift = 0; @@ -83,7 +86,7 @@ void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc, int is_d_channel, int do_ada } hdlc->cbin = 0x7e; hdlc->bit_shift = 0; - if(do_adapt56){ + if (do_adapt56) { hdlc->do_adapt56 = 1; hdlc->data_bits = 0; hdlc->state = HDLC_SENDFLAG_B0; @@ -93,6 +96,25 @@ void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc, int is_d_channel, int do_ada } hdlc->shift_reg = 0; } +EXPORT_SYMBOL(isdnhdlc_rcv_init); + +static int +check_frame(struct isdnhdlc_vars *hdlc) +{ 
+ int status; + + if (hdlc->dstpos < 2) /* too small - framing error */ + status = -HDLC_FRAMING_ERROR; + else if (hdlc->crc != 0xf0b8) /* crc error */ + status = -HDLC_CRC_ERROR; + else { + /* remove CRC */ + hdlc->dstpos -= 2; + /* good frame */ + status = hdlc->dstpos; + } + return status; +} /* isdnhdlc_decode - decodes HDLC frames from a transparent bit stream. @@ -121,40 +143,63 @@ void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc, int is_d_channel, int do_ada returns - number of decoded bytes in the destination buffer and status flag. */ -int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, - int slen, int *count, unsigned char *dst, int dsize) +int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, int slen, + int *count, u8 *dst, int dsize) { - int status=0; + int status = 0; - static const unsigned char fast_flag[]={ - 0x00,0x00,0x00,0x20,0x30,0x38,0x3c,0x3e,0x3f + static const unsigned char fast_flag[] = { + 0x00, 0x00, 0x00, 0x20, 0x30, 0x38, 0x3c, 0x3e, 0x3f }; - static const unsigned char fast_flag_value[]={ - 0x00,0x7e,0xfc,0xf9,0xf3,0xe7,0xcf,0x9f,0x3f + static const unsigned char fast_flag_value[] = { + 0x00, 0x7e, 0xfc, 0xf9, 0xf3, 0xe7, 0xcf, 0x9f, 0x3f }; - static const unsigned char fast_abort[]={ - 0x00,0x00,0x80,0xc0,0xe0,0xf0,0xf8,0xfc,0xfe,0xff + static const unsigned char fast_abort[] = { + 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff }; +#define handle_fast_flag(h) \ + do {\ + if (h->cbin == fast_flag[h->bit_shift]) {\ + h->ffvalue = fast_flag_value[h->bit_shift];\ + h->state = HDLC_FAST_FLAG;\ + h->ffbit_shift = h->bit_shift;\ + h->bit_shift = 1;\ + } else {\ + h->state = HDLC_GET_DATA;\ + h->data_received = 0;\ + } \ + } while (0) + +#define handle_abort(h) \ + do {\ + h->shift_reg = fast_abort[h->ffbit_shift - 1];\ + h->hdlc_bits1 = h->ffbit_shift - 2;\ + if (h->hdlc_bits1 < 0)\ + h->hdlc_bits1 = 0;\ + h->data_bits = h->ffbit_shift - 1;\ + h->state = HDLC_GET_DATA;\ + h->data_received = 0;\ + } while (0) + *count = slen; - while(slen > 0){ - if(hdlc->bit_shift==0){ + while (slen > 0) { + if (hdlc->bit_shift == 0) { hdlc->cbin = *src++; slen--; hdlc->bit_shift = 8; - if(hdlc->do_adapt56){ - hdlc->bit_shift --; - } + if (hdlc->do_adapt56) + hdlc->bit_shift--; } - switch(hdlc->state){ + switch (hdlc->state) { case STOPPED: return 0; case HDLC_FAST_IDLE: - if(hdlc->cbin == 0xff){ + if (hdlc->cbin == 0xff) { hdlc->bit_shift = 0; break; } @@ -163,32 +208,30 @@ int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->bit_shift = 8; break; case HDLC_GET_FLAG_B0: - if(!(hdlc->cbin & 0x80)) { + if (!(hdlc->cbin & 0x80)) { hdlc->state = HDLC_GETFLAG_B1A6; hdlc->hdlc_bits1 = 0; } else { - if(!hdlc->do_adapt56){ - if(++hdlc->hdlc_bits1 >=8 ) if(hdlc->bit_shift==1) + if ((!hdlc->do_adapt56) && + (++hdlc->hdlc_bits1 >= 8) && + (hdlc->bit_shift == 1)) hdlc->state = HDLC_FAST_IDLE; - } } - hdlc->cbin<<=1; - hdlc->bit_shift --; + hdlc->cbin <<= 1; + hdlc->bit_shift--; break; case HDLC_GETFLAG_B1A6: - if(hdlc->cbin & 0x80){ + if (hdlc->cbin & 0x80) { hdlc->hdlc_bits1++; - if(hdlc->hdlc_bits1==6){ + if (hdlc->hdlc_bits1 == 6) hdlc->state = HDLC_GETFLAG_B7; - } - } else { + } else hdlc->hdlc_bits1 = 0; - } - hdlc->cbin<<=1; - hdlc->bit_shift --; + hdlc->cbin <<= 1; + hdlc->bit_shift--; break; case HDLC_GETFLAG_B7: - if(hdlc->cbin & 0x80) { + if (hdlc->cbin & 0x80) { hdlc->state = HDLC_GET_FLAG_B0; } else { hdlc->state = HDLC_GET_DATA; @@ -198,74 +241,55 @@ int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const 
unsigned char *src, hdlc->data_bits = 0; hdlc->data_received = 0; } - hdlc->cbin<<=1; - hdlc->bit_shift --; + hdlc->cbin <<= 1; + hdlc->bit_shift--; break; case HDLC_GET_DATA: - if(hdlc->cbin & 0x80){ + if (hdlc->cbin & 0x80) { hdlc->hdlc_bits1++; - switch(hdlc->hdlc_bits1){ + switch (hdlc->hdlc_bits1) { case 6: break; case 7: - if(hdlc->data_received) { - // bad frame + if (hdlc->data_received) + /* bad frame */ status = -HDLC_FRAMING_ERROR; - } - if(!hdlc->do_adapt56){ - if(hdlc->cbin==fast_abort[hdlc->bit_shift+1]){ - hdlc->state = HDLC_FAST_IDLE; - hdlc->bit_shift=1; + if (!hdlc->do_adapt56) { + if (hdlc->cbin == fast_abort + [hdlc->bit_shift + 1]) { + hdlc->state = + HDLC_FAST_IDLE; + hdlc->bit_shift = 1; break; } - } else { + } else hdlc->state = HDLC_GET_FLAG_B0; - } break; default: - hdlc->shift_reg>>=1; + hdlc->shift_reg >>= 1; hdlc->shift_reg |= 0x80; hdlc->data_bits++; break; } } else { - switch(hdlc->hdlc_bits1){ + switch (hdlc->hdlc_bits1) { case 5: break; case 6: - if(hdlc->data_received){ - if (hdlc->dstpos < 2) { - status = -HDLC_FRAMING_ERROR; - } else if (hdlc->crc != 0xf0b8){ - // crc error - status = -HDLC_CRC_ERROR; - } else { - // remove CRC - hdlc->dstpos -= 2; - // good frame - status = hdlc->dstpos; - } - } + if (hdlc->data_received) + status = check_frame(hdlc); hdlc->crc = 0xffff; hdlc->shift_reg = 0; hdlc->data_bits = 0; - if(!hdlc->do_adapt56){ - if(hdlc->cbin==fast_flag[hdlc->bit_shift]){ - hdlc->ffvalue = fast_flag_value[hdlc->bit_shift]; - hdlc->state = HDLC_FAST_FLAG; - hdlc->ffbit_shift = hdlc->bit_shift; - hdlc->bit_shift = 1; - } else { - hdlc->state = HDLC_GET_DATA; - hdlc->data_received = 0; - } - } else { + if (!hdlc->do_adapt56) + handle_fast_flag(hdlc); + else { hdlc->state = HDLC_GET_DATA; hdlc->data_received = 0; } break; default: - hdlc->shift_reg>>=1; + hdlc->shift_reg >>= 1; hdlc->data_bits++; break; } @@ -278,16 +302,17 @@ int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->bit_shift--; return status; } - if(hdlc->data_bits==8){ + if (hdlc->data_bits == 8) { hdlc->data_bits = 0; hdlc->data_received = 1; - hdlc->crc = crc_ccitt_byte(hdlc->crc, hdlc->shift_reg); + hdlc->crc = crc_ccitt_byte(hdlc->crc, + hdlc->shift_reg); - // good byte received - if (hdlc->dstpos < dsize) { + /* good byte received */ + if (hdlc->dstpos < dsize) dst[hdlc->dstpos++] = hdlc->shift_reg; - } else { - // frame too long + else { + /* frame too long */ status = -HDLC_LENGTH_ERROR; hdlc->dstpos = 0; } @@ -296,24 +321,18 @@ int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->bit_shift--; break; case HDLC_FAST_FLAG: - if(hdlc->cbin==hdlc->ffvalue){ + if (hdlc->cbin == hdlc->ffvalue) { hdlc->bit_shift = 0; break; } else { - if(hdlc->cbin == 0xff){ + if (hdlc->cbin == 0xff) { hdlc->state = HDLC_FAST_IDLE; - hdlc->bit_shift=0; - } else if(hdlc->ffbit_shift==8){ + hdlc->bit_shift = 0; + } else if (hdlc->ffbit_shift == 8) { hdlc->state = HDLC_GETFLAG_B7; break; - } else { - hdlc->shift_reg = fast_abort[hdlc->ffbit_shift-1]; - hdlc->hdlc_bits1 = hdlc->ffbit_shift-2; - if(hdlc->hdlc_bits1<0)hdlc->hdlc_bits1 = 0; - hdlc->data_bits = hdlc->ffbit_shift-1; - hdlc->state = HDLC_GET_DATA; - hdlc->data_received = 0; - } + } else + handle_abort(hdlc); } break; default: @@ -323,7 +342,7 @@ int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, *count -= slen; return 0; } - +EXPORT_SYMBOL(isdnhdlc_decode); /* isdnhdlc_encode - encodes HDLC frames to a transparent bit stream. 
@@ -343,12 +362,11 @@ int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, dsize - destination buffer size returns - number of encoded bytes in the destination buffer */ -int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, - unsigned short slen, int *count, - unsigned char *dst, int dsize) +int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen, + int *count, u8 *dst, int dsize) { static const unsigned char xfast_flag_value[] = { - 0x7e,0x3f,0x9f,0xcf,0xe7,0xf3,0xf9,0xfc,0x7e + 0x7e, 0x3f, 0x9f, 0xcf, 0xe7, 0xf3, 0xf9, 0xfc, 0x7e }; int len = 0; @@ -356,31 +374,34 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, *count = slen; while (dsize > 0) { - if(hdlc->bit_shift==0){ - if(slen && !hdlc->do_closing){ + if (hdlc->bit_shift == 0) { + if (slen && !hdlc->do_closing) { hdlc->shift_reg = *src++; slen--; if (slen == 0) - hdlc->do_closing = 1; /* closing sequence, CRC + flag(s) */ + /* closing sequence, CRC + flag(s) */ + hdlc->do_closing = 1; hdlc->bit_shift = 8; } else { - if(hdlc->state == HDLC_SEND_DATA){ - if(hdlc->data_received){ + if (hdlc->state == HDLC_SEND_DATA) { + if (hdlc->data_received) { hdlc->state = HDLC_SEND_CRC1; hdlc->crc ^= 0xffff; hdlc->bit_shift = 8; - hdlc->shift_reg = hdlc->crc & 0xff; - } else if(!hdlc->do_adapt56){ - hdlc->state = HDLC_SEND_FAST_FLAG; - } else { - hdlc->state = HDLC_SENDFLAG_B0; - } + hdlc->shift_reg = + hdlc->crc & 0xff; + } else if (!hdlc->do_adapt56) + hdlc->state = + HDLC_SEND_FAST_FLAG; + else + hdlc->state = + HDLC_SENDFLAG_B0; } } } - switch(hdlc->state){ + switch (hdlc->state) { case STOPPED: while (dsize--) *dst++ = 0xff; @@ -388,14 +409,15 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, return dsize; case HDLC_SEND_FAST_FLAG: hdlc->do_closing = 0; - if(slen == 0){ + if (slen == 0) { *dst++ = hdlc->ffvalue; len++; dsize--; break; } - if(hdlc->bit_shift==8){ - hdlc->cbin = hdlc->ffvalue>>(8-hdlc->data_bits); + if (hdlc->bit_shift == 8) { + hdlc->cbin = hdlc->ffvalue >> + (8 - hdlc->data_bits); hdlc->state = HDLC_SEND_DATA; hdlc->crc = 0xffff; hdlc->hdlc_bits1 = 0; @@ -413,17 +435,17 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->cbin <<= 1; hdlc->data_bits++; hdlc->cbin++; - if(++hdlc->hdlc_bits1 == 6) + if (++hdlc->hdlc_bits1 == 6) hdlc->state = HDLC_SENDFLAG_B7; break; case HDLC_SENDFLAG_B7: hdlc->cbin <<= 1; hdlc->data_bits++; - if(slen == 0){ + if (slen == 0) { hdlc->state = HDLC_SENDFLAG_B0; break; } - if(hdlc->bit_shift==8){ + if (hdlc->bit_shift == 8) { hdlc->state = HDLC_SEND_DATA; hdlc->crc = 0xffff; hdlc->hdlc_bits1 = 0; @@ -432,7 +454,7 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, break; case HDLC_SEND_FIRST_FLAG: hdlc->data_received = 1; - if(hdlc->data_bits==8){ + if (hdlc->data_bits == 8) { hdlc->state = HDLC_SEND_DATA; hdlc->crc = 0xffff; hdlc->hdlc_bits1 = 0; @@ -440,11 +462,11 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, } hdlc->cbin <<= 1; hdlc->data_bits++; - if(hdlc->shift_reg & 0x01) + if (hdlc->shift_reg & 0x01) hdlc->cbin++; hdlc->shift_reg >>= 1; hdlc->bit_shift--; - if(hdlc->bit_shift==0){ + if (hdlc->bit_shift == 0) { hdlc->state = HDLC_SEND_DATA; hdlc->crc = 0xffff; hdlc->hdlc_bits1 = 0; @@ -453,14 +475,14 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, case HDLC_SEND_DATA: hdlc->cbin <<= 1; hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ + if (hdlc->hdlc_bits1 == 5) { 
hdlc->hdlc_bits1 = 0; break; } - if(hdlc->bit_shift==8){ - hdlc->crc = crc_ccitt_byte(hdlc->crc, hdlc->shift_reg); - } - if(hdlc->shift_reg & 0x01){ + if (hdlc->bit_shift == 8) + hdlc->crc = crc_ccitt_byte(hdlc->crc, + hdlc->shift_reg); + if (hdlc->shift_reg & 0x01) { hdlc->hdlc_bits1++; hdlc->cbin++; hdlc->shift_reg >>= 1; @@ -474,11 +496,11 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, case HDLC_SEND_CRC1: hdlc->cbin <<= 1; hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ + if (hdlc->hdlc_bits1 == 5) { hdlc->hdlc_bits1 = 0; break; } - if(hdlc->shift_reg & 0x01){ + if (hdlc->shift_reg & 0x01) { hdlc->hdlc_bits1++; hdlc->cbin++; hdlc->shift_reg >>= 1; @@ -488,7 +510,7 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->shift_reg >>= 1; hdlc->bit_shift--; } - if(hdlc->bit_shift==0){ + if (hdlc->bit_shift == 0) { hdlc->shift_reg = (hdlc->crc >> 8); hdlc->state = HDLC_SEND_CRC2; hdlc->bit_shift = 8; @@ -497,11 +519,11 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, case HDLC_SEND_CRC2: hdlc->cbin <<= 1; hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ + if (hdlc->hdlc_bits1 == 5) { hdlc->hdlc_bits1 = 0; break; } - if(hdlc->shift_reg & 0x01){ + if (hdlc->shift_reg & 0x01) { hdlc->hdlc_bits1++; hdlc->cbin++; hdlc->shift_reg >>= 1; @@ -511,7 +533,7 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->shift_reg >>= 1; hdlc->bit_shift--; } - if(hdlc->bit_shift==0){ + if (hdlc->bit_shift == 0) { hdlc->shift_reg = 0x7e; hdlc->state = HDLC_SEND_CLOSING_FLAG; hdlc->bit_shift = 8; @@ -520,33 +542,36 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, case HDLC_SEND_CLOSING_FLAG: hdlc->cbin <<= 1; hdlc->data_bits++; - if(hdlc->hdlc_bits1 == 5){ + if (hdlc->hdlc_bits1 == 5) { hdlc->hdlc_bits1 = 0; break; } - if(hdlc->shift_reg & 0x01){ + if (hdlc->shift_reg & 0x01) hdlc->cbin++; - } hdlc->shift_reg >>= 1; hdlc->bit_shift--; - if(hdlc->bit_shift==0){ - hdlc->ffvalue = xfast_flag_value[hdlc->data_bits]; - if(hdlc->dchannel){ + if (hdlc->bit_shift == 0) { + hdlc->ffvalue = + xfast_flag_value[hdlc->data_bits]; + if (hdlc->dchannel) { hdlc->ffvalue = 0x7e; hdlc->state = HDLC_SEND_IDLE1; hdlc->bit_shift = 8-hdlc->data_bits; - if(hdlc->bit_shift==0) - hdlc->state = HDLC_SEND_FAST_IDLE; + if (hdlc->bit_shift == 0) + hdlc->state = + HDLC_SEND_FAST_IDLE; } else { - if(!hdlc->do_adapt56){ - hdlc->state = HDLC_SEND_FAST_FLAG; + if (!hdlc->do_adapt56) { + hdlc->state = + HDLC_SEND_FAST_FLAG; hdlc->data_received = 0; } else { hdlc->state = HDLC_SENDFLAG_B0; hdlc->data_received = 0; } - // Finished with this frame, send flags - if (dsize > 1) dsize = 1; + /* Finished this frame, send flags */ + if (dsize > 1) + dsize = 1; } } break; @@ -556,7 +581,7 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->cbin++; hdlc->data_bits++; hdlc->bit_shift--; - if(hdlc->bit_shift==0){ + if (hdlc->bit_shift == 0) { hdlc->state = HDLC_SEND_FAST_IDLE; hdlc->bit_shift = 0; } @@ -565,12 +590,13 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, hdlc->do_closing = 0; hdlc->cbin = 0xff; hdlc->data_bits = 8; - if(hdlc->bit_shift == 8){ + if (hdlc->bit_shift == 8) { hdlc->cbin = 0x7e; hdlc->state = HDLC_SEND_FIRST_FLAG; } else { *dst++ = hdlc->cbin; - hdlc->bit_shift = hdlc->data_bits = 0; + hdlc->bit_shift = 0; + hdlc->data_bits = 0; len++; dsize = 0; } @@ -578,14 +604,14 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char 
*src, default: break; } - if(hdlc->do_adapt56){ - if(hdlc->data_bits==7){ + if (hdlc->do_adapt56) { + if (hdlc->data_bits == 7) { hdlc->cbin <<= 1; hdlc->cbin++; hdlc->data_bits++; } } - if(hdlc->data_bits==8){ + if (hdlc->data_bits == 8) { *dst++ = hdlc->cbin; hdlc->data_bits = 0; len++; @@ -596,8 +622,4 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const unsigned char *src, return len; } - -EXPORT_SYMBOL(isdnhdlc_rcv_init); -EXPORT_SYMBOL(isdnhdlc_decode); -EXPORT_SYMBOL(isdnhdlc_out_init); EXPORT_SYMBOL(isdnhdlc_encode); diff --git a/include/linux/isdn/hdlc.h b/include/linux/isdn/hdlc.h index cf0a95a24015..8f3540c7f692 100644 --- a/include/linux/isdn/hdlc.h +++ b/include/linux/isdn/hdlc.h @@ -1,27 +1,28 @@ /* - * isdnhdlc.h -- General purpose ISDN HDLC decoder. + * hdlc.h -- General purpose ISDN HDLC decoder. * * Implementation of a HDLC decoder/encoder in software. * Neccessary because some ISDN devices don't have HDLC - * controllers. Also included: a bit reversal table. + * controllers. * - *Copyright (C) 2002 Wolfgang Mües - * 2001 Frode Isaksen - * 2001 Kai Germaschewski + * Copyright (C) + * 2002 Wolfgang Mües + * 2001 Frode Isaksen + * 2001 Kai Germaschewski * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #ifndef __ISDNHDLC_H__ @@ -31,20 +32,24 @@ struct isdnhdlc_vars { int bit_shift; int hdlc_bits1; int data_bits; - int ffbit_shift; // encoding only + int ffbit_shift; /* encoding only */ int state; int dstpos; - unsigned short crc; + u16 crc; - unsigned char cbin; - unsigned char shift_reg; - unsigned char ffvalue; + u8 cbin; + u8 shift_reg; + u8 ffvalue; - unsigned int data_received:1; // set if transferring data - unsigned int dchannel:1; // set if D channel (send idle instead of flags) - unsigned int do_adapt56:1; // set if 56K adaptation - unsigned int do_closing:1; // set if in closing phase (need to send CRC + flag + /* set if transferring data */ + u32 data_received:1; + /* set if D channel (send idle instead of flags) */ + u32 dchannel:1; + /* set if 56K adaptation */ + u32 do_adapt56:1; + /* set if in closing phase (need to send CRC + flag) */ + u32 do_closing:1; }; @@ -57,14 +62,15 @@ struct isdnhdlc_vars { #define HDLC_CRC_ERROR 2 #define HDLC_LENGTH_ERROR 3 -extern void isdnhdlc_rcv_init (struct isdnhdlc_vars *hdlc, int do_adapt56); +extern void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, int do_adapt56); -extern int isdnhdlc_decode (struct isdnhdlc_vars *hdlc, const unsigned char *src, int slen,int *count, - unsigned char *dst, int dsize); +extern int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, + int slen, int *count, u8 *dst, int dsize); -extern void isdnhdlc_out_init (struct isdnhdlc_vars *hdlc,int is_d_channel,int do_adapt56); +extern void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, int is_d_channel, + int do_adapt56); -extern int isdnhdlc_encode (struct isdnhdlc_vars *hdlc,const unsigned char *src,unsigned short slen,int *count, - unsigned char *dst,int dsize); +extern int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, + u16 slen, int *count, u8 *dst, int dsize); #endif /* __ISDNHDLC_H__ */ -- cgit v1.2.3 From c38fc3bc2ecddd4f5278131603e6964cbed071b2 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Wed, 8 Jul 2009 20:31:42 +0200 Subject: ISDN: Add support for none reverse bitstreams to isdnhdc The original isdnhdlc code was developed for devices which had reversed bitorder in the byte stream. Adding code to handle normal bitstreams as well. 
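Callers that still rely on the old reversed bit order keep it by passing the new HDLC_BITREVERSE feature flag; callers whose hardware delivers normal bit order simply omit it, and the decoder/encoder then run each byte through bitrev8() internally. A minimal conversion sketch (hdlc_rx/hdlc_tx and 'mode' are the caller's own variables, as in the st5481 hunks below):

	u32 features = HDLC_BITREVERSE;	/* hardware delivers reversed bit order */

	if (mode == L1_MODE_HDLC_56K)	/* caller's channel mode */
		features |= HDLC_56KBIT;
	isdnhdlc_rcv_init(&hdlc_rx, features);	/* receive direction */
	isdnhdlc_out_init(&hdlc_tx, features);	/* transmit; add HDLC_DCHANNEL for a D channel */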
Signed-off-by: Karsten Keil --- drivers/isdn/hisax/st5481_b.c | 5 ++- drivers/isdn/hisax/st5481_d.c | 2 +- drivers/isdn/hisax/st5481_usb.c | 11 ++++--- drivers/isdn/i4l/isdnhdlc.c | 70 ++++++++++++++++++++--------------------- include/linux/isdn/hdlc.h | 12 +++++-- 5 files changed, 56 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c index 0074b600a0ef..95b1cdd97958 100644 --- a/drivers/isdn/hisax/st5481_b.c +++ b/drivers/isdn/hisax/st5481_b.c @@ -218,7 +218,10 @@ static void st5481B_mode(struct st5481_bcs *bcs, int mode) if (bcs->mode != L1_MODE_NULL) { // Open the B channel if (bcs->mode != L1_MODE_TRANS) { - isdnhdlc_out_init(&b_out->hdlc_state, 0, bcs->mode == L1_MODE_HDLC_56K); + u32 features = HDLC_BITREVERSE; + if (bcs->mode == L1_MODE_HDLC_56K) + features |= HDLC_56KBIT; + isdnhdlc_out_init(&b_out->hdlc_state, features); } st5481_usb_pipe_reset(adapter, (bcs->channel+1)*2, NULL, NULL); diff --git a/drivers/isdn/hisax/st5481_d.c b/drivers/isdn/hisax/st5481_d.c index 077991c1cd05..39e8e49cfd2d 100644 --- a/drivers/isdn/hisax/st5481_d.c +++ b/drivers/isdn/hisax/st5481_d.c @@ -417,7 +417,7 @@ static void dout_start_xmit(struct FsmInst *fsm, int event, void *arg) DBG(2,"len=%d",skb->len); - isdnhdlc_out_init(&d_out->hdlc_state, 1, 0); + isdnhdlc_out_init(&d_out->hdlc_state, HDLC_DCHANNEL | HDLC_BITREVERSE); if (test_and_set_bit(buf_nr, &d_out->busy)) { WARNING("ep %d urb %d busy %#lx", EP_D_OUT, buf_nr, d_out->busy); diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c index 2b3a055059ea..10d41c5d73ed 100644 --- a/drivers/isdn/hisax/st5481_usb.c +++ b/drivers/isdn/hisax/st5481_usb.c @@ -637,10 +637,13 @@ void st5481_in_mode(struct st5481_in *in, int mode) usb_unlink_urb(in->urb[1]); if (in->mode != L1_MODE_NULL) { - if (in->mode != L1_MODE_TRANS) - isdnhdlc_rcv_init(&in->hdlc_state, - in->mode == L1_MODE_HDLC_56K); - + if (in->mode != L1_MODE_TRANS) { + u32 features = HDLC_BITREVERSE; + + if (in->mode == L1_MODE_HDLC_56K) + features |= HDLC_56KBIT; + isdnhdlc_rcv_init(&in->hdlc_state, features); + } st5481_usb_pipe_reset(in->adapter, in->ep, NULL, NULL); st5481_usb_device_ctrl_msg(in->adapter, in->counter, in->packet_size, diff --git a/drivers/isdn/i4l/isdnhdlc.c b/drivers/isdn/i4l/isdnhdlc.c index b80e55ab8914..df345ce73f48 100644 --- a/drivers/isdn/i4l/isdnhdlc.c +++ b/drivers/isdn/i4l/isdnhdlc.c @@ -2,6 +2,7 @@ * isdnhdlc.c -- General purpose ISDN HDLC decoder. 
* * Copyright (C) + * 2009 Karsten Keil * 2002 Wolfgang Mües * 2001 Frode Isaksen * 2001 Kai Germaschewski @@ -25,6 +26,7 @@ #include #include #include +#include /*-------------------------------------------------------------------*/ @@ -48,35 +50,21 @@ enum { HDLC_SENDFLAG_B1A6, HDLC_SENDFLAG_B7, STOPPED }; -void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, int do_adapt56) +void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, u32 features) { - hdlc->bit_shift = 0; - hdlc->hdlc_bits1 = 0; - hdlc->data_bits = 0; - hdlc->ffbit_shift = 0; - hdlc->data_received = 0; + memset(hdlc, 0, sizeof(struct isdnhdlc_vars)); hdlc->state = HDLC_GET_DATA; - hdlc->do_adapt56 = do_adapt56; - hdlc->dchannel = 0; - hdlc->crc = 0; - hdlc->cbin = 0; - hdlc->shift_reg = 0; - hdlc->ffvalue = 0; - hdlc->dstpos = 0; + if (features & HDLC_56KBIT) + hdlc->do_adapt56 = 1; + if (features & HDLC_BITREVERSE) + hdlc->do_bitreverse = 1; } EXPORT_SYMBOL(isdnhdlc_out_init); -void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, int is_d_channel, - int do_adapt56) +void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, u32 features) { - hdlc->bit_shift = 0; - hdlc->hdlc_bits1 = 0; - hdlc->data_bits = 0; - hdlc->ffbit_shift = 0; - hdlc->data_received = 0; - hdlc->do_closing = 0; - hdlc->ffvalue = 0; - if (is_d_channel) { + memset(hdlc, 0, sizeof(struct isdnhdlc_vars)); + if (features & HDLC_DCHANNEL) { hdlc->dchannel = 1; hdlc->state = HDLC_SEND_FIRST_FLAG; } else { @@ -85,16 +73,13 @@ void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, int is_d_channel, hdlc->ffvalue = 0x7e; } hdlc->cbin = 0x7e; - hdlc->bit_shift = 0; - if (do_adapt56) { + if (features & HDLC_56KBIT) { hdlc->do_adapt56 = 1; - hdlc->data_bits = 0; hdlc->state = HDLC_SENDFLAG_B0; - } else { - hdlc->do_adapt56 = 0; + } else hdlc->data_bits = 8; - } - hdlc->shift_reg = 0; + if (features & HDLC_BITREVERSE) + hdlc->do_bitreverse = 1; } EXPORT_SYMBOL(isdnhdlc_rcv_init); @@ -188,7 +173,11 @@ int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, int slen, while (slen > 0) { if (hdlc->bit_shift == 0) { - hdlc->cbin = *src++; + /* the code is for bitreverse streams */ + if (hdlc->do_bitreverse == 0) + hdlc->cbin = bitrev8(*src++); + else + hdlc->cbin = *src++; slen--; hdlc->bit_shift = 8; if (hdlc->do_adapt56) @@ -405,12 +394,15 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen, case STOPPED: while (dsize--) *dst++ = 0xff; - return dsize; case HDLC_SEND_FAST_FLAG: hdlc->do_closing = 0; if (slen == 0) { - *dst++ = hdlc->ffvalue; + /* the code is for bitreverse streams */ + if (hdlc->do_bitreverse == 0) + *dst++ = bitrev8(hdlc->ffvalue); + else + *dst++ = hdlc->ffvalue; len++; dsize--; break; @@ -594,7 +586,11 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen, hdlc->cbin = 0x7e; hdlc->state = HDLC_SEND_FIRST_FLAG; } else { - *dst++ = hdlc->cbin; + /* the code is for bitreverse streams */ + if (hdlc->do_bitreverse == 0) + *dst++ = bitrev8(hdlc->cbin); + else + *dst++ = hdlc->cbin; hdlc->bit_shift = 0; hdlc->data_bits = 0; len++; @@ -612,7 +608,11 @@ int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen, } } if (hdlc->data_bits == 8) { - *dst++ = hdlc->cbin; + /* the code is for bitreverse streams */ + if (hdlc->do_bitreverse == 0) + *dst++ = bitrev8(hdlc->cbin); + else + *dst++ = hdlc->cbin; hdlc->data_bits = 0; len++; dsize--; diff --git a/include/linux/isdn/hdlc.h b/include/linux/isdn/hdlc.h index 8f3540c7f692..4b3ecc40889a 100644 --- a/include/linux/isdn/hdlc.h +++ b/include/linux/isdn/hdlc.h @@ -6,6 
+6,7 @@ * controllers. * * Copyright (C) + * 2009 Karsten Keil * 2002 Wolfgang Mües * 2001 Frode Isaksen * 2001 Kai Germaschewski @@ -50,8 +51,14 @@ struct isdnhdlc_vars { u32 do_adapt56:1; /* set if in closing phase (need to send CRC + flag) */ u32 do_closing:1; + /* set if data is bitreverse */ + u32 do_bitreverse:1; }; +/* Feature Flags */ +#define HDLC_56KBIT 0x01 +#define HDLC_DCHANNEL 0x02 +#define HDLC_BITREVERSE 0x04 /* The return value from isdnhdlc_decode is @@ -62,13 +69,12 @@ struct isdnhdlc_vars { #define HDLC_CRC_ERROR 2 #define HDLC_LENGTH_ERROR 3 -extern void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, int do_adapt56); +extern void isdnhdlc_rcv_init(struct isdnhdlc_vars *hdlc, u32 features); extern int isdnhdlc_decode(struct isdnhdlc_vars *hdlc, const u8 *src, int slen, int *count, u8 *dst, int dsize); -extern void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, int is_d_channel, - int do_adapt56); +extern void isdnhdlc_out_init(struct isdnhdlc_vars *hdlc, u32 features); extern int isdnhdlc_encode(struct isdnhdlc_vars *hdlc, const u8 *src, u16 slen, int *count, u8 *dst, int dsize); -- cgit v1.2.3 From fb286f0471a04ef646c8e5c79750ae6718183745 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Thu, 9 Jul 2009 10:02:29 +0200 Subject: mISDN: Make clearing B-channel a common function Clearing B-channel is needed in every driver, so it makes sense to have it as common function. Signed-off-by: Karsten Keil --- drivers/isdn/hardware/mISDN/hfcmulti.c | 16 +--------------- drivers/isdn/hardware/mISDN/hfcpci.c | 16 +--------------- drivers/isdn/hardware/mISDN/hfcsusb.c | 16 +--------------- drivers/isdn/mISDN/hwchannel.c | 15 +++++++++++++-- include/linux/mISDNhw.h | 1 + 5 files changed, 17 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index e1dab30aed30..fd77bb15d790 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -3416,22 +3416,8 @@ deactivate_bchannel(struct bchannel *bch) u_long flags; spin_lock_irqsave(&hc->lock, flags); - if (test_and_clear_bit(FLG_TX_NEXT, &bch->Flags)) { - dev_kfree_skb(bch->next_skb); - bch->next_skb = NULL; - } - if (bch->tx_skb) { - dev_kfree_skb(bch->tx_skb); - bch->tx_skb = NULL; - } - bch->tx_idx = 0; - if (bch->rx_skb) { - dev_kfree_skb(bch->rx_skb); - bch->rx_skb = NULL; - } + mISDN_clear_bchannel(bch); hc->chan[bch->slot].coeff_count = 0; - test_and_clear_bit(FLG_ACTIVE, &bch->Flags); - test_and_clear_bit(FLG_TX_BUSY, &bch->Flags); hc->chan[bch->slot].rx_off = 0; hc->chan[bch->slot].conf = -1; mode_hfcmulti(hc, bch->slot, ISDN_P_NONE, -1, 0, -1, 0); diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index 228ffbed1286..70e6b0e01121 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -1522,22 +1522,8 @@ deactivate_bchannel(struct bchannel *bch) u_long flags; spin_lock_irqsave(&hc->lock, flags); - if (test_and_clear_bit(FLG_TX_NEXT, &bch->Flags)) { - dev_kfree_skb(bch->next_skb); - bch->next_skb = NULL; - } - if (bch->tx_skb) { - dev_kfree_skb(bch->tx_skb); - bch->tx_skb = NULL; - } - bch->tx_idx = 0; - if (bch->rx_skb) { - dev_kfree_skb(bch->rx_skb); - bch->rx_skb = NULL; - } + mISDN_clear_bchannel(bch); mode_hfcpci(bch, bch->nr, ISDN_P_NONE); - test_and_clear_bit(FLG_ACTIVE, &bch->Flags); - test_and_clear_bit(FLG_TX_BUSY, &bch->Flags); spin_unlock_irqrestore(&hc->lock, flags); } diff --git 
a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c index 6b7704c41b94..fc46a26cb14f 100644 --- a/drivers/isdn/hardware/mISDN/hfcsusb.c +++ b/drivers/isdn/hardware/mISDN/hfcsusb.c @@ -1809,21 +1809,7 @@ deactivate_bchannel(struct bchannel *bch) hw->name, __func__, bch->nr); spin_lock_irqsave(&hw->lock, flags); - if (test_and_clear_bit(FLG_TX_NEXT, &bch->Flags)) { - dev_kfree_skb(bch->next_skb); - bch->next_skb = NULL; - } - if (bch->tx_skb) { - dev_kfree_skb(bch->tx_skb); - bch->tx_skb = NULL; - } - bch->tx_idx = 0; - if (bch->rx_skb) { - dev_kfree_skb(bch->rx_skb); - bch->rx_skb = NULL; - } - clear_bit(FLG_ACTIVE, &bch->Flags); - clear_bit(FLG_TX_BUSY, &bch->Flags); + mISDN_clear_bchannel(bch); spin_unlock_irqrestore(&hw->lock, flags); hfcsusb_setup_bch(bch, ISDN_P_NONE); hfcsusb_stop_endpoint(hw, bch->nr); diff --git a/drivers/isdn/mISDN/hwchannel.c b/drivers/isdn/mISDN/hwchannel.c index 0481a0cdf6db..e8049be552aa 100644 --- a/drivers/isdn/mISDN/hwchannel.c +++ b/drivers/isdn/mISDN/hwchannel.c @@ -114,13 +114,14 @@ mISDN_freedchannel(struct dchannel *ch) } EXPORT_SYMBOL(mISDN_freedchannel); -int -mISDN_freebchannel(struct bchannel *ch) +void +mISDN_clear_bchannel(struct bchannel *ch) { if (ch->tx_skb) { dev_kfree_skb(ch->tx_skb); ch->tx_skb = NULL; } + ch->tx_idx = 0; if (ch->rx_skb) { dev_kfree_skb(ch->rx_skb); ch->rx_skb = NULL; @@ -129,6 +130,16 @@ mISDN_freebchannel(struct bchannel *ch) dev_kfree_skb(ch->next_skb); ch->next_skb = NULL; } + test_and_clear_bit(FLG_TX_BUSY, &ch->Flags); + test_and_clear_bit(FLG_TX_NEXT, &ch->Flags); + test_and_clear_bit(FLG_ACTIVE, &ch->Flags); +} +EXPORT_SYMBOL(mISDN_clear_bchannel); + +int +mISDN_freebchannel(struct bchannel *ch) +{ + mISDN_clear_bchannel(ch); skb_queue_purge(&ch->rqueue); ch->rcount = 0; flush_scheduled_work(); diff --git a/include/linux/mISDNhw.h b/include/linux/mISDNhw.h index 7f9831da847f..4af841408fb5 100644 --- a/include/linux/mISDNhw.h +++ b/include/linux/mISDNhw.h @@ -168,6 +168,7 @@ struct bchannel { extern int mISDN_initdchannel(struct dchannel *, int, void *); extern int mISDN_initbchannel(struct bchannel *, int); extern int mISDN_freedchannel(struct dchannel *); +extern void mISDN_clear_bchannel(struct bchannel *); extern int mISDN_freebchannel(struct bchannel *); extern void queue_ch_frame(struct mISDNchannel *, u_int, int, struct sk_buff *); -- cgit v1.2.3 From da2272c91ae81b41ae6fa6ebdc767a6cef73b770 Mon Sep 17 00:00:00 2001 From: Karsten Keil Date: Wed, 22 Jul 2009 20:01:59 +0200 Subject: mISDN: Add support for Speedfax+ cards Add support for the Siemens ISAR DSP chip and cards based on it, including analog modem protocols. 
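The exported entry points are mISDNisar_init() and mISDNisar_irq(); a card driver fills in the register and FIFO accessors of struct isar_hw (declared in isar.h below) and lets the ISAR core handle the mailbox protocol, firmware load and B-channel state machines. A rough sketch of that glue follows; the accessor prototypes are inferred from the call sites in mISDNisar.c (the real typedefs live in iohelper.h, which is not part of this excerpt), and the 'sfax_card' structure is hypothetical, so treat the exact signatures as assumptions rather than the driver's actual code.

/* assumes the usual kernel includes plus "isar.h" */

static u8 sfax_read_reg(void *hw, u8 off)
{
	/* read one ISAR register of the card behind 'hw' */
	return 0;
}

static void sfax_write_reg(void *hw, u8 off, u8 val)
{
	/* write one ISAR register */
}

static void sfax_read_fifo(void *hw, u8 off, u8 *data, int size)
{
	/* burst read from the ISAR mailbox window */
}

static void sfax_write_fifo(void *hw, u8 off, u8 *data, int size)
{
	/* burst write to the ISAR mailbox window */
}

static u32 sfax_setup_isar(struct sfax_card *card)	/* hypothetical card struct */
{
	card->isar.hw = card;
	card->isar.hwlock = &card->lock;
	card->isar.name = card->name;
	card->isar.owner = THIS_MODULE;
	card->isar.read_reg = &sfax_read_reg;
	card->isar.write_reg = &sfax_write_reg;
	card->isar.read_fifo = &sfax_read_fifo;
	card->isar.write_fifo = &sfax_write_fifo;
	/*
	 * How the u32 return value is interpreted is up to the card driver
	 * (speedfax.c, not included in this excerpt).  The card's interrupt
	 * handler is expected to call mISDNisar_irq(&card->isar) whenever
	 * the ISAR raises an interrupt.
	 */
	return mISDNisar_init(&card->isar, card);
}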
Signed-off-by: Karsten Keil --- drivers/isdn/hardware/mISDN/Kconfig | 13 + drivers/isdn/hardware/mISDN/Makefile | 2 + drivers/isdn/hardware/mISDN/isar.h | 269 +++++ drivers/isdn/hardware/mISDN/mISDNisar.c | 1726 +++++++++++++++++++++++++++++++ drivers/isdn/hardware/mISDN/speedfax.c | 526 ++++++++++ include/linux/mISDNif.h | 16 +- 6 files changed, 2551 insertions(+), 1 deletion(-) create mode 100644 drivers/isdn/hardware/mISDN/isar.h create mode 100644 drivers/isdn/hardware/mISDN/mISDNisar.c create mode 100644 drivers/isdn/hardware/mISDN/speedfax.c (limited to 'include') diff --git a/drivers/isdn/hardware/mISDN/Kconfig b/drivers/isdn/hardware/mISDN/Kconfig index 212dee6adec6..2600534be07e 100644 --- a/drivers/isdn/hardware/mISDN/Kconfig +++ b/drivers/isdn/hardware/mISDN/Kconfig @@ -47,6 +47,15 @@ config MISDN_AVMFRITZ help Enable support for AVMs FRITZ!CARD PCI cards +config MISDN_SPEEDFAX + tristate "Support for Sedlbauer Speedfax+" + depends on MISDN + depends on PCI + select MISDN_IPAC + select MISDN_ISAR + help + Enable support for Sedlbauer Speedfax+. + config MISDN_INFINEON tristate "Support for cards with Infineon chipset" depends on MISDN @@ -61,3 +70,7 @@ config MISDN_IPAC tristate depends on MISDN +config MISDN_ISAR + tristate + depends on MISDN + diff --git a/drivers/isdn/hardware/mISDN/Makefile b/drivers/isdn/hardware/mISDN/Makefile index 8204d84af70a..e18f964e185b 100644 --- a/drivers/isdn/hardware/mISDN/Makefile +++ b/drivers/isdn/hardware/mISDN/Makefile @@ -7,6 +7,8 @@ obj-$(CONFIG_MISDN_HFCPCI) += hfcpci.o obj-$(CONFIG_MISDN_HFCMULTI) += hfcmulti.o obj-$(CONFIG_MISDN_HFCUSB) += hfcsusb.o obj-$(CONFIG_MISDN_AVMFRITZ) += avmfritz.o +obj-$(CONFIG_MISDN_SPEEDFAX) += speedfax.o obj-$(CONFIG_MISDN_INFINEON) += mISDNinfineon.o # chip modules obj-$(CONFIG_MISDN_IPAC) += mISDNipac.o +obj-$(CONFIG_MISDN_ISAR) += mISDNisar.o diff --git a/drivers/isdn/hardware/mISDN/isar.h b/drivers/isdn/hardware/mISDN/isar.h new file mode 100644 index 000000000000..092351acd2ab --- /dev/null +++ b/drivers/isdn/hardware/mISDN/isar.h @@ -0,0 +1,269 @@ +/* + * + * isar.h ISAR (Siemens PSB 7110) specific defines + * + * Author Karsten Keil (keil@isdn4linux.de) + * + * Copyright 2009 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + */ + +#include "iohelper.h" + +struct isar_hw; + +struct isar_ch { + struct bchannel bch; + struct isar_hw *is; + struct timer_list ftimer; + u8 nr; + u8 dpath; + u8 mml; + u8 state; + u8 cmd; + u8 mod; + u8 newcmd; + u8 newmod; + u8 try_mod; + u8 conmsg[16]; +}; + +struct isar_hw { + struct isar_ch ch[2]; + void *hw; + spinlock_t *hwlock; /* lock HW acccess */ + char *name; + struct module *owner; + read_reg_t *read_reg; + write_reg_t *write_reg; + fifo_func_t *read_fifo; + fifo_func_t *write_fifo; + int (*ctrl)(void *, u32, u_long); + void (*release)(struct isar_hw *); + int (*init)(struct isar_hw *); + int (*open)(struct isar_hw *, struct channel_req *); + int (*firmware)(struct isar_hw *, const u8 *, int); + unsigned long Flags; + int version; + u8 bstat; + u8 iis; + u8 cmsb; + u8 clsb; + u8 buf[256]; + u8 log[256]; +}; + +#define ISAR_IRQMSK 0x04 +#define ISAR_IRQSTA 0x04 +#define ISAR_IRQBIT 0x75 +#define ISAR_CTRL_H 0x61 +#define ISAR_CTRL_L 0x60 +#define ISAR_IIS 0x58 +#define ISAR_IIA 0x58 +#define ISAR_HIS 0x50 +#define ISAR_HIA 0x50 +#define ISAR_MBOX 0x4c +#define ISAR_WADR 0x4a +#define ISAR_RADR 0x48 + +#define ISAR_HIS_VNR 0x14 +#define ISAR_HIS_DKEY 0x02 +#define ISAR_HIS_FIRM 0x1e +#define ISAR_HIS_STDSP 0x08 +#define ISAR_HIS_DIAG 0x05 +#define ISAR_HIS_P0CFG 0x3c +#define ISAR_HIS_P12CFG 0x24 +#define ISAR_HIS_SARTCFG 0x25 +#define ISAR_HIS_PUMPCFG 0x26 +#define ISAR_HIS_PUMPCTRL 0x2a +#define ISAR_HIS_IOM2CFG 0x27 +#define ISAR_HIS_IOM2REQ 0x07 +#define ISAR_HIS_IOM2CTRL 0x2b +#define ISAR_HIS_BSTREQ 0x0c +#define ISAR_HIS_PSTREQ 0x0e +#define ISAR_HIS_SDATA 0x20 +#define ISAR_HIS_DPS1 0x40 +#define ISAR_HIS_DPS2 0x80 +#define SET_DPS(x) ((x<<6) & 0xc0) + +#define ISAR_IIS_MSCMSD 0x3f +#define ISAR_IIS_VNR 0x15 +#define ISAR_IIS_DKEY 0x03 +#define ISAR_IIS_FIRM 0x1f +#define ISAR_IIS_STDSP 0x09 +#define ISAR_IIS_DIAG 0x25 +#define ISAR_IIS_GSTEV 0x00 +#define ISAR_IIS_BSTEV 0x28 +#define ISAR_IIS_BSTRSP 0x2c +#define ISAR_IIS_PSTRSP 0x2e +#define ISAR_IIS_PSTEV 0x2a +#define ISAR_IIS_IOM2RSP 0x27 +#define ISAR_IIS_RDATA 0x20 +#define ISAR_IIS_INVMSG 0x3f + +#define ISAR_CTRL_SWVER 0x10 +#define ISAR_CTRL_STST 0x40 + +#define ISAR_MSG_HWVER 0x20 + +#define ISAR_DP1_USE 1 +#define ISAR_DP2_USE 2 +#define ISAR_RATE_REQ 3 + +#define PMOD_DISABLE 0 +#define PMOD_FAX 1 +#define PMOD_DATAMODEM 2 +#define PMOD_HALFDUPLEX 3 +#define PMOD_V110 4 +#define PMOD_DTMF 5 +#define PMOD_DTMF_TRANS 6 +#define PMOD_BYPASS 7 + +#define PCTRL_ORIG 0x80 +#define PV32P2_V23R 0x40 +#define PV32P2_V22A 0x20 +#define PV32P2_V22B 0x10 +#define PV32P2_V22C 0x08 +#define PV32P2_V21 0x02 +#define PV32P2_BEL 0x01 + +/* LSB MSB in ISAR doc wrong !!! 
Arghhh */ +#define PV32P3_AMOD 0x80 +#define PV32P3_V32B 0x02 +#define PV32P3_V23B 0x01 +#define PV32P4_48 0x11 +#define PV32P5_48 0x05 +#define PV32P4_UT48 0x11 +#define PV32P5_UT48 0x0d +#define PV32P4_96 0x11 +#define PV32P5_96 0x03 +#define PV32P4_UT96 0x11 +#define PV32P5_UT96 0x0f +#define PV32P4_B96 0x91 +#define PV32P5_B96 0x0b +#define PV32P4_UTB96 0xd1 +#define PV32P5_UTB96 0x0f +#define PV32P4_120 0xb1 +#define PV32P5_120 0x09 +#define PV32P4_UT120 0xf1 +#define PV32P5_UT120 0x0f +#define PV32P4_144 0x99 +#define PV32P5_144 0x09 +#define PV32P4_UT144 0xf9 +#define PV32P5_UT144 0x0f +#define PV32P6_CTN 0x01 +#define PV32P6_ATN 0x02 + +#define PFAXP2_CTN 0x01 +#define PFAXP2_ATN 0x04 + +#define PSEV_10MS_TIMER 0x02 +#define PSEV_CON_ON 0x18 +#define PSEV_CON_OFF 0x19 +#define PSEV_V24_OFF 0x20 +#define PSEV_CTS_ON 0x21 +#define PSEV_CTS_OFF 0x22 +#define PSEV_DCD_ON 0x23 +#define PSEV_DCD_OFF 0x24 +#define PSEV_DSR_ON 0x25 +#define PSEV_DSR_OFF 0x26 +#define PSEV_REM_RET 0xcc +#define PSEV_REM_REN 0xcd +#define PSEV_GSTN_CLR 0xd4 + +#define PSEV_RSP_READY 0xbc +#define PSEV_LINE_TX_H 0xb3 +#define PSEV_LINE_TX_B 0xb2 +#define PSEV_LINE_RX_H 0xb1 +#define PSEV_LINE_RX_B 0xb0 +#define PSEV_RSP_CONN 0xb5 +#define PSEV_RSP_DISC 0xb7 +#define PSEV_RSP_FCERR 0xb9 +#define PSEV_RSP_SILDET 0xbe +#define PSEV_RSP_SILOFF 0xab +#define PSEV_FLAGS_DET 0xba + +#define PCTRL_CMD_TDTMF 0x5a + +#define PCTRL_CMD_FTH 0xa7 +#define PCTRL_CMD_FRH 0xa5 +#define PCTRL_CMD_FTM 0xa8 +#define PCTRL_CMD_FRM 0xa6 +#define PCTRL_CMD_SILON 0xac +#define PCTRL_CMD_CONT 0xa2 +#define PCTRL_CMD_ESC 0xa4 +#define PCTRL_CMD_SILOFF 0xab +#define PCTRL_CMD_HALT 0xa9 + +#define PCTRL_LOC_RET 0xcf +#define PCTRL_LOC_REN 0xce + +#define SMODE_DISABLE 0 +#define SMODE_V14 2 +#define SMODE_HDLC 3 +#define SMODE_BINARY 4 +#define SMODE_FSK_V14 5 + +#define SCTRL_HDMC_BOTH 0x00 +#define SCTRL_HDMC_DTX 0x80 +#define SCTRL_HDMC_DRX 0x40 +#define S_P1_OVSP 0x40 +#define S_P1_SNP 0x20 +#define S_P1_EOP 0x10 +#define S_P1_EDP 0x08 +#define S_P1_NSB 0x04 +#define S_P1_CHS_8 0x03 +#define S_P1_CHS_7 0x02 +#define S_P1_CHS_6 0x01 +#define S_P1_CHS_5 0x00 + +#define S_P2_BFT_DEF 0x10 + +#define IOM_CTRL_ENA 0x80 +#define IOM_CTRL_NOPCM 0x00 +#define IOM_CTRL_ALAW 0x02 +#define IOM_CTRL_ULAW 0x04 +#define IOM_CTRL_RCV 0x01 + +#define IOM_P1_TXD 0x10 + +#define HDLC_FED 0x40 +#define HDLC_FSD 0x20 +#define HDLC_FST 0x20 +#define HDLC_ERROR 0x1c +#define HDLC_ERR_FAD 0x10 +#define HDLC_ERR_RER 0x08 +#define HDLC_ERR_CER 0x04 +#define SART_NMD 0x01 + +#define BSTAT_RDM0 0x1 +#define BSTAT_RDM1 0x2 +#define BSTAT_RDM2 0x4 +#define BSTAT_RDM3 0x8 +#define BSTEV_TBO 0x1f +#define BSTEV_RBO 0x2f + +/* FAX State Machine */ +#define STFAX_NULL 0 +#define STFAX_READY 1 +#define STFAX_LINE 2 +#define STFAX_CONT 3 +#define STFAX_ACTIV 4 +#define STFAX_ESCAPE 5 +#define STFAX_SILDET 6 + +extern u32 mISDNisar_init(struct isar_hw *, void *); +extern void mISDNisar_irq(struct isar_hw *); diff --git a/drivers/isdn/hardware/mISDN/mISDNisar.c b/drivers/isdn/hardware/mISDN/mISDNisar.c new file mode 100644 index 000000000000..de352a17673a --- /dev/null +++ b/drivers/isdn/hardware/mISDN/mISDNisar.c @@ -0,0 +1,1726 @@ +/* + * mISDNisar.c ISAR (Siemens PSB 7110) specific functions + * + * Author Karsten Keil (keil@isdn4linux.de) + * + * Copyright 2009 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software 
Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +/* define this to enable static debug messages, if you kernel supports + * dynamic debugging, you should use debugfs for this + */ +/* #define DEBUG */ + +#include +#include +#include +#include "isar.h" + +#define ISAR_REV "2.1" + +MODULE_AUTHOR("Karsten Keil"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(ISAR_REV); + +#define DEBUG_HW_FIRMWARE_FIFO 0x10000 + +static const u8 faxmodulation_s[] = "3,24,48,72,73,74,96,97,98,121,122,145,146"; +static const u8 faxmodulation[] = {3, 24, 48, 72, 73, 74, 96, 97, 98, 121, + 122, 145, 146}; +#define FAXMODCNT 13 + +static void isar_setup(struct isar_hw *); + +static inline int +waitforHIA(struct isar_hw *isar, int timeout) +{ + int t = timeout; + u8 val = isar->read_reg(isar->hw, ISAR_HIA); + + while ((val & 1) && t) { + udelay(1); + t--; + val = isar->read_reg(isar->hw, ISAR_HIA); + } + pr_debug("%s: HIA after %dus\n", isar->name, timeout - t); + return timeout; +} + +/* + * send msg to ISAR mailbox + * if msg is NULL use isar->buf + */ +static int +send_mbox(struct isar_hw *isar, u8 his, u8 creg, u8 len, u8 *msg) +{ + if (!waitforHIA(isar, 1000)) + return 0; + pr_debug("send_mbox(%02x,%02x,%d)\n", his, creg, len); + isar->write_reg(isar->hw, ISAR_CTRL_H, creg); + isar->write_reg(isar->hw, ISAR_CTRL_L, len); + isar->write_reg(isar->hw, ISAR_WADR, 0); + if (!msg) + msg = isar->buf; + if (msg && len) { + isar->write_fifo(isar->hw, ISAR_MBOX, msg, len); + if (isar->ch[0].bch.debug & DEBUG_HW_BFIFO) { + int l = 0; + + while (l < (int)len) { + hex_dump_to_buffer(msg + l, len - l, 32, 1, + isar->log, 256, 1); + pr_debug("%s: %s %02x: %s\n", isar->name, + __func__, l, isar->log); + l += 32; + } + } + } + isar->write_reg(isar->hw, ISAR_HIS, his); + waitforHIA(isar, 1000); + return 1; +} + +/* + * receive message from ISAR mailbox + * if msg is NULL use isar->buf + */ +static void +rcv_mbox(struct isar_hw *isar, u8 *msg) +{ + if (!msg) + msg = isar->buf; + isar->write_reg(isar->hw, ISAR_RADR, 0); + if (msg && isar->clsb) { + isar->read_fifo(isar->hw, ISAR_MBOX, msg, isar->clsb); + if (isar->ch[0].bch.debug & DEBUG_HW_BFIFO) { + int l = 0; + + while (l < (int)isar->clsb) { + hex_dump_to_buffer(msg + l, isar->clsb - l, 32, + 1, isar->log, 256, 1); + pr_debug("%s: %s %02x: %s\n", isar->name, + __func__, l, isar->log); + l += 32; + } + } + } + isar->write_reg(isar->hw, ISAR_IIA, 0); +} + +static inline void +get_irq_infos(struct isar_hw *isar) +{ + isar->iis = isar->read_reg(isar->hw, ISAR_IIS); + isar->cmsb = isar->read_reg(isar->hw, ISAR_CTRL_H); + isar->clsb = isar->read_reg(isar->hw, ISAR_CTRL_L); + pr_debug("%s: rcv_mbox(%02x,%02x,%d)\n", isar->name, + isar->iis, isar->cmsb, isar->clsb); +} + +/* + * poll answer message from ISAR mailbox + * should be used only with ISAR IRQs disabled before DSP was started + * + */ +static int +poll_mbox(struct isar_hw *isar, int maxdelay) +{ + int t = maxdelay; + u8 irq; + + irq = isar->read_reg(isar->hw, ISAR_IRQBIT); + while (t && !(irq & ISAR_IRQSTA)) { + udelay(1); + t--; + } + if (t) { + get_irq_infos(isar); + rcv_mbox(isar, NULL); + } + 
pr_debug("%s: pulled %d bytes after %d us\n", + isar->name, isar->clsb, maxdelay - t); + return t; +} + +static int +ISARVersion(struct isar_hw *isar) +{ + int ver; + + /* disable ISAR IRQ */ + isar->write_reg(isar->hw, ISAR_IRQBIT, 0); + isar->buf[0] = ISAR_MSG_HWVER; + isar->buf[1] = 0; + isar->buf[2] = 1; + if (!send_mbox(isar, ISAR_HIS_VNR, 0, 3, NULL)) + return -1; + if (!poll_mbox(isar, 1000)) + return -2; + if (isar->iis == ISAR_IIS_VNR) { + if (isar->clsb == 1) { + ver = isar->buf[0] & 0xf; + return ver; + } + return -3; + } + return -4; +} + +static int +load_firmware(struct isar_hw *isar, const u8 *buf, int size) +{ + u32 saved_debug = isar->ch[0].bch.debug; + int ret, cnt; + u8 nom, noc; + u16 left, val, *sp = (u16 *)buf; + u8 *mp; + u_long flags; + + struct { + u16 sadr; + u16 len; + u16 d_key; + } blk_head; + + if (1 != isar->version) { + pr_err("%s: ISAR wrong version %d firmware download aborted\n", + isar->name, isar->version); + return -EINVAL; + } + if (!(saved_debug & DEBUG_HW_FIRMWARE_FIFO)) + isar->ch[0].bch.debug &= ~DEBUG_HW_BFIFO; + pr_debug("%s: load firmware %d words (%d bytes)\n", + isar->name, size/2, size); + cnt = 0; + size /= 2; + /* disable ISAR IRQ */ + spin_lock_irqsave(isar->hwlock, flags); + isar->write_reg(isar->hw, ISAR_IRQBIT, 0); + spin_unlock_irqrestore(isar->hwlock, flags); + while (cnt < size) { + blk_head.sadr = le16_to_cpu(*sp++); + blk_head.len = le16_to_cpu(*sp++); + blk_head.d_key = le16_to_cpu(*sp++); + cnt += 3; + pr_debug("ISAR firmware block (%#x,%d,%#x)\n", + blk_head.sadr, blk_head.len, blk_head.d_key & 0xff); + left = blk_head.len; + if (cnt + left > size) { + pr_info("%s: firmware error have %d need %d words\n", + isar->name, size, cnt + left); + ret = -EINVAL; + goto reterrflg; + } + spin_lock_irqsave(isar->hwlock, flags); + if (!send_mbox(isar, ISAR_HIS_DKEY, blk_head.d_key & 0xff, + 0, NULL)) { + pr_info("ISAR send_mbox dkey failed\n"); + ret = -ETIME; + goto reterror; + } + if (!poll_mbox(isar, 1000)) { + pr_warning("ISAR poll_mbox dkey failed\n"); + ret = -ETIME; + goto reterror; + } + spin_unlock_irqrestore(isar->hwlock, flags); + if ((isar->iis != ISAR_IIS_DKEY) || isar->cmsb || isar->clsb) { + pr_info("ISAR wrong dkey response (%x,%x,%x)\n", + isar->iis, isar->cmsb, isar->clsb); + ret = 1; + goto reterrflg; + } + while (left > 0) { + if (left > 126) + noc = 126; + else + noc = left; + nom = (2 * noc) + 3; + mp = isar->buf; + /* the ISAR is big endian */ + *mp++ = blk_head.sadr >> 8; + *mp++ = blk_head.sadr & 0xFF; + left -= noc; + cnt += noc; + *mp++ = noc; + pr_debug("%s: load %3d words at %04x\n", isar->name, + noc, blk_head.sadr); + blk_head.sadr += noc; + while (noc) { + val = le16_to_cpu(*sp++); + *mp++ = val >> 8; + *mp++ = val & 0xFF;; + noc--; + } + spin_lock_irqsave(isar->hwlock, flags); + if (!send_mbox(isar, ISAR_HIS_FIRM, 0, nom, NULL)) { + pr_info("ISAR send_mbox prog failed\n"); + ret = -ETIME; + goto reterror; + } + if (!poll_mbox(isar, 1000)) { + pr_info("ISAR poll_mbox prog failed\n"); + ret = -ETIME; + goto reterror; + } + spin_unlock_irqrestore(isar->hwlock, flags); + if ((isar->iis != ISAR_IIS_FIRM) || + isar->cmsb || isar->clsb) { + pr_info("ISAR wrong prog response (%x,%x,%x)\n", + isar->iis, isar->cmsb, isar->clsb); + ret = -EIO; + goto reterrflg; + } + } + pr_debug("%s: ISAR firmware block %d words loaded\n", + isar->name, blk_head.len); + } + isar->ch[0].bch.debug = saved_debug; + /* 10ms delay */ + cnt = 10; + while (cnt--) + mdelay(1); + isar->buf[0] = 0xff; + isar->buf[1] = 0xfe; + isar->bstat = 0; 
+ spin_lock_irqsave(isar->hwlock, flags); + if (!send_mbox(isar, ISAR_HIS_STDSP, 0, 2, NULL)) { + pr_info("ISAR send_mbox start dsp failed\n"); + ret = -ETIME; + goto reterror; + } + if (!poll_mbox(isar, 1000)) { + pr_info("ISAR poll_mbox start dsp failed\n"); + ret = -ETIME; + goto reterror; + } + if ((isar->iis != ISAR_IIS_STDSP) || isar->cmsb || isar->clsb) { + pr_info("ISAR wrong start dsp response (%x,%x,%x)\n", + isar->iis, isar->cmsb, isar->clsb); + ret = -EIO; + goto reterror; + } else + pr_debug("%s: ISAR start dsp success\n", isar->name); + + /* NORMAL mode entered */ + /* Enable IRQs of ISAR */ + isar->write_reg(isar->hw, ISAR_IRQBIT, ISAR_IRQSTA); + spin_unlock_irqrestore(isar->hwlock, flags); + cnt = 1000; /* max 1s */ + while ((!isar->bstat) && cnt) { + mdelay(1); + cnt--; + } + if (!cnt) { + pr_info("ISAR no general status event received\n"); + ret = -ETIME; + goto reterrflg; + } else + pr_debug("%s: ISAR general status event %x\n", + isar->name, isar->bstat); + /* 10ms delay */ + cnt = 10; + while (cnt--) + mdelay(1); + isar->iis = 0; + spin_lock_irqsave(isar->hwlock, flags); + if (!send_mbox(isar, ISAR_HIS_DIAG, ISAR_CTRL_STST, 0, NULL)) { + pr_info("ISAR send_mbox self tst failed\n"); + ret = -ETIME; + goto reterror; + } + spin_unlock_irqrestore(isar->hwlock, flags); + cnt = 10000; /* max 100 ms */ + while ((isar->iis != ISAR_IIS_DIAG) && cnt) { + udelay(10); + cnt--; + } + mdelay(1); + if (!cnt) { + pr_info("ISAR no self tst response\n"); + ret = -ETIME; + goto reterrflg; + } + if ((isar->cmsb == ISAR_CTRL_STST) && (isar->clsb == 1) + && (isar->buf[0] == 0)) + pr_debug("%s: ISAR selftest OK\n", isar->name); + else { + pr_info("ISAR selftest not OK %x/%x/%x\n", + isar->cmsb, isar->clsb, isar->buf[0]); + ret = -EIO; + goto reterrflg; + } + spin_lock_irqsave(isar->hwlock, flags); + isar->iis = 0; + if (!send_mbox(isar, ISAR_HIS_DIAG, ISAR_CTRL_SWVER, 0, NULL)) { + pr_info("ISAR RQST SVN failed\n"); + ret = -ETIME; + goto reterror; + } + spin_unlock_irqrestore(isar->hwlock, flags); + cnt = 30000; /* max 300 ms */ + while ((isar->iis != ISAR_IIS_DIAG) && cnt) { + udelay(10); + cnt--; + } + mdelay(1); + if (!cnt) { + pr_info("ISAR no SVN response\n"); + ret = -ETIME; + goto reterrflg; + } else { + if ((isar->cmsb == ISAR_CTRL_SWVER) && (isar->clsb == 1)) { + pr_notice("%s: ISAR software version %#x\n", + isar->name, isar->buf[0]); + } else { + pr_info("%s: ISAR wrong swver response (%x,%x)" + " cnt(%d)\n", isar->name, isar->cmsb, + isar->clsb, cnt); + ret = -EIO; + goto reterrflg; + } + } + spin_lock_irqsave(isar->hwlock, flags); + isar_setup(isar); + spin_unlock_irqrestore(isar->hwlock, flags); + ret = 0; +reterrflg: + spin_lock_irqsave(isar->hwlock, flags); +reterror: + isar->ch[0].bch.debug = saved_debug; + if (ret) + /* disable ISAR IRQ */ + isar->write_reg(isar->hw, ISAR_IRQBIT, 0); + spin_unlock_irqrestore(isar->hwlock, flags); + return ret; +} + +static inline void +deliver_status(struct isar_ch *ch, int status) +{ + pr_debug("%s: HL->LL FAXIND %x\n", ch->is->name, status); + _queue_data(&ch->bch.ch, PH_CONTROL_IND, status, 0, NULL, GFP_ATOMIC); +} + +static inline void +isar_rcv_frame(struct isar_ch *ch) +{ + u8 *ptr; + + if (!ch->is->clsb) { + pr_debug("%s; ISAR zero len frame\n", ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + return; + } + switch (ch->bch.state) { + case ISDN_P_NONE: + pr_debug("%s: ISAR protocol 0 spurious IIS_RDATA %x/%x/%x\n", + ch->is->name, ch->is->iis, ch->is->cmsb, ch->is->clsb); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + 
break; + case ISDN_P_B_RAW: + case ISDN_P_B_L2DTMF: + case ISDN_P_B_MODEM_ASYNC: + if (!ch->bch.rx_skb) { + ch->bch.rx_skb = mI_alloc_skb(ch->bch.maxlen, + GFP_ATOMIC); + if (unlikely(!ch->bch.rx_skb)) { + pr_info("%s: B receive out of memory\n", + ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + break; + } + } + rcv_mbox(ch->is, skb_put(ch->bch.rx_skb, ch->is->clsb)); + recv_Bchannel(&ch->bch, 0); + break; + case ISDN_P_B_HDLC: + if (!ch->bch.rx_skb) { + ch->bch.rx_skb = mI_alloc_skb(ch->bch.maxlen, + GFP_ATOMIC); + if (unlikely(!ch->bch.rx_skb)) { + pr_info("%s: B receive out of memory\n", + ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + break; + } + } + if ((ch->bch.rx_skb->len + ch->is->clsb) > + (ch->bch.maxlen + 2)) { + pr_debug("%s: incoming packet too large\n", + ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + skb_trim(ch->bch.rx_skb, 0); + break; + } + if (ch->is->cmsb & HDLC_ERROR) { + pr_debug("%s: ISAR frame error %x len %d\n", + ch->is->name, ch->is->cmsb, ch->is->clsb); +#ifdef ERROR_STATISTIC + if (ch->is->cmsb & HDLC_ERR_RER) + ch->bch.err_inv++; + if (ch->is->cmsb & HDLC_ERR_CER) + ch->bch.err_crc++; +#endif + skb_trim(ch->bch.rx_skb, 0); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + break; + } + if (ch->is->cmsb & HDLC_FSD) + skb_trim(ch->bch.rx_skb, 0); + ptr = skb_put(ch->bch.rx_skb, ch->is->clsb); + rcv_mbox(ch->is, ptr); + if (ch->is->cmsb & HDLC_FED) { + if (ch->bch.rx_skb->len < 3) { /* last 2 are the FCS */ + pr_debug("%s: ISAR frame to short %d\n", + ch->is->name, ch->bch.rx_skb->len); + skb_trim(ch->bch.rx_skb, 0); + break; + } + skb_trim(ch->bch.rx_skb, ch->bch.rx_skb->len - 2); + recv_Bchannel(&ch->bch, 0); + } + break; + case ISDN_P_B_T30_FAX: + if (ch->state != STFAX_ACTIV) { + pr_debug("%s: isar_rcv_frame: not ACTIV\n", + ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + if (ch->bch.rx_skb) + skb_trim(ch->bch.rx_skb, 0); + break; + } + if (!ch->bch.rx_skb) { + ch->bch.rx_skb = mI_alloc_skb(ch->bch.maxlen, + GFP_ATOMIC); + if (unlikely(!ch->bch.rx_skb)) { + pr_info("%s: B receive out of memory\n", + __func__); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + break; + } + } + if (ch->cmd == PCTRL_CMD_FRM) { + rcv_mbox(ch->is, skb_put(ch->bch.rx_skb, ch->is->clsb)); + pr_debug("%s: isar_rcv_frame: %d\n", + ch->is->name, ch->bch.rx_skb->len); + if (ch->is->cmsb & SART_NMD) { /* ABORT */ + pr_debug("%s: isar_rcv_frame: no more data\n", + ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + send_mbox(ch->is, SET_DPS(ch->dpath) | + ISAR_HIS_PUMPCTRL, PCTRL_CMD_ESC, + 0, NULL); + ch->state = STFAX_ESCAPE; + /* set_skb_flag(skb, DF_NOMOREDATA); */ + } + recv_Bchannel(&ch->bch, 0); + if (ch->is->cmsb & SART_NMD) + deliver_status(ch, HW_MOD_NOCARR); + break; + } + if (ch->cmd != PCTRL_CMD_FRH) { + pr_debug("%s: isar_rcv_frame: unknown fax mode %x\n", + ch->is->name, ch->cmd); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + if (ch->bch.rx_skb) + skb_trim(ch->bch.rx_skb, 0); + break; + } + /* PCTRL_CMD_FRH */ + if ((ch->bch.rx_skb->len + ch->is->clsb) > + (ch->bch.maxlen + 2)) { + pr_info("%s: %s incoming packet too large\n", + ch->is->name, __func__); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + skb_trim(ch->bch.rx_skb, 0); + break; + } else if (ch->is->cmsb & HDLC_ERROR) { + pr_info("%s: ISAR frame error %x len %d\n", + ch->is->name, ch->is->cmsb, ch->is->clsb); + skb_trim(ch->bch.rx_skb, 0); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + break; + } + if (ch->is->cmsb & HDLC_FSD) + 
skb_trim(ch->bch.rx_skb, 0); + ptr = skb_put(ch->bch.rx_skb, ch->is->clsb); + rcv_mbox(ch->is, ptr); + if (ch->is->cmsb & HDLC_FED) { + if (ch->bch.rx_skb->len < 3) { /* last 2 are the FCS */ + pr_info("%s: ISAR frame to short %d\n", + ch->is->name, ch->bch.rx_skb->len); + skb_trim(ch->bch.rx_skb, 0); + break; + } + skb_trim(ch->bch.rx_skb, ch->bch.rx_skb->len - 2); + recv_Bchannel(&ch->bch, 0); + } + if (ch->is->cmsb & SART_NMD) { /* ABORT */ + pr_debug("%s: isar_rcv_frame: no more data\n", + ch->is->name); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + if (ch->bch.rx_skb) + skb_trim(ch->bch.rx_skb, 0); + send_mbox(ch->is, SET_DPS(ch->dpath) | + ISAR_HIS_PUMPCTRL, PCTRL_CMD_ESC, 0, NULL); + ch->state = STFAX_ESCAPE; + deliver_status(ch, HW_MOD_NOCARR); + } + break; + default: + pr_info("isar_rcv_frame protocol (%x)error\n", ch->bch.state); + ch->is->write_reg(ch->is->hw, ISAR_IIA, 0); + break; + } +} + +static void +isar_fill_fifo(struct isar_ch *ch) +{ + int count; + u8 msb; + u8 *ptr; + + pr_debug("%s: ch%d tx_skb %p tx_idx %d\n", + ch->is->name, ch->bch.nr, ch->bch.tx_skb, ch->bch.tx_idx); + if (!ch->bch.tx_skb) + return; + count = ch->bch.tx_skb->len - ch->bch.tx_idx; + if (count <= 0) + return; + if (!(ch->is->bstat & + (ch->dpath == 1 ? BSTAT_RDM1 : BSTAT_RDM2))) + return; + if (count > ch->mml) { + msb = 0; + count = ch->mml; + } else { + msb = HDLC_FED; + } + ptr = ch->bch.tx_skb->data + ch->bch.tx_idx; + if (!ch->bch.tx_idx) { + pr_debug("%s: frame start\n", ch->is->name); + if ((ch->bch.state == ISDN_P_B_T30_FAX) && + (ch->cmd == PCTRL_CMD_FTH)) { + if (count > 1) { + if ((ptr[0] == 0xff) && (ptr[1] == 0x13)) { + /* last frame */ + test_and_set_bit(FLG_LASTDATA, + &ch->bch.Flags); + pr_debug("%s: set LASTDATA\n", + ch->is->name); + if (msb == HDLC_FED) + test_and_set_bit(FLG_DLEETX, + &ch->bch.Flags); + } + } + } + msb |= HDLC_FST; + } + ch->bch.tx_idx += count; + switch (ch->bch.state) { + case ISDN_P_NONE: + pr_info("%s: wrong protocol 0\n", __func__); + break; + case ISDN_P_B_RAW: + case ISDN_P_B_L2DTMF: + case ISDN_P_B_MODEM_ASYNC: + send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA, + 0, count, ptr); + break; + case ISDN_P_B_HDLC: + send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA, + msb, count, ptr); + break; + case ISDN_P_B_T30_FAX: + if (ch->state != STFAX_ACTIV) + pr_debug("%s: not ACTIV\n", ch->is->name); + else if (ch->cmd == PCTRL_CMD_FTH) + send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA, + msb, count, ptr); + else if (ch->cmd == PCTRL_CMD_FTM) + send_mbox(ch->is, SET_DPS(ch->dpath) | ISAR_HIS_SDATA, + 0, count, ptr); + else + pr_debug("%s: not FTH/FTM\n", ch->is->name); + break; + default: + pr_info("%s: protocol(%x) error\n", + __func__, ch->bch.state); + break; + } +} + +static inline struct isar_ch * +sel_bch_isar(struct isar_hw *isar, u8 dpath) +{ + struct isar_ch *base = &isar->ch[0]; + + if ((!dpath) || (dpath > 2)) + return NULL; + if (base->dpath == dpath) + return base; + base++; + if (base->dpath == dpath) + return base; + return NULL; +} + +static void +send_next(struct isar_ch *ch) +{ + pr_debug("%s: %s ch%d tx_skb %p tx_idx %d\n", + ch->is->name, __func__, ch->bch.nr, + ch->bch.tx_skb, ch->bch.tx_idx); + if (ch->bch.state == ISDN_P_B_T30_FAX) { + if (ch->cmd == PCTRL_CMD_FTH) { + if (test_bit(FLG_LASTDATA, &ch->bch.Flags)) { + pr_debug("set NMD_DATA\n"); + test_and_set_bit(FLG_NMD_DATA, &ch->bch.Flags); + } + } else if (ch->cmd == PCTRL_CMD_FTM) { + if (test_bit(FLG_DLEETX, &ch->bch.Flags)) { + test_and_set_bit(FLG_LASTDATA, 
&ch->bch.Flags); + test_and_set_bit(FLG_NMD_DATA, &ch->bch.Flags); + } + } + } + if (ch->bch.tx_skb) { + /* send confirm, on trans, free on hdlc. */ + if (test_bit(FLG_TRANSPARENT, &ch->bch.Flags)) + confirm_Bsend(&ch->bch); + dev_kfree_skb(ch->bch.tx_skb); + } + if (get_next_bframe(&ch->bch)) + isar_fill_fifo(ch); + else { + if (test_and_clear_bit(FLG_DLEETX, &ch->bch.Flags)) { + if (test_and_clear_bit(FLG_LASTDATA, + &ch->bch.Flags)) { + if (test_and_clear_bit(FLG_NMD_DATA, + &ch->bch.Flags)) { + u8 zd = 0; + send_mbox(ch->is, SET_DPS(ch->dpath) | + ISAR_HIS_SDATA, 0x01, 1, &zd); + } + test_and_set_bit(FLG_LL_OK, &ch->bch.Flags); + } else { + deliver_status(ch, HW_MOD_CONNECT); + } + } + } +} + +static void +check_send(struct isar_hw *isar, u8 rdm) +{ + struct isar_ch *ch; + + pr_debug("%s: rdm %x\n", isar->name, rdm); + if (rdm & BSTAT_RDM1) { + ch = sel_bch_isar(isar, 1); + if (ch && test_bit(FLG_ACTIVE, &ch->bch.Flags)) { + if (ch->bch.tx_skb && (ch->bch.tx_skb->len > + ch->bch.tx_idx)) + isar_fill_fifo(ch); + else + send_next(ch); + } + } + if (rdm & BSTAT_RDM2) { + ch = sel_bch_isar(isar, 2); + if (ch && test_bit(FLG_ACTIVE, &ch->bch.Flags)) { + if (ch->bch.tx_skb && (ch->bch.tx_skb->len > + ch->bch.tx_idx)) + isar_fill_fifo(ch); + else + send_next(ch); + } + } +} + +const char *dmril[] = {"NO SPEED", "1200/75", "NODEF2", "75/1200", "NODEF4", + "300", "600", "1200", "2400", "4800", "7200", + "9600nt", "9600t", "12000", "14400", "WRONG"}; +const char *dmrim[] = {"NO MOD", "NO DEF", "V32/V32b", "V22", "V21", + "Bell103", "V23", "Bell202", "V17", "V29", "V27ter"}; + +static void +isar_pump_status_rsp(struct isar_ch *ch) { + u8 ril = ch->is->buf[0]; + u8 rim; + + if (!test_and_clear_bit(ISAR_RATE_REQ, &ch->is->Flags)) + return; + if (ril > 14) { + pr_info("%s: wrong pstrsp ril=%d\n", ch->is->name, ril); + ril = 15; + } + switch (ch->is->buf[1]) { + case 0: + rim = 0; + break; + case 0x20: + rim = 2; + break; + case 0x40: + rim = 3; + break; + case 0x41: + rim = 4; + break; + case 0x51: + rim = 5; + break; + case 0x61: + rim = 6; + break; + case 0x71: + rim = 7; + break; + case 0x82: + rim = 8; + break; + case 0x92: + rim = 9; + break; + case 0xa2: + rim = 10; + break; + default: + rim = 1; + break; + } + sprintf(ch->conmsg, "%s %s", dmril[ril], dmrim[rim]); + pr_debug("%s: pump strsp %s\n", ch->is->name, ch->conmsg); +} + +static void +isar_pump_statev_modem(struct isar_ch *ch, u8 devt) { + u8 dps = SET_DPS(ch->dpath); + + switch (devt) { + case PSEV_10MS_TIMER: + pr_debug("%s: pump stev TIMER\n", ch->is->name); + break; + case PSEV_CON_ON: + pr_debug("%s: pump stev CONNECT\n", ch->is->name); + deliver_status(ch, HW_MOD_CONNECT); + break; + case PSEV_CON_OFF: + pr_debug("%s: pump stev NO CONNECT\n", ch->is->name); + send_mbox(ch->is, dps | ISAR_HIS_PSTREQ, 0, 0, NULL); + deliver_status(ch, HW_MOD_NOCARR); + break; + case PSEV_V24_OFF: + pr_debug("%s: pump stev V24 OFF\n", ch->is->name); + break; + case PSEV_CTS_ON: + pr_debug("%s: pump stev CTS ON\n", ch->is->name); + break; + case PSEV_CTS_OFF: + pr_debug("%s pump stev CTS OFF\n", ch->is->name); + break; + case PSEV_DCD_ON: + pr_debug("%s: pump stev CARRIER ON\n", ch->is->name); + test_and_set_bit(ISAR_RATE_REQ, &ch->is->Flags); + send_mbox(ch->is, dps | ISAR_HIS_PSTREQ, 0, 0, NULL); + break; + case PSEV_DCD_OFF: + pr_debug("%s: pump stev CARRIER OFF\n", ch->is->name); + break; + case PSEV_DSR_ON: + pr_debug("%s: pump stev DSR ON\n", ch->is->name); + break; + case PSEV_DSR_OFF: + pr_debug("%s: pump stev DSR_OFF\n", ch->is->name); + 
break; + case PSEV_REM_RET: + pr_debug("%s: pump stev REMOTE RETRAIN\n", ch->is->name); + break; + case PSEV_REM_REN: + pr_debug("%s: pump stev REMOTE RENEGOTIATE\n", ch->is->name); + break; + case PSEV_GSTN_CLR: + pr_debug("%s: pump stev GSTN CLEAR\n", ch->is->name); + break; + default: + pr_info("u%s: nknown pump stev %x\n", ch->is->name, devt); + break; + } +} + +static void +isar_pump_statev_fax(struct isar_ch *ch, u8 devt) { + u8 dps = SET_DPS(ch->dpath); + u8 p1; + + switch (devt) { + case PSEV_10MS_TIMER: + pr_debug("%s: pump stev TIMER\n", ch->is->name); + break; + case PSEV_RSP_READY: + pr_debug("%s: pump stev RSP_READY\n", ch->is->name); + ch->state = STFAX_READY; + deliver_status(ch, HW_MOD_READY); +#ifdef AUTOCON + if (test_bit(BC_FLG_ORIG, &ch->bch.Flags)) + isar_pump_cmd(bch, HW_MOD_FRH, 3); + else + isar_pump_cmd(bch, HW_MOD_FTH, 3); +#endif + break; + case PSEV_LINE_TX_H: + if (ch->state == STFAX_LINE) { + pr_debug("%s: pump stev LINE_TX_H\n", ch->is->name); + ch->state = STFAX_CONT; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + PCTRL_CMD_CONT, 0, NULL); + } else { + pr_debug("%s: pump stev LINE_TX_H wrong st %x\n", + ch->is->name, ch->state); + } + break; + case PSEV_LINE_RX_H: + if (ch->state == STFAX_LINE) { + pr_debug("%s: pump stev LINE_RX_H\n", ch->is->name); + ch->state = STFAX_CONT; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + PCTRL_CMD_CONT, 0, NULL); + } else { + pr_debug("%s: pump stev LINE_RX_H wrong st %x\n", + ch->is->name, ch->state); + } + break; + case PSEV_LINE_TX_B: + if (ch->state == STFAX_LINE) { + pr_debug("%s: pump stev LINE_TX_B\n", ch->is->name); + ch->state = STFAX_CONT; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + PCTRL_CMD_CONT, 0, NULL); + } else { + pr_debug("%s: pump stev LINE_TX_B wrong st %x\n", + ch->is->name, ch->state); + } + break; + case PSEV_LINE_RX_B: + if (ch->state == STFAX_LINE) { + pr_debug("%s: pump stev LINE_RX_B\n", ch->is->name); + ch->state = STFAX_CONT; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + PCTRL_CMD_CONT, 0, NULL); + } else { + pr_debug("%s: pump stev LINE_RX_B wrong st %x\n", + ch->is->name, ch->state); + } + break; + case PSEV_RSP_CONN: + if (ch->state == STFAX_CONT) { + pr_debug("%s: pump stev RSP_CONN\n", ch->is->name); + ch->state = STFAX_ACTIV; + test_and_set_bit(ISAR_RATE_REQ, &ch->is->Flags); + send_mbox(ch->is, dps | ISAR_HIS_PSTREQ, 0, 0, NULL); + if (ch->cmd == PCTRL_CMD_FTH) { + int delay = (ch->mod == 3) ? 
1000 : 200; + /* 1s (200 ms) Flags before data */ + if (test_and_set_bit(FLG_FTI_RUN, + &ch->bch.Flags)) + del_timer(&ch->ftimer); + ch->ftimer.expires = + jiffies + ((delay * HZ)/1000); + test_and_set_bit(FLG_LL_CONN, + &ch->bch.Flags); + add_timer(&ch->ftimer); + } else { + deliver_status(ch, HW_MOD_CONNECT); + } + } else { + pr_debug("%s: pump stev RSP_CONN wrong st %x\n", + ch->is->name, ch->state); + } + break; + case PSEV_FLAGS_DET: + pr_debug("%s: pump stev FLAGS_DET\n", ch->is->name); + break; + case PSEV_RSP_DISC: + pr_debug("%s: pump stev RSP_DISC state(%d)\n", + ch->is->name, ch->state); + if (ch->state == STFAX_ESCAPE) { + p1 = 5; + switch (ch->newcmd) { + case 0: + ch->state = STFAX_READY; + break; + case PCTRL_CMD_FTM: + p1 = 2; + case PCTRL_CMD_FTH: + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + PCTRL_CMD_SILON, 1, &p1); + ch->state = STFAX_SILDET; + break; + case PCTRL_CMD_FRH: + case PCTRL_CMD_FRM: + ch->mod = ch->newmod; + p1 = ch->newmod; + ch->newmod = 0; + ch->cmd = ch->newcmd; + ch->newcmd = 0; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + ch->cmd, 1, &p1); + ch->state = STFAX_LINE; + ch->try_mod = 3; + break; + default: + pr_debug("%s: RSP_DISC unknown newcmd %x\n", + ch->is->name, ch->newcmd); + break; + } + } else if (ch->state == STFAX_ACTIV) { + if (test_and_clear_bit(FLG_LL_OK, &ch->bch.Flags)) + deliver_status(ch, HW_MOD_OK); + else if (ch->cmd == PCTRL_CMD_FRM) + deliver_status(ch, HW_MOD_NOCARR); + else + deliver_status(ch, HW_MOD_FCERROR); + ch->state = STFAX_READY; + } else if (ch->state != STFAX_SILDET) { + /* ignore in STFAX_SILDET */ + ch->state = STFAX_READY; + deliver_status(ch, HW_MOD_FCERROR); + } + break; + case PSEV_RSP_SILDET: + pr_debug("%s: pump stev RSP_SILDET\n", ch->is->name); + if (ch->state == STFAX_SILDET) { + ch->mod = ch->newmod; + p1 = ch->newmod; + ch->newmod = 0; + ch->cmd = ch->newcmd; + ch->newcmd = 0; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + ch->cmd, 1, &p1); + ch->state = STFAX_LINE; + ch->try_mod = 3; + } + break; + case PSEV_RSP_SILOFF: + pr_debug("%s: pump stev RSP_SILOFF\n", ch->is->name); + break; + case PSEV_RSP_FCERR: + if (ch->state == STFAX_LINE) { + pr_debug("%s: pump stev RSP_FCERR try %d\n", + ch->is->name, ch->try_mod); + if (ch->try_mod--) { + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, + ch->cmd, 1, &ch->mod); + break; + } + } + pr_debug("%s: pump stev RSP_FCERR\n", ch->is->name); + ch->state = STFAX_ESCAPE; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, PCTRL_CMD_ESC, + 0, NULL); + deliver_status(ch, HW_MOD_FCERROR); + break; + default: + break; + } +} + +void +mISDNisar_irq(struct isar_hw *isar) +{ + struct isar_ch *ch; + + get_irq_infos(isar); + switch (isar->iis & ISAR_IIS_MSCMSD) { + case ISAR_IIS_RDATA: + ch = sel_bch_isar(isar, isar->iis >> 6); + if (ch) + isar_rcv_frame(ch); + else { + pr_debug("%s: ISAR spurious IIS_RDATA %x/%x/%x\n", + isar->name, isar->iis, isar->cmsb, + isar->clsb); + isar->write_reg(isar->hw, ISAR_IIA, 0); + } + break; + case ISAR_IIS_GSTEV: + isar->write_reg(isar->hw, ISAR_IIA, 0); + isar->bstat |= isar->cmsb; + check_send(isar, isar->cmsb); + break; + case ISAR_IIS_BSTEV: +#ifdef ERROR_STATISTIC + ch = sel_bch_isar(isar, isar->iis >> 6); + if (ch) { + if (isar->cmsb == BSTEV_TBO) + ch->bch.err_tx++; + if (isar->cmsb == BSTEV_RBO) + ch->bch.err_rdo++; + } +#endif + pr_debug("%s: Buffer STEV dpath%d msb(%x)\n", + isar->name, isar->iis>>6, isar->cmsb); + isar->write_reg(isar->hw, ISAR_IIA, 0); + break; + case ISAR_IIS_PSTEV: + ch = sel_bch_isar(isar, isar->iis >> 6); + if (ch) { + 
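+ /* pump status event: read the mailbox, then dispatch on the B-channel protocol (data modem, T.30 fax, or DTMF tone indication in raw mode) */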
rcv_mbox(isar, NULL); + if (ch->bch.state == ISDN_P_B_MODEM_ASYNC) + isar_pump_statev_modem(ch, isar->cmsb); + else if (ch->bch.state == ISDN_P_B_T30_FAX) + isar_pump_statev_fax(ch, isar->cmsb); + else if (ch->bch.state == ISDN_P_B_RAW) { + int tt; + tt = isar->cmsb | 0x30; + if (tt == 0x3e) + tt = '*'; + else if (tt == 0x3f) + tt = '#'; + else if (tt > '9') + tt += 7; + tt |= DTMF_TONE_VAL; + _queue_data(&ch->bch.ch, PH_CONTROL_IND, + MISDN_ID_ANY, sizeof(tt), &tt, + GFP_ATOMIC); + } else + pr_debug("%s: ISAR IIS_PSTEV pm %d sta %x\n", + isar->name, ch->bch.state, + isar->cmsb); + } else { + pr_debug("%s: ISAR spurious IIS_PSTEV %x/%x/%x\n", + isar->name, isar->iis, isar->cmsb, + isar->clsb); + isar->write_reg(isar->hw, ISAR_IIA, 0); + } + break; + case ISAR_IIS_PSTRSP: + ch = sel_bch_isar(isar, isar->iis >> 6); + if (ch) { + rcv_mbox(isar, NULL); + isar_pump_status_rsp(ch); + } else { + pr_debug("%s: ISAR spurious IIS_PSTRSP %x/%x/%x\n", + isar->name, isar->iis, isar->cmsb, + isar->clsb); + isar->write_reg(isar->hw, ISAR_IIA, 0); + } + break; + case ISAR_IIS_DIAG: + case ISAR_IIS_BSTRSP: + case ISAR_IIS_IOM2RSP: + rcv_mbox(isar, NULL); + break; + case ISAR_IIS_INVMSG: + rcv_mbox(isar, NULL); + pr_debug("%s: invalid msg his:%x\n", isar->name, isar->cmsb); + break; + default: + rcv_mbox(isar, NULL); + pr_debug("%s: unhandled msg iis(%x) ctrl(%x/%x)\n", + isar->name, isar->iis, isar->cmsb, isar->clsb); + break; + } +} +EXPORT_SYMBOL(mISDNisar_irq); + +static void +ftimer_handler(unsigned long data) +{ + struct isar_ch *ch = (struct isar_ch *)data; + + pr_debug("%s: ftimer flags %lx\n", ch->is->name, ch->bch.Flags); + test_and_clear_bit(FLG_FTI_RUN, &ch->bch.Flags); + if (test_and_clear_bit(FLG_LL_CONN, &ch->bch.Flags)) + deliver_status(ch, HW_MOD_CONNECT); +} + +static void +setup_pump(struct isar_ch *ch) { + u8 dps = SET_DPS(ch->dpath); + u8 ctrl, param[6]; + + switch (ch->bch.state) { + case ISDN_P_NONE: + case ISDN_P_B_RAW: + case ISDN_P_B_HDLC: + send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, PMOD_BYPASS, 0, NULL); + break; + case ISDN_P_B_L2DTMF: + if (test_bit(FLG_DTMFSEND, &ch->bch.Flags)) { + param[0] = 5; /* TOA 5 db */ + send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, + PMOD_DTMF_TRANS, 1, param); + } else { + param[0] = 40; /* REL -46 dbm */ + send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, + PMOD_DTMF, 1, param); + } + case ISDN_P_B_MODEM_ASYNC: + ctrl = PMOD_DATAMODEM; + if (test_bit(FLG_ORIGIN, &ch->bch.Flags)) { + ctrl |= PCTRL_ORIG; + param[5] = PV32P6_CTN; + } else { + param[5] = PV32P6_ATN; + } + param[0] = 6; /* 6 db */ + param[1] = PV32P2_V23R | PV32P2_V22A | PV32P2_V22B | + PV32P2_V22C | PV32P2_V21 | PV32P2_BEL; + param[2] = PV32P3_AMOD | PV32P3_V32B | PV32P3_V23B; + param[3] = PV32P4_UT144; + param[4] = PV32P5_UT144; + send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, ctrl, 6, param); + break; + case ISDN_P_B_T30_FAX: + ctrl = PMOD_FAX; + if (test_bit(FLG_ORIGIN, &ch->bch.Flags)) { + ctrl |= PCTRL_ORIG; + param[1] = PFAXP2_CTN; + } else { + param[1] = PFAXP2_ATN; + } + param[0] = 6; /* 6 db */ + send_mbox(ch->is, dps | ISAR_HIS_PUMPCFG, ctrl, 2, param); + ch->state = STFAX_NULL; + ch->newcmd = 0; + ch->newmod = 0; + test_and_set_bit(FLG_FTI_RUN, &ch->bch.Flags); + break; + } + udelay(1000); + send_mbox(ch->is, dps | ISAR_HIS_PSTREQ, 0, 0, NULL); + udelay(1000); +} + +static void +setup_sart(struct isar_ch *ch) { + u8 dps = SET_DPS(ch->dpath); + u8 ctrl, param[2] = {0, 0}; + + switch (ch->bch.state) { + case ISDN_P_NONE: + send_mbox(ch->is, dps | ISAR_HIS_SARTCFG, SMODE_DISABLE, + 0, NULL); + 
break; + case ISDN_P_B_RAW: + case ISDN_P_B_L2DTMF: + send_mbox(ch->is, dps | ISAR_HIS_SARTCFG, SMODE_BINARY, + 2, param); + break; + case ISDN_P_B_HDLC: + case ISDN_P_B_T30_FAX: + send_mbox(ch->is, dps | ISAR_HIS_SARTCFG, SMODE_HDLC, + 1, param); + break; + case ISDN_P_B_MODEM_ASYNC: + ctrl = SMODE_V14 | SCTRL_HDMC_BOTH; + param[0] = S_P1_CHS_8; + param[1] = S_P2_BFT_DEF; + send_mbox(ch->is, dps | ISAR_HIS_SARTCFG, ctrl, 2, param); + break; + } + udelay(1000); + send_mbox(ch->is, dps | ISAR_HIS_BSTREQ, 0, 0, NULL); + udelay(1000); +} + +static void +setup_iom2(struct isar_ch *ch) { + u8 dps = SET_DPS(ch->dpath); + u8 cmsb = IOM_CTRL_ENA, msg[5] = {IOM_P1_TXD, 0, 0, 0, 0}; + + if (ch->bch.nr == 2) { + msg[1] = 1; + msg[3] = 1; + } + switch (ch->bch.state) { + case ISDN_P_NONE: + cmsb = 0; + /* dummy slot */ + msg[1] = ch->dpath + 2; + msg[3] = ch->dpath + 2; + break; + case ISDN_P_B_RAW: + case ISDN_P_B_HDLC: + break; + case ISDN_P_B_MODEM_ASYNC: + case ISDN_P_B_T30_FAX: + cmsb |= IOM_CTRL_RCV; + case ISDN_P_B_L2DTMF: + if (test_bit(FLG_DTMFSEND, &ch->bch.Flags)) + cmsb |= IOM_CTRL_RCV; + cmsb |= IOM_CTRL_ALAW; + break; + } + send_mbox(ch->is, dps | ISAR_HIS_IOM2CFG, cmsb, 5, msg); + udelay(1000); + send_mbox(ch->is, dps | ISAR_HIS_IOM2REQ, 0, 0, NULL); + udelay(1000); +} + +static int +modeisar(struct isar_ch *ch, u32 bprotocol) +{ + /* Here we are selecting the best datapath for requested protocol */ + if (ch->bch.state == ISDN_P_NONE) { /* New Setup */ + switch (bprotocol) { + case ISDN_P_NONE: /* init */ + if (!ch->dpath) + /* no init for dpath 0 */ + return 0; + test_and_clear_bit(FLG_HDLC, &ch->bch.Flags); + test_and_clear_bit(FLG_TRANSPARENT, &ch->bch.Flags); + break; + case ISDN_P_B_RAW: + case ISDN_P_B_HDLC: + /* best is datapath 2 */ + if (!test_and_set_bit(ISAR_DP2_USE, &ch->is->Flags)) + ch->dpath = 2; + else if (!test_and_set_bit(ISAR_DP1_USE, + &ch->is->Flags)) + ch->dpath = 1; + else { + pr_info("modeisar both pathes in use\n"); + return -EBUSY; + } + if (bprotocol == ISDN_P_B_HDLC) + test_and_set_bit(FLG_HDLC, &ch->bch.Flags); + else + test_and_set_bit(FLG_TRANSPARENT, + &ch->bch.Flags); + break; + case ISDN_P_B_MODEM_ASYNC: + case ISDN_P_B_T30_FAX: + case ISDN_P_B_L2DTMF: + /* only datapath 1 */ + if (!test_and_set_bit(ISAR_DP1_USE, &ch->is->Flags)) + ch->dpath = 1; + else { + pr_info("%s: ISAR modeisar analog functions" + "only with DP1\n", ch->is->name); + return -EBUSY; + } + break; + default: + pr_info("%s: protocol not known %x\n", ch->is->name, + bprotocol); + return -ENOPROTOOPT; + } + } + pr_debug("%s: ISAR ch%d dp%d protocol %x->%x\n", ch->is->name, + ch->bch.nr, ch->dpath, ch->bch.state, bprotocol); + ch->bch.state = bprotocol; + setup_pump(ch); + setup_iom2(ch); + setup_sart(ch); + if (ch->bch.state == ISDN_P_NONE) { + /* Clear resources */ + if (ch->dpath == 1) + test_and_clear_bit(ISAR_DP1_USE, &ch->is->Flags); + else if (ch->dpath == 2) + test_and_clear_bit(ISAR_DP2_USE, &ch->is->Flags); + ch->dpath = 0; + ch->is->ctrl(ch->is->hw, HW_DEACT_IND, ch->bch.nr); + } else + ch->is->ctrl(ch->is->hw, HW_ACTIVATE_IND, ch->bch.nr); + return 0; +} + +static void +isar_pump_cmd(struct isar_ch *ch, u32 cmd, u8 para) +{ + u8 dps = SET_DPS(ch->dpath); + u8 ctrl = 0, nom = 0, p1 = 0; + + pr_debug("%s: isar_pump_cmd %x/%x state(%x)\n", + ch->is->name, cmd, para, ch->bch.state); + switch (cmd) { + case HW_MOD_FTM: + if (ch->state == STFAX_READY) { + p1 = para; + ctrl = PCTRL_CMD_FTM; + nom = 1; + ch->state = STFAX_LINE; + ch->cmd = ctrl; + ch->mod = para; + ch->newmod = 0; + 
ch->newcmd = 0; + ch->try_mod = 3; + } else if ((ch->state == STFAX_ACTIV) && + (ch->cmd == PCTRL_CMD_FTM) && (ch->mod == para)) + deliver_status(ch, HW_MOD_CONNECT); + else { + ch->newmod = para; + ch->newcmd = PCTRL_CMD_FTM; + nom = 0; + ctrl = PCTRL_CMD_ESC; + ch->state = STFAX_ESCAPE; + } + break; + case HW_MOD_FTH: + if (ch->state == STFAX_READY) { + p1 = para; + ctrl = PCTRL_CMD_FTH; + nom = 1; + ch->state = STFAX_LINE; + ch->cmd = ctrl; + ch->mod = para; + ch->newmod = 0; + ch->newcmd = 0; + ch->try_mod = 3; + } else if ((ch->state == STFAX_ACTIV) && + (ch->cmd == PCTRL_CMD_FTH) && (ch->mod == para)) + deliver_status(ch, HW_MOD_CONNECT); + else { + ch->newmod = para; + ch->newcmd = PCTRL_CMD_FTH; + nom = 0; + ctrl = PCTRL_CMD_ESC; + ch->state = STFAX_ESCAPE; + } + break; + case HW_MOD_FRM: + if (ch->state == STFAX_READY) { + p1 = para; + ctrl = PCTRL_CMD_FRM; + nom = 1; + ch->state = STFAX_LINE; + ch->cmd = ctrl; + ch->mod = para; + ch->newmod = 0; + ch->newcmd = 0; + ch->try_mod = 3; + } else if ((ch->state == STFAX_ACTIV) && + (ch->cmd == PCTRL_CMD_FRM) && (ch->mod == para)) + deliver_status(ch, HW_MOD_CONNECT); + else { + ch->newmod = para; + ch->newcmd = PCTRL_CMD_FRM; + nom = 0; + ctrl = PCTRL_CMD_ESC; + ch->state = STFAX_ESCAPE; + } + break; + case HW_MOD_FRH: + if (ch->state == STFAX_READY) { + p1 = para; + ctrl = PCTRL_CMD_FRH; + nom = 1; + ch->state = STFAX_LINE; + ch->cmd = ctrl; + ch->mod = para; + ch->newmod = 0; + ch->newcmd = 0; + ch->try_mod = 3; + } else if ((ch->state == STFAX_ACTIV) && + (ch->cmd == PCTRL_CMD_FRH) && (ch->mod == para)) + deliver_status(ch, HW_MOD_CONNECT); + else { + ch->newmod = para; + ch->newcmd = PCTRL_CMD_FRH; + nom = 0; + ctrl = PCTRL_CMD_ESC; + ch->state = STFAX_ESCAPE; + } + break; + case PCTRL_CMD_TDTMF: + p1 = para; + nom = 1; + ctrl = PCTRL_CMD_TDTMF; + break; + } + if (ctrl) + send_mbox(ch->is, dps | ISAR_HIS_PUMPCTRL, ctrl, nom, &p1); +} + +static void +isar_setup(struct isar_hw *isar) +{ + u8 msg; + int i; + + /* Dpath 1, 2 */ + msg = 61; + for (i = 0; i < 2; i++) { + /* Buffer Config */ + send_mbox(isar, (i ? 
ISAR_HIS_DPS2 : ISAR_HIS_DPS1) | + ISAR_HIS_P12CFG, 4, 1, &msg); + isar->ch[i].mml = msg; + isar->ch[i].bch.state = 0; + isar->ch[i].dpath = i + 1; + modeisar(&isar->ch[i], ISDN_P_NONE); + } +} + +static int +isar_l2l1(struct mISDNchannel *ch, struct sk_buff *skb) +{ + struct bchannel *bch = container_of(ch, struct bchannel, ch); + struct isar_ch *ich = container_of(bch, struct isar_ch, bch); + int ret = -EINVAL; + struct mISDNhead *hh = mISDN_HEAD_P(skb); + u32 id, *val; + u_long flags; + + switch (hh->prim) { + case PH_DATA_REQ: + spin_lock_irqsave(ich->is->hwlock, flags); + ret = bchannel_senddata(bch, skb); + if (ret > 0) { /* direct TX */ + id = hh->id; /* skb can be freed */ + ret = 0; + isar_fill_fifo(ich); + spin_unlock_irqrestore(ich->is->hwlock, flags); + if (!test_bit(FLG_TRANSPARENT, &bch->Flags)) + queue_ch_frame(ch, PH_DATA_CNF, id, NULL); + } else + spin_unlock_irqrestore(ich->is->hwlock, flags); + return ret; + case PH_ACTIVATE_REQ: + spin_lock_irqsave(ich->is->hwlock, flags); + if (!test_and_set_bit(FLG_ACTIVE, &bch->Flags)) + ret = modeisar(ich, ch->protocol); + else + ret = 0; + spin_unlock_irqrestore(ich->is->hwlock, flags); + if (!ret) + _queue_data(ch, PH_ACTIVATE_IND, MISDN_ID_ANY, 0, + NULL, GFP_KERNEL); + break; + case PH_DEACTIVATE_REQ: + spin_lock_irqsave(ich->is->hwlock, flags); + mISDN_clear_bchannel(bch); + modeisar(ich, ISDN_P_NONE); + spin_unlock_irqrestore(ich->is->hwlock, flags); + _queue_data(ch, PH_DEACTIVATE_IND, MISDN_ID_ANY, 0, + NULL, GFP_KERNEL); + ret = 0; + break; + case PH_CONTROL_REQ: + val = (u32 *)skb->data; + pr_debug("%s: PH_CONTROL | REQUEST %x/%x\n", ich->is->name, + hh->id, *val); + if ((hh->id == 0) && ((*val & ~DTMF_TONE_MASK) == + DTMF_TONE_VAL)) { + if (bch->state == ISDN_P_B_L2DTMF) { + char tt = *val & DTMF_TONE_MASK; + + if (tt == '*') + tt = 0x1e; + else if (tt == '#') + tt = 0x1f; + else if (tt > '9') + tt -= 7; + tt &= 0x1f; + spin_lock_irqsave(ich->is->hwlock, flags); + isar_pump_cmd(ich, PCTRL_CMD_TDTMF, tt); + spin_unlock_irqrestore(ich->is->hwlock, flags); + } else { + pr_info("%s: DTMF send wrong protocol %x\n", + __func__, bch->state); + return -EINVAL; + } + } else if ((hh->id == HW_MOD_FRM) || (hh->id == HW_MOD_FRH) || + (hh->id == HW_MOD_FTM) || (hh->id == HW_MOD_FTH)) { + for (id = 0; id < FAXMODCNT; id++) + if (faxmodulation[id] == *val) + break; + if ((FAXMODCNT > id) && + test_bit(FLG_INITIALIZED, &bch->Flags)) { + pr_debug("%s: isar: new mod\n", ich->is->name); + isar_pump_cmd(ich, hh->id, *val); + ret = 0; + } else { + pr_info("%s: wrong modulation\n", + ich->is->name); + ret = -EINVAL; + } + } else if (hh->id == HW_MOD_LASTDATA) + test_and_set_bit(FLG_DLEETX, &bch->Flags); + else { + pr_info("%s: unknown PH_CONTROL_REQ %x\n", + ich->is->name, hh->id); + ret = -EINVAL; + } + default: + pr_info("%s: %s unknown prim(%x,%x)\n", + ich->is->name, __func__, hh->prim, hh->id); + ret = -EINVAL; + } + if (!ret) + dev_kfree_skb(skb); + return ret; +} + +static int +channel_bctrl(struct bchannel *bch, struct mISDN_ctrl_req *cq) +{ + int ret = 0; + + switch (cq->op) { + case MISDN_CTRL_GETOP: + cq->op = 0; + break; + /* Nothing implemented yet */ + case MISDN_CTRL_FILL_EMPTY: + default: + pr_info("%s: unknown Op %x\n", __func__, cq->op); + ret = -EINVAL; + break; + } + return ret; +} + +static int +isar_bctrl(struct mISDNchannel *ch, u32 cmd, void *arg) +{ + struct bchannel *bch = container_of(ch, struct bchannel, ch); + struct isar_ch *ich = container_of(bch, struct isar_ch, bch); + int ret = -EINVAL; + u_long flags; + + 
pr_debug("%s: %s cmd:%x %p\n", ich->is->name, __func__, cmd, arg); + switch (cmd) { + case CLOSE_CHANNEL: + test_and_clear_bit(FLG_OPEN, &bch->Flags); + if (test_bit(FLG_ACTIVE, &bch->Flags)) { + spin_lock_irqsave(ich->is->hwlock, flags); + mISDN_freebchannel(bch); + modeisar(ich, ISDN_P_NONE); + spin_unlock_irqrestore(ich->is->hwlock, flags); + } else { + skb_queue_purge(&bch->rqueue); + bch->rcount = 0; + } + ch->protocol = ISDN_P_NONE; + ch->peer = NULL; + module_put(ich->is->owner); + ret = 0; + break; + case CONTROL_CHANNEL: + ret = channel_bctrl(bch, arg); + break; + default: + pr_info("%s: %s unknown prim(%x)\n", + ich->is->name, __func__, cmd); + } + return ret; +} + +static void +free_isar(struct isar_hw *isar) +{ + modeisar(&isar->ch[0], ISDN_P_NONE); + modeisar(&isar->ch[1], ISDN_P_NONE); + del_timer(&isar->ch[0].ftimer); + del_timer(&isar->ch[1].ftimer); + test_and_clear_bit(FLG_INITIALIZED, &isar->ch[0].bch.Flags); + test_and_clear_bit(FLG_INITIALIZED, &isar->ch[1].bch.Flags); +} + +static int +init_isar(struct isar_hw *isar) +{ + int cnt = 3; + + while (cnt--) { + isar->version = ISARVersion(isar); + if (isar->ch[0].bch.debug & DEBUG_HW) + pr_notice("%s: Testing version %d (%d time)\n", + isar->name, isar->version, 3 - cnt); + if (isar->version == 1) + break; + isar->ctrl(isar->hw, HW_RESET_REQ, 0); + } + if (isar->version != 1) + return -EINVAL; + isar->ch[0].ftimer.function = &ftimer_handler; + isar->ch[0].ftimer.data = (long)&isar->ch[0]; + init_timer(&isar->ch[0].ftimer); + test_and_set_bit(FLG_INITIALIZED, &isar->ch[0].bch.Flags); + isar->ch[1].ftimer.function = &ftimer_handler; + isar->ch[1].ftimer.data = (long)&isar->ch[1]; + init_timer(&isar->ch[1].ftimer); + test_and_set_bit(FLG_INITIALIZED, &isar->ch[1].bch.Flags); + return 0; +} + +static int +isar_open(struct isar_hw *isar, struct channel_req *rq) +{ + struct bchannel *bch; + + if (rq->adr.channel > 2) + return -EINVAL; + if (rq->protocol == ISDN_P_NONE) + return -EINVAL; + bch = &isar->ch[rq->adr.channel - 1].bch; + if (test_and_set_bit(FLG_OPEN, &bch->Flags)) + return -EBUSY; /* b-channel can be only open once */ + test_and_clear_bit(FLG_FILLEMPTY, &bch->Flags); + bch->ch.protocol = rq->protocol; + rq->ch = &bch->ch; + return 0; +} + +u32 +mISDNisar_init(struct isar_hw *isar, void *hw) +{ + u32 ret, i; + + isar->hw = hw; + for (i = 0; i < 2; i++) { + isar->ch[i].bch.nr = i + 1; + mISDN_initbchannel(&isar->ch[i].bch, MAX_DATA_MEM); + isar->ch[i].bch.ch.nr = i + 1; + isar->ch[i].bch.ch.send = &isar_l2l1; + isar->ch[i].bch.ch.ctrl = isar_bctrl; + isar->ch[i].bch.hw = hw; + isar->ch[i].is = isar; + } + + isar->init = &init_isar; + isar->release = &free_isar; + isar->firmware = &load_firmware; + isar->open = &isar_open; + + ret = (1 << (ISDN_P_B_RAW & ISDN_P_B_MASK)) | + (1 << (ISDN_P_B_HDLC & ISDN_P_B_MASK)) | + (1 << (ISDN_P_B_L2DTMF & ISDN_P_B_MASK)) | + (1 << (ISDN_P_B_MODEM_ASYNC & ISDN_P_B_MASK)) | + (1 << (ISDN_P_B_T30_FAX & ISDN_P_B_MASK)); + + return ret; +} +EXPORT_SYMBOL(mISDNisar_init); + +static int isar_mod_init(void) +{ + pr_notice("mISDN: ISAR driver Rev. 
%s\n", ISAR_REV); + return 0; +} + +static void isar_mod_cleanup(void) +{ + pr_notice("mISDN: ISAR module unloaded\n"); +} +module_init(isar_mod_init); +module_exit(isar_mod_cleanup); diff --git a/drivers/isdn/hardware/mISDN/speedfax.c b/drivers/isdn/hardware/mISDN/speedfax.c new file mode 100644 index 000000000000..ff3a4e290da3 --- /dev/null +++ b/drivers/isdn/hardware/mISDN/speedfax.c @@ -0,0 +1,526 @@ +/* + * speedfax.c low level stuff for Sedlbauer Speedfax+ cards + * based on the ISAR DSP + * Thanks to Sedlbauer AG for informations and HW + * + * Author Karsten Keil + * + * Copyright 2009 by Karsten Keil + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include +#include +#include +#include +#include +#include "ipac.h" +#include "isar.h" + +#define SPEEDFAX_REV "2.0" + +#define PCI_SUBVENDOR_SPEEDFAX_PYRAMID 0x51 +#define PCI_SUBVENDOR_SPEEDFAX_PCI 0x54 +#define PCI_SUB_ID_SEDLBAUER 0x01 + +#define SFAX_PCI_ADDR 0xc8 +#define SFAX_PCI_ISAC 0xd0 +#define SFAX_PCI_ISAR 0xe0 + +/* TIGER 100 Registers */ + +#define TIGER_RESET_ADDR 0x00 +#define TIGER_EXTERN_RESET_ON 0x01 +#define TIGER_EXTERN_RESET_OFF 0x00 +#define TIGER_AUX_CTRL 0x02 +#define TIGER_AUX_DATA 0x03 +#define TIGER_AUX_IRQMASK 0x05 +#define TIGER_AUX_STATUS 0x07 + +/* Tiger AUX BITs */ +#define SFAX_AUX_IOMASK 0xdd /* 1 and 5 are inputs */ +#define SFAX_ISAR_RESET_BIT_OFF 0x00 +#define SFAX_ISAR_RESET_BIT_ON 0x01 +#define SFAX_TIGER_IRQ_BIT 0x02 +#define SFAX_LED1_BIT 0x08 +#define SFAX_LED2_BIT 0x10 + +#define SFAX_PCI_RESET_ON (SFAX_ISAR_RESET_BIT_ON) +#define SFAX_PCI_RESET_OFF (SFAX_LED1_BIT | SFAX_LED2_BIT) + +static int sfax_cnt; +static u32 debug; +static u32 irqloops = 4; + +struct sfax_hw { + struct list_head list; + struct pci_dev *pdev; + char name[MISDN_MAX_IDLEN]; + u32 irq; + u32 irqcnt; + u32 cfg; + struct _ioport p_isac; + struct _ioport p_isar; + u8 aux_data; + spinlock_t lock; /* HW access lock */ + struct isac_hw isac; + struct isar_hw isar; +}; + +static LIST_HEAD(Cards); +static DEFINE_RWLOCK(card_lock); /* protect Cards */ + +static void +_set_debug(struct sfax_hw *card) +{ + card->isac.dch.debug = debug; + card->isar.ch[0].bch.debug = debug; + card->isar.ch[1].bch.debug = debug; +} + +static int +set_debug(const char *val, struct kernel_param *kp) +{ + int ret; + struct sfax_hw *card; + + ret = param_set_uint(val, kp); + if (!ret) { + read_lock(&card_lock); + list_for_each_entry(card, &Cards, list) + _set_debug(card); + read_unlock(&card_lock); + } + return ret; +} + +MODULE_AUTHOR("Karsten Keil"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(SPEEDFAX_REV); +module_param_call(debug, set_debug, param_get_uint, &debug, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug, "Speedfax debug mask"); +module_param(irqloops, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(irqloops, "Speedfax maximal irqloops (default 4)"); + +IOFUNC_IND(ISAC, sfax_hw, p_isac) +IOFUNC_IND(ISAR, sfax_hw, p_isar) + +static irqreturn_t 
+speedfax_irq(int intno, void *dev_id) +{ + struct sfax_hw *sf = dev_id; + u8 val; + int cnt = irqloops; + + spin_lock(&sf->lock); + val = inb(sf->cfg + TIGER_AUX_STATUS); + if (val & SFAX_TIGER_IRQ_BIT) { /* for us or shared ? */ + spin_unlock(&sf->lock); + return IRQ_NONE; /* shared */ + } + sf->irqcnt++; + val = ReadISAR_IND(sf, ISAR_IRQBIT); +Start_ISAR: + if (val & ISAR_IRQSTA) + mISDNisar_irq(&sf->isar); + val = ReadISAC_IND(sf, ISAC_ISTA); + if (val) + mISDNisac_irq(&sf->isac, val); + val = ReadISAR_IND(sf, ISAR_IRQBIT); + if ((val & ISAR_IRQSTA) && cnt--) + goto Start_ISAR; + if (cnt < irqloops) + pr_debug("%s: %d irqloops cpu%d\n", sf->name, + irqloops - cnt, smp_processor_id()); + if (irqloops && !cnt) + pr_notice("%s: %d IRQ LOOP cpu%d\n", sf->name, + irqloops, smp_processor_id()); + spin_unlock(&sf->lock); + return IRQ_HANDLED; +} + +static void +enable_hwirq(struct sfax_hw *sf) +{ + WriteISAC_IND(sf, ISAC_MASK, 0); + WriteISAR_IND(sf, ISAR_IRQBIT, ISAR_IRQMSK); + outb(SFAX_TIGER_IRQ_BIT, sf->cfg + TIGER_AUX_IRQMASK); +} + +static void +disable_hwirq(struct sfax_hw *sf) +{ + WriteISAC_IND(sf, ISAC_MASK, 0xFF); + WriteISAR_IND(sf, ISAR_IRQBIT, 0); + outb(0, sf->cfg + TIGER_AUX_IRQMASK); +} + +static void +reset_speedfax(struct sfax_hw *sf) +{ + + pr_debug("%s: resetting card\n", sf->name); + outb(TIGER_EXTERN_RESET_ON, sf->cfg + TIGER_RESET_ADDR); + outb(SFAX_PCI_RESET_ON, sf->cfg + TIGER_AUX_DATA); + mdelay(1); + outb(TIGER_EXTERN_RESET_OFF, sf->cfg + TIGER_RESET_ADDR); + sf->aux_data = SFAX_PCI_RESET_OFF; + outb(sf->aux_data, sf->cfg + TIGER_AUX_DATA); + mdelay(1); +} + +static int +sfax_ctrl(struct sfax_hw *sf, u32 cmd, u_long arg) +{ + int ret = 0; + + switch (cmd) { + case HW_RESET_REQ: + reset_speedfax(sf); + break; + case HW_ACTIVATE_IND: + if (arg & 1) + sf->aux_data &= ~SFAX_LED1_BIT; + if (arg & 2) + sf->aux_data &= ~SFAX_LED2_BIT; + outb(sf->aux_data, sf->cfg + TIGER_AUX_DATA); + break; + case HW_DEACT_IND: + if (arg & 1) + sf->aux_data |= SFAX_LED1_BIT; + if (arg & 2) + sf->aux_data |= SFAX_LED2_BIT; + outb(sf->aux_data, sf->cfg + TIGER_AUX_DATA); + break; + default: + pr_info("%s: %s unknown command %x %lx\n", + sf->name, __func__, cmd, arg); + ret = -EINVAL; + break; + } + return ret; +} + +static int +channel_ctrl(struct sfax_hw *sf, struct mISDN_ctrl_req *cq) +{ + int ret = 0; + + switch (cq->op) { + case MISDN_CTRL_GETOP: + cq->op = MISDN_CTRL_LOOP; + break; + case MISDN_CTRL_LOOP: + /* cq->channel: 0 disable, 1 B1 loop 2 B2 loop, 3 both */ + if (cq->channel < 0 || cq->channel > 3) { + ret = -EINVAL; + break; + } + ret = sf->isac.ctrl(&sf->isac, HW_TESTLOOP, cq->channel); + break; + default: + pr_info("%s: unknown Op %x\n", sf->name, cq->op); + ret = -EINVAL; + break; + } + return ret; +} + +static int +sfax_dctrl(struct mISDNchannel *ch, u32 cmd, void *arg) +{ + struct mISDNdevice *dev = container_of(ch, struct mISDNdevice, D); + struct dchannel *dch = container_of(dev, struct dchannel, dev); + struct sfax_hw *sf = dch->hw; + struct channel_req *rq; + int err = 0; + + pr_debug("%s: cmd:%x %p\n", sf->name, cmd, arg); + switch (cmd) { + case OPEN_CHANNEL: + rq = arg; + if (rq->protocol == ISDN_P_TE_S0) + err = sf->isac.open(&sf->isac, rq); + else + err = sf->isar.open(&sf->isar, rq); + if (err) + break; + if (!try_module_get(THIS_MODULE)) + pr_info("%s: cannot get module\n", sf->name); + break; + case CLOSE_CHANNEL: + pr_debug("%s: dev(%d) close from %p\n", sf->name, + dch->dev.id, __builtin_return_address(0)); + module_put(THIS_MODULE); + break; + case 
CONTROL_CHANNEL: + err = channel_ctrl(sf, arg); + break; + default: + pr_debug("%s: unknown command %x\n", sf->name, cmd); + return -EINVAL; + } + return err; +} + +static int __devinit +init_card(struct sfax_hw *sf) +{ + int ret, cnt = 3; + u_long flags; + + ret = request_irq(sf->irq, speedfax_irq, IRQF_SHARED, sf->name, sf); + if (ret) { + pr_info("%s: couldn't get interrupt %d\n", sf->name, sf->irq); + return ret; + } + while (cnt--) { + spin_lock_irqsave(&sf->lock, flags); + ret = sf->isac.init(&sf->isac); + if (ret) { + spin_unlock_irqrestore(&sf->lock, flags); + pr_info("%s: ISAC init failed with %d\n", + sf->name, ret); + break; + } + enable_hwirq(sf); + /* RESET Receiver and Transmitter */ + WriteISAC_IND(sf, ISAC_CMDR, 0x41); + spin_unlock_irqrestore(&sf->lock, flags); + msleep_interruptible(10); + if (debug & DEBUG_HW) + pr_notice("%s: IRQ %d count %d\n", sf->name, + sf->irq, sf->irqcnt); + if (!sf->irqcnt) { + pr_info("%s: IRQ(%d) got no requests during init %d\n", + sf->name, sf->irq, 3 - cnt); + } else + return 0; + } + free_irq(sf->irq, sf); + return -EIO; +} + + +static int __devinit +setup_speedfax(struct sfax_hw *sf) +{ + u_long flags; + + if (!request_region(sf->cfg, 256, sf->name)) { + pr_info("mISDN: %s config port %x-%x already in use\n", + sf->name, sf->cfg, sf->cfg + 255); + return -EIO; + } + outb(0xff, sf->cfg); + outb(0, sf->cfg); + outb(0xdd, sf->cfg + TIGER_AUX_CTRL); + outb(0, sf->cfg + TIGER_AUX_IRQMASK); + + sf->isac.type = IPAC_TYPE_ISAC; + sf->p_isac.ale = sf->cfg + SFAX_PCI_ADDR; + sf->p_isac.port = sf->cfg + SFAX_PCI_ISAC; + sf->p_isar.ale = sf->cfg + SFAX_PCI_ADDR; + sf->p_isar.port = sf->cfg + SFAX_PCI_ISAR; + ASSIGN_FUNC(IND, ISAC, sf->isac); + ASSIGN_FUNC(IND, ISAR, sf->isar); + spin_lock_irqsave(&sf->lock, flags); + reset_speedfax(sf); + disable_hwirq(sf); + spin_unlock_irqrestore(&sf->lock, flags); + return 0; +} + +static void +release_card(struct sfax_hw *card) { + u_long flags; + + spin_lock_irqsave(&card->lock, flags); + disable_hwirq(card); + spin_unlock_irqrestore(&card->lock, flags); + card->isac.release(&card->isac); + free_irq(card->irq, card); + card->isar.release(&card->isar); + mISDN_unregister_device(&card->isac.dch.dev); + release_region(card->cfg, 256); + pci_disable_device(card->pdev); + pci_set_drvdata(card->pdev, NULL); + write_lock_irqsave(&card_lock, flags); + list_del(&card->list); + write_unlock_irqrestore(&card_lock, flags); + kfree(card); + sfax_cnt--; +} + +static int __devinit +setup_instance(struct sfax_hw *card) +{ + const struct firmware *firmware; + int i, err; + u_long flags; + + snprintf(card->name, MISDN_MAX_IDLEN - 1, "Speedfax.%d", sfax_cnt + 1); + write_lock_irqsave(&card_lock, flags); + list_add_tail(&card->list, &Cards); + write_unlock_irqrestore(&card_lock, flags); + _set_debug(card); + spin_lock_init(&card->lock); + card->isac.hwlock = &card->lock; + card->isar.hwlock = &card->lock; + card->isar.ctrl = (void *)&sfax_ctrl; + card->isac.name = card->name; + card->isar.name = card->name; + card->isar.owner = THIS_MODULE; + + err = request_firmware(&firmware, "isdn/ISAR.BIN", &card->pdev->dev); + if (err < 0) { + pr_info("%s: firmware request failed %d\n", + card->name, err); + goto error_fw; + } + if (debug & DEBUG_HW) + pr_notice("%s: got firmware %zu bytes\n", + card->name, firmware->size); + + mISDNisac_init(&card->isac, card); + + card->isac.dch.dev.D.ctrl = sfax_dctrl; + card->isac.dch.dev.Bprotocols = + mISDNisar_init(&card->isar, card); + for (i = 0; i < 2; i++) { + set_channelmap(i + 1, 
card->isac.dch.dev.channelmap); + list_add(&card->isar.ch[i].bch.ch.list, + &card->isac.dch.dev.bchannels); + } + + err = setup_speedfax(card); + if (err) + goto error_setup; + err = card->isar.init(&card->isar); + if (err) + goto error; + err = mISDN_register_device(&card->isac.dch.dev, + &card->pdev->dev, card->name); + if (err) + goto error; + err = init_card(card); + if (err) + goto error_init; + err = card->isar.firmware(&card->isar, firmware->data, firmware->size); + if (!err) { + release_firmware(firmware); + sfax_cnt++; + pr_notice("SpeedFax %d cards installed\n", sfax_cnt); + return 0; + } + disable_hwirq(card); + free_irq(card->irq, card); +error_init: + mISDN_unregister_device(&card->isac.dch.dev); +error: + release_region(card->cfg, 256); +error_setup: + card->isac.release(&card->isac); + card->isar.release(&card->isar); + release_firmware(firmware); +error_fw: + pci_disable_device(card->pdev); + write_lock_irqsave(&card_lock, flags); + list_del(&card->list); + write_unlock_irqrestore(&card_lock, flags); + kfree(card); + return err; +} + +static int __devinit +sfaxpci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int err = -ENOMEM; + struct sfax_hw *card = kzalloc(sizeof(struct sfax_hw), GFP_KERNEL); + + if (!card) { + pr_info("No memory for Speedfax+ PCI\n"); + return err; + } + card->pdev = pdev; + err = pci_enable_device(pdev); + if (err) { + kfree(card); + return err; + } + + pr_notice("mISDN: Speedfax found adapter %s at %s\n", + (char *)ent->driver_data, pci_name(pdev)); + + card->cfg = pci_resource_start(pdev, 0); + card->irq = pdev->irq; + pci_set_drvdata(pdev, card); + err = setup_instance(card); + if (err) + pci_set_drvdata(pdev, NULL); + return err; +} + +static void __devexit +sfax_remove_pci(struct pci_dev *pdev) +{ + struct sfax_hw *card = pci_get_drvdata(pdev); + + if (card) + release_card(card); + else + pr_debug("%s: drvdata allready removed\n", __func__); +} + +static struct pci_device_id sfaxpci_ids[] __devinitdata = { + { PCI_VENDOR_ID_TIGERJET, PCI_DEVICE_ID_TIGERJET_100, + PCI_SUBVENDOR_SPEEDFAX_PYRAMID, PCI_SUB_ID_SEDLBAUER, + 0, 0, (unsigned long) "Pyramid Speedfax + PCI" + }, + { PCI_VENDOR_ID_TIGERJET, PCI_DEVICE_ID_TIGERJET_100, + PCI_SUBVENDOR_SPEEDFAX_PCI, PCI_SUB_ID_SEDLBAUER, + 0, 0, (unsigned long) "Sedlbauer Speedfax + PCI" + }, + { } +}; +MODULE_DEVICE_TABLE(pci, sfaxpci_ids); + +static struct pci_driver sfaxpci_driver = { + .name = "speedfax+ pci", + .probe = sfaxpci_probe, + .remove = __devexit_p(sfax_remove_pci), + .id_table = sfaxpci_ids, +}; + +static int __init +Speedfax_init(void) +{ + int err; + + pr_notice("Sedlbauer Speedfax+ Driver Rev. 
%s\n", + SPEEDFAX_REV); + err = pci_register_driver(&sfaxpci_driver); + return err; +} + +static void __exit +Speedfax_cleanup(void) +{ + pci_unregister_driver(&sfaxpci_driver); +} + +module_init(Speedfax_init); +module_exit(Speedfax_cleanup); diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h index 45100b39a7cf..536ca12442ca 100644 --- a/include/linux/mISDNif.h +++ b/include/linux/mISDNif.h @@ -37,7 +37,7 @@ */ #define MISDN_MAJOR_VERSION 1 #define MISDN_MINOR_VERSION 1 -#define MISDN_RELEASE 20 +#define MISDN_RELEASE 21 /* primitives for information exchange * generell format @@ -153,6 +153,18 @@ #define HFC_VOL_CHANGE_RX 0x2602 #define HFC_SPL_LOOP_ON 0x2603 #define HFC_SPL_LOOP_OFF 0x2604 +/* for T30 FAX and analog modem */ +#define HW_MOD_FRM 0x4000 +#define HW_MOD_FRH 0x4001 +#define HW_MOD_FTM 0x4002 +#define HW_MOD_FTH 0x4003 +#define HW_MOD_FTS 0x4004 +#define HW_MOD_CONNECT 0x4010 +#define HW_MOD_OK 0x4011 +#define HW_MOD_NOCARR 0x4012 +#define HW_MOD_FCERROR 0x4013 +#define HW_MOD_READY 0x4014 +#define HW_MOD_LASTDATA 0x4015 /* DSP_TONE_PATT_ON parameter */ #define TONE_OFF 0x0000 @@ -224,6 +236,8 @@ #define ISDN_P_B_L2DTMF 0x24 #define ISDN_P_B_L2DSP 0x25 #define ISDN_P_B_L2DSPHDLC 0x26 +#define ISDN_P_B_T30_FAX 0x27 +#define ISDN_P_B_MODEM_ASYNC 0x28 #define OPTION_L2_PMX 1 #define OPTION_L2_PTP 2 -- cgit v1.2.3 From 633aae23ff31bef692a70772652e753a0ae59b81 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 22 Jul 2009 21:51:36 -0700 Subject: Input: serio - switch to using dev_pm_ops Signed-off-by: Dmitry Torokhov --- drivers/input/serio/serio.c | 23 +++++++++++------------ include/linux/serio.h | 2 -- 2 files changed, 11 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index d66f4944f2a0..0236f0d5fd91 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -931,15 +931,11 @@ static int serio_uevent(struct device *dev, struct kobj_uevent_env *env) #endif /* CONFIG_HOTPLUG */ #ifdef CONFIG_PM -static int serio_suspend(struct device *dev, pm_message_t state) +static int serio_suspend(struct device *dev) { struct serio *serio = to_serio_port(dev); - if (!serio->suspended && state.event == PM_EVENT_SUSPEND) - serio_cleanup(serio); - - serio->suspended = state.event == PM_EVENT_SUSPEND || - state.event == PM_EVENT_FREEZE; + serio_cleanup(serio); return 0; } @@ -952,13 +948,17 @@ static int serio_resume(struct device *dev) * Driver reconnect can take a while, so better let kseriod * deal with it. 
*/ - if (serio->suspended) { - serio->suspended = false; - serio_queue_event(serio, NULL, SERIO_RECONNECT_PORT); - } + serio_queue_event(serio, NULL, SERIO_RECONNECT_PORT); return 0; } + +static const struct dev_pm_ops serio_pm_ops = { + .suspend = serio_suspend, + .resume = serio_resume, + .poweroff = serio_suspend, + .restore = serio_resume, +}; #endif /* CONFIG_PM */ /* called from serio_driver->connect/disconnect methods under serio_mutex */ @@ -1015,8 +1015,7 @@ static struct bus_type serio_bus = { .remove = serio_driver_remove, .shutdown = serio_shutdown, #ifdef CONFIG_PM - .suspend = serio_suspend, - .resume = serio_resume, + .pm = &serio_pm_ops, #endif }; diff --git a/include/linux/serio.h b/include/linux/serio.h index 126d24c9eaa8..a640bc2afe76 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -31,8 +31,6 @@ struct serio { bool manual_bind; bool registered; /* port has been fully registered with driver core */ - bool suspended; /* port is suspended */ - struct serio_device_id id; -- cgit v1.2.3 From dcf777f6ed9799c5ac90ac17a5c369e6b73ca92e Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sun, 26 Jul 2009 19:11:14 -0700 Subject: NET: ROSE: Don't use static buffer. The use of a static buffer in rose2asc() to return its result is not threadproof and can result in corruption if multiple threads are trying to use one of the procfs files based on rose2asc(). Signed-off-by: Ralf Baechle Signed-off-by: David S. Miller --- include/net/rose.h | 2 +- net/rose/af_rose.c | 18 ++++++++---------- net/rose/rose_route.c | 23 ++++++++++++----------- 3 files changed, 21 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/rose.h b/include/net/rose.h index cbd5364b2c8a..5ba9f02731eb 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -156,7 +156,7 @@ extern int sysctl_rose_maximum_vcs; extern int sysctl_rose_window_size; extern int rosecmp(rose_address *, rose_address *); extern int rosecmpm(rose_address *, rose_address *, unsigned short); -extern const char *rose2asc(const rose_address *); +extern char *rose2asc(char *buf, const rose_address *); extern struct sock *rose_find_socket(unsigned int, struct rose_neigh *); extern void rose_kill_by_neigh(struct rose_neigh *); extern unsigned int rose_new_lci(struct rose_neigh *); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 6bd8e93869ed..f0a76f6bca71 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -92,23 +92,21 @@ static void rose_set_lockdep_key(struct net_device *dev) /* * Convert a ROSE address into text. 
*/ -const char *rose2asc(const rose_address *addr) +char *rose2asc(char *buf, const rose_address *addr) { - static char buffer[11]; - if (addr->rose_addr[0] == 0x00 && addr->rose_addr[1] == 0x00 && addr->rose_addr[2] == 0x00 && addr->rose_addr[3] == 0x00 && addr->rose_addr[4] == 0x00) { - strcpy(buffer, "*"); + strcpy(buf, "*"); } else { - sprintf(buffer, "%02X%02X%02X%02X%02X", addr->rose_addr[0] & 0xFF, + sprintf(buf, "%02X%02X%02X%02X%02X", addr->rose_addr[0] & 0xFF, addr->rose_addr[1] & 0xFF, addr->rose_addr[2] & 0xFF, addr->rose_addr[3] & 0xFF, addr->rose_addr[4] & 0xFF); } - return buffer; + return buf; } /* @@ -1437,7 +1435,7 @@ static void rose_info_stop(struct seq_file *seq, void *v) static int rose_info_show(struct seq_file *seq, void *v) { - char buf[11]; + char buf[11], rsbuf[11]; if (v == SEQ_START_TOKEN) seq_puts(seq, @@ -1455,8 +1453,8 @@ static int rose_info_show(struct seq_file *seq, void *v) devname = dev->name; seq_printf(seq, "%-10s %-9s ", - rose2asc(&rose->dest_addr), - ax2asc(buf, &rose->dest_call)); + rose2asc(rsbuf, &rose->dest_addr), + ax2asc(buf, &rose->dest_call)); if (ax25cmp(&rose->source_call, &null_ax25_address) == 0) callsign = "??????-?"; @@ -1465,7 +1463,7 @@ static int rose_info_show(struct seq_file *seq, void *v) seq_printf(seq, "%-10s %-9s %-5s %3.3X %05d %d %d %d %d %3lu %3lu %3lu %3lu %3lu %3lu/%03lu %5d %5d %ld\n", - rose2asc(&rose->source_addr), + rose2asc(rsbuf, &rose->source_addr), callsign, devname, rose->lci & 0x0FFF, diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index a81066a1010a..9478d9b3d977 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -1104,6 +1104,7 @@ static void rose_node_stop(struct seq_file *seq, void *v) static int rose_node_show(struct seq_file *seq, void *v) { + char rsbuf[11]; int i; if (v == SEQ_START_TOKEN) @@ -1112,13 +1113,13 @@ static int rose_node_show(struct seq_file *seq, void *v) const struct rose_node *rose_node = v; /* if (rose_node->loopback) { seq_printf(seq, "%-10s %04d 1 loopback\n", - rose2asc(&rose_node->address), - rose_node->mask); + rose2asc(rsbuf, &rose_node->address), + rose_node->mask); } else { */ seq_printf(seq, "%-10s %04d %d", - rose2asc(&rose_node->address), - rose_node->mask, - rose_node->count); + rose2asc(rsbuf, &rose_node->address), + rose_node->mask, + rose_node->count); for (i = 0; i < rose_node->count; i++) seq_printf(seq, " %05d", @@ -1267,7 +1268,7 @@ static void rose_route_stop(struct seq_file *seq, void *v) static int rose_route_show(struct seq_file *seq, void *v) { - char buf[11]; + char buf[11], rsbuf[11]; if (v == SEQ_START_TOKEN) seq_puts(seq, @@ -1279,7 +1280,7 @@ static int rose_route_show(struct seq_file *seq, void *v) seq_printf(seq, "%3.3X %-10s %-9s %05d ", rose_route->lci1, - rose2asc(&rose_route->src_addr), + rose2asc(rsbuf, &rose_route->src_addr), ax2asc(buf, &rose_route->src_call), rose_route->neigh1->number); else @@ -1289,10 +1290,10 @@ static int rose_route_show(struct seq_file *seq, void *v) if (rose_route->neigh2) seq_printf(seq, "%3.3X %-10s %-9s %05d\n", - rose_route->lci2, - rose2asc(&rose_route->dest_addr), - ax2asc(buf, &rose_route->dest_call), - rose_route->neigh2->number); + rose_route->lci2, + rose2asc(rsbuf, &rose_route->dest_addr), + ax2asc(buf, &rose_route->dest_call), + rose_route->neigh2->number); else seq_puts(seq, "000 * * 00000\n"); -- cgit v1.2.3 From b5eb0589937eae2d58fca17fa45ed44152e772ed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Jul 2009 05:23:23 +0000 Subject: net: deprecate print_mac We've had %pM for 
long enough now, time to deprecate print_mac() and remove the __maybe_unused attribute from DECLARE_MAC_BUF so that variables declared with that can be found and removed. Otherwise people are putting in new users of print_mac(). Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/if_ether.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 70fdba2bbf71..580b6004d00e 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -139,10 +139,10 @@ extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); /* * Display a 6 byte device address (MAC) in a readable format. */ -extern char *print_mac(char *buf, const unsigned char *addr); +extern char *print_mac(char *buf, const unsigned char *addr) __deprecated; #define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" #define MAC_BUF_SIZE 18 -#define DECLARE_MAC_BUF(var) char var[MAC_BUF_SIZE] __maybe_unused +#define DECLARE_MAC_BUF(var) char var[MAC_BUF_SIZE] #endif -- cgit v1.2.3 From 51def0bea92629dff02ff1de40603eb90c609c55 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 22 Jul 2009 14:06:56 +0000 Subject: imwc3200: move iwmc3200 SDIO ids to sdio_ids.h 1. add intel's sdio vendor id to sdio_ids.h 2. move iwmc3200 sdio devices' ids to sdio_ids.h Signed-off-by: Tomas Winkler Signed-off-by: David S. Miller --- drivers/net/wimax/i2400m/sdio.c | 12 +++++------- drivers/net/wireless/iwmc3200wifi/sdio.c | 4 +++- drivers/net/wireless/iwmc3200wifi/sdio.h | 3 --- include/linux/mmc/sdio_ids.h | 6 ++++++ 4 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/wimax/i2400m/sdio.c b/drivers/net/wimax/i2400m/sdio.c index 2538825d1c66..ea7b29034aab 100644 --- a/drivers/net/wimax/i2400m/sdio.c +++ b/drivers/net/wimax/i2400m/sdio.c @@ -58,6 +58,7 @@ */ #include +#include #include #include #include "i2400m-sdio.h" @@ -501,15 +502,12 @@ void i2400ms_remove(struct sdio_func *func) d_fnend(3, dev, "SDIO func %p\n", func); } -enum { - I2400MS_INTEL_VID = 0x89, -}; - static const struct sdio_device_id i2400ms_sdio_ids[] = { - /* Intel: i2400m WiMAX over SDIO */ - { SDIO_DEVICE(I2400MS_INTEL_VID, 0x1402) }, - { }, /* end: all zeroes */ + /* Intel: i2400m WiMAX (iwmc3200) over SDIO */ + { SDIO_DEVICE(SDIO_VENDOR_ID_INTEL, + SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX) }, + { /* end: all zeroes */ }, }; MODULE_DEVICE_TABLE(sdio, i2400ms_sdio_ids); diff --git a/drivers/net/wireless/iwmc3200wifi/sdio.c b/drivers/net/wireless/iwmc3200wifi/sdio.c index b93f620ee4f1..8b1de84003ca 100644 --- a/drivers/net/wireless/iwmc3200wifi/sdio.c +++ b/drivers/net/wireless/iwmc3200wifi/sdio.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include @@ -492,7 +493,8 @@ static void iwm_sdio_remove(struct sdio_func *func) } static const struct sdio_device_id iwm_sdio_ids[] = { - { SDIO_DEVICE(SDIO_VENDOR_ID_INTEL, SDIO_DEVICE_ID_IWM) }, + { SDIO_DEVICE(SDIO_VENDOR_ID_INTEL, + SDIO_DEVICE_ID_INTEL_IWMC3200WIFI) }, { /* end: all zeroes */ }, }; MODULE_DEVICE_TABLE(sdio, iwm_sdio_ids); diff --git a/drivers/net/wireless/iwmc3200wifi/sdio.h b/drivers/net/wireless/iwmc3200wifi/sdio.h index b3c156b08dda..aab6b6892e45 100644 --- a/drivers/net/wireless/iwmc3200wifi/sdio.h +++ b/drivers/net/wireless/iwmc3200wifi/sdio.h @@ -39,9 +39,6 @@ #ifndef __IWM_SDIO_H__ #define __IWM_SDIO_H__ -#define SDIO_VENDOR_ID_INTEL 0x89 -#define SDIO_DEVICE_ID_IWM 0x1403 - #define IWM_SDIO_DATA_ADDR 0x0 #define 
IWM_SDIO_INTR_ENABLE_ADDR 0x14 #define IWM_SDIO_INTR_STATUS_ADDR 0x13 diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 39751c8cde9c..2dbfb5a05994 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -22,6 +22,12 @@ /* * Vendors and devices. Sort key: vendor first, device next. */ +#define SDIO_VENDOR_ID_INTEL 0x0089 +#define SDIO_DEVICE_ID_INTEL_IWMC3200WIMAX 0x1402 +#define SDIO_DEVICE_ID_INTEL_IWMC3200WIFI 0x1403 +#define SDIO_DEVICE_ID_INTEL_IWMC3200TOP 0x1404 +#define SDIO_DEVICE_ID_INTEL_IWMC3200GPS 0x1405 +#define SDIO_DEVICE_ID_INTEL_IWMC3200BT 0x1406 #define SDIO_VENDOR_ID_MARVELL 0x02df #define SDIO_DEVICE_ID_MARVELL_LIBERTAS 0x9103 -- cgit v1.2.3 From 8a729fce76f7af50d8b622f2fb26adce9c8df743 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 26 Jul 2009 23:18:11 +0000 Subject: net: ethtool_op_get_rx_csum() should be public and exported Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ethtool.h | 1 + net/core/ethtool.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9b660bd2e2b3..90c4a3616d94 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -368,6 +368,7 @@ struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ u32 ethtool_op_get_link(struct net_device *dev); +u32 ethtool_op_get_rx_csum(struct net_device *dev); u32 ethtool_op_get_tx_csum(struct net_device *dev); int ethtool_op_set_tx_csum(struct net_device *dev, u32 data); int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index cf36ff44ebb2..44e571111d3a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -34,11 +34,13 @@ u32 ethtool_op_get_rx_csum(struct net_device *dev) { return (dev->features & NETIF_F_ALL_CSUM) != 0; } +EXPORT_SYMBOL(ethtool_op_get_rx_csum); u32 ethtool_op_get_tx_csum(struct net_device *dev) { return (dev->features & NETIF_F_ALL_CSUM) != 0; } +EXPORT_SYMBOL(ethtool_op_get_tx_csum); int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) { @@ -1125,7 +1127,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) EXPORT_SYMBOL(ethtool_op_get_link); EXPORT_SYMBOL(ethtool_op_get_sg); EXPORT_SYMBOL(ethtool_op_get_tso); -EXPORT_SYMBOL(ethtool_op_get_tx_csum); EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); -- cgit v1.2.3 From 9e1b32caa525cb236e80e9c671e179bcecccc657 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 22 Jul 2009 15:44:28 +1000 Subject: mm: Pass virtual address to [__]p{te,ud,md}_free_tlb() mm: Pass virtual address to [__]p{te,ud,md}_free_tlb() Upcoming paches to support the new 64-bit "BookE" powerpc architecture will need to have the virtual address corresponding to PTE page when freeing it, due to the way the HW table walker works. Basically, the TLB can be loaded with "large" pages that cover the whole virtual space (well, sort-of, half of it actually) represented by a PTE page, and which contain an "indirect" bit indicating that this TLB entry RPN points to an array of PTEs from which the TLB can then create direct entries. Thus, in order to invalidate those when PTE pages are deleted, we need the virtual address to pass to tlbilx or tlbivax instructions. 
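Concretely, the freeing helpers just grow an extra address argument that each architecture hook may use or ignore; a condensed sketch of the resulting interface, distilled from the ia64 wrapper and the alpha stub in the hunks below (trimmed here for illustration, not a complete definition):

#define pte_free_tlb(tlb, ptep, address)		\
	do {						\
		tlb->need_flush = 1;			\
		__pte_free_tlb(tlb, ptep, address);	\
	} while (0)

/* an architecture with no use for the address simply drops it */
#define __pte_free_tlb(tlb, pte, address)	pte_free((tlb)->mm, pte)
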
The old trick of sticking it somewhere in the PTE page struct page sucks too much, the address is almost readily available in all call sites and almost everybody implemets these as macros, so we may as well add the argument everywhere. I added it to the pmd and pud variants for consistency. Signed-off-by: Benjamin Herrenschmidt Acked-by: David Howells [MN10300 & FRV] Acked-by: Nick Piggin Acked-by: Martin Schwidefsky [s390] Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/tlb.h | 4 ++-- arch/arm/include/asm/tlb.h | 4 ++-- arch/avr32/include/asm/pgalloc.h | 2 +- arch/cris/include/asm/pgalloc.h | 2 +- arch/frv/include/asm/pgalloc.h | 4 ++-- arch/frv/include/asm/pgtable.h | 2 +- arch/ia64/include/asm/pgalloc.h | 6 +++--- arch/ia64/include/asm/tlb.h | 12 ++++++------ arch/m32r/include/asm/pgalloc.h | 4 ++-- arch/m68k/include/asm/motorola_pgalloc.h | 6 ++++-- arch/m68k/include/asm/sun3_pgalloc.h | 4 ++-- arch/microblaze/include/asm/pgalloc.h | 4 ++-- arch/mips/include/asm/pgalloc.h | 6 +++--- arch/mn10300/include/asm/pgalloc.h | 2 +- arch/parisc/include/asm/tlb.h | 4 ++-- arch/powerpc/include/asm/pgalloc-32.h | 2 +- arch/powerpc/include/asm/pgalloc-64.h | 4 ++-- arch/powerpc/include/asm/pgalloc.h | 6 +++--- arch/powerpc/mm/hugetlbpage.c | 4 ++-- arch/s390/include/asm/tlb.h | 9 ++++++--- arch/sh/include/asm/pgalloc.h | 4 ++-- arch/sh/include/asm/tlb.h | 6 +++--- arch/sparc/include/asm/pgalloc_32.h | 8 ++++---- arch/sparc/include/asm/tlb_64.h | 6 +++--- arch/um/include/asm/pgalloc.h | 4 ++-- arch/um/include/asm/tlb.h | 6 +++--- arch/x86/include/asm/pgalloc.h | 25 ++++++++++++++++++++++--- arch/x86/mm/pgtable.c | 6 +++--- arch/xtensa/include/asm/tlb.h | 2 +- include/asm-generic/4level-fixup.h | 4 ++-- include/asm-generic/pgtable-nopmd.h | 2 +- include/asm-generic/pgtable-nopud.h | 2 +- include/asm-generic/tlb.h | 12 ++++++------ mm/memory.c | 11 ++++++----- 34 files changed, 107 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/arch/alpha/include/asm/tlb.h b/arch/alpha/include/asm/tlb.h index c13636575fba..42866759f3fa 100644 --- a/arch/alpha/include/asm/tlb.h +++ b/arch/alpha/include/asm/tlb.h @@ -9,7 +9,7 @@ #include -#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte) -#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd) +#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) +#define __pmd_free_tlb(tlb, pmd, address) pmd_free((tlb)->mm, pmd) #endif diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index 321c83e43a1e..f41a6f57cd12 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -102,8 +102,8 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) } #define tlb_remove_page(tlb,page) free_page_and_swap_cache(page) -#define pte_free_tlb(tlb, ptep) pte_free((tlb)->mm, ptep) -#define pmd_free_tlb(tlb, pmdp) pmd_free((tlb)->mm, pmdp) +#define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep) +#define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp) #define tlb_migrate_finish(mm) do { } while (0) diff --git a/arch/avr32/include/asm/pgalloc.h b/arch/avr32/include/asm/pgalloc.h index 640821323943..92ecd8446ef8 100644 --- a/arch/avr32/include/asm/pgalloc.h +++ b/arch/avr32/include/asm/pgalloc.h @@ -83,7 +83,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) quicklist_free_page(QUICK_PT, NULL, pte); } -#define __pte_free_tlb(tlb,pte) \ +#define __pte_free_tlb(tlb,pte,addr) \ do { \ pgtable_page_dtor(pte); \ tlb_remove_page((tlb), pte); \ diff --git 
a/arch/cris/include/asm/pgalloc.h b/arch/cris/include/asm/pgalloc.h index a1ba761d0573..6da975db112f 100644 --- a/arch/cris/include/asm/pgalloc.h +++ b/arch/cris/include/asm/pgalloc.h @@ -47,7 +47,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) __free_page(pte); } -#define __pte_free_tlb(tlb,pte) \ +#define __pte_free_tlb(tlb,pte,address) \ do { \ pgtable_page_dtor(pte); \ tlb_remove_page((tlb), pte); \ diff --git a/arch/frv/include/asm/pgalloc.h b/arch/frv/include/asm/pgalloc.h index 971e6addb009..416d19a632f2 100644 --- a/arch/frv/include/asm/pgalloc.h +++ b/arch/frv/include/asm/pgalloc.h @@ -49,7 +49,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) __free_page(pte); } -#define __pte_free_tlb(tlb,pte) \ +#define __pte_free_tlb(tlb,pte,address) \ do { \ pgtable_page_dtor(pte); \ tlb_remove_page((tlb),(pte)); \ @@ -62,7 +62,7 @@ do { \ */ #define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *) 2); }) #define pmd_free(mm, x) do { } while (0) -#define __pmd_free_tlb(tlb,x) do { } while (0) +#define __pmd_free_tlb(tlb,x,a) do { } while (0) #endif /* CONFIG_MMU */ diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h index 33233011b1c1..22c60692b551 100644 --- a/arch/frv/include/asm/pgtable.h +++ b/arch/frv/include/asm/pgtable.h @@ -225,7 +225,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) */ #define pud_alloc_one(mm, address) NULL #define pud_free(mm, x) do { } while (0) -#define __pud_free_tlb(tlb, x) do { } while (0) +#define __pud_free_tlb(tlb, x, address) do { } while (0) /* * The "pud_xxx()" functions here are trivial for a folded two-level diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h index b9ac1a6fc216..96a8d927db28 100644 --- a/arch/ia64/include/asm/pgalloc.h +++ b/arch/ia64/include/asm/pgalloc.h @@ -48,7 +48,7 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) { quicklist_free(0, NULL, pud); } -#define __pud_free_tlb(tlb, pud) pud_free((tlb)->mm, pud) +#define __pud_free_tlb(tlb, pud, address) pud_free((tlb)->mm, pud) #endif /* CONFIG_PGTABLE_4 */ static inline void @@ -67,7 +67,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) quicklist_free(0, NULL, pmd); } -#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd) +#define __pmd_free_tlb(tlb, pmd, address) pmd_free((tlb)->mm, pmd) static inline void pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, pgtable_t pte) @@ -117,6 +117,6 @@ static inline void check_pgt_cache(void) quicklist_trim(0, NULL, 25, 16); } -#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte) +#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) #endif /* _ASM_IA64_PGALLOC_H */ diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 20d8a39680c2..85d965cb19a0 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -236,22 +236,22 @@ do { \ __tlb_remove_tlb_entry(tlb, ptep, addr); \ } while (0) -#define pte_free_tlb(tlb, ptep) \ +#define pte_free_tlb(tlb, ptep, address) \ do { \ tlb->need_flush = 1; \ - __pte_free_tlb(tlb, ptep); \ + __pte_free_tlb(tlb, ptep, address); \ } while (0) -#define pmd_free_tlb(tlb, ptep) \ +#define pmd_free_tlb(tlb, ptep, address) \ do { \ tlb->need_flush = 1; \ - __pmd_free_tlb(tlb, ptep); \ + __pmd_free_tlb(tlb, ptep, address); \ } while (0) -#define pud_free_tlb(tlb, pudp) \ +#define pud_free_tlb(tlb, pudp, address) \ do { \ tlb->need_flush = 1; \ - __pud_free_tlb(tlb, pudp); \ + __pud_free_tlb(tlb, pudp, address); \ } 
while (0) #endif /* _ASM_IA64_TLB_H */ diff --git a/arch/m32r/include/asm/pgalloc.h b/arch/m32r/include/asm/pgalloc.h index f11a2b909cdb..0fc736198979 100644 --- a/arch/m32r/include/asm/pgalloc.h +++ b/arch/m32r/include/asm/pgalloc.h @@ -58,7 +58,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) __free_page(pte); } -#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte)) +#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, (pte)) /* * allocating and freeing a pmd is trivial: the 1-entry pmd is @@ -68,7 +68,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) #define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(mm, x) do { } while (0) -#define __pmd_free_tlb(tlb, x) do { } while (0) +#define __pmd_free_tlb(tlb, x, addr) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() #define check_pgt_cache() do { } while (0) diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h index d08bf6261df8..15ee4c74a9f0 100644 --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -54,7 +54,8 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t page) __free_page(page); } -static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page) +static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t page, + unsigned long address) { pgtable_page_dtor(page); cache_page(kmap(page)); @@ -73,7 +74,8 @@ static inline int pmd_free(struct mm_struct *mm, pmd_t *pmd) return free_pointer_table(pmd); } -static inline int __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) +static inline int __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long address) { return free_pointer_table(pmd); } diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index d4c83f143816..48d80d5a666f 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -32,7 +32,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t page) __free_page(page); } -#define __pte_free_tlb(tlb,pte) \ +#define __pte_free_tlb(tlb,pte,addr) \ do { \ pgtable_page_dtor(pte); \ tlb_remove_page((tlb), pte); \ @@ -80,7 +80,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page * inside the pgd, so has no extra memory associated with it. 
*/ #define pmd_free(mm, x) do { } while (0) -#define __pmd_free_tlb(tlb, x) do { } while (0) +#define __pmd_free_tlb(tlb, x, addr) do { } while (0) static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) { diff --git a/arch/microblaze/include/asm/pgalloc.h b/arch/microblaze/include/asm/pgalloc.h index 59a757e46ba5..b0131da1387b 100644 --- a/arch/microblaze/include/asm/pgalloc.h +++ b/arch/microblaze/include/asm/pgalloc.h @@ -180,7 +180,7 @@ extern inline void pte_free(struct mm_struct *mm, struct page *ptepage) __free_page(ptepage); } -#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte)) +#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, (pte)) #define pmd_populate(mm, pmd, pte) (pmd_val(*(pmd)) = page_address(pte)) @@ -193,7 +193,7 @@ extern inline void pte_free(struct mm_struct *mm, struct page *ptepage) */ #define pmd_alloc_one(mm, address) ({ BUG(); ((pmd_t *)2); }) /*#define pmd_free(mm, x) do { } while (0)*/ -#define __pmd_free_tlb(tlb, x) do { } while (0) +#define __pmd_free_tlb(tlb, x, addr) do { } while (0) #define pgd_populate(mm, pmd, pte) BUG() extern int do_check_pgt_cache(int, int); diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h index 1275831dda29..f705735feefc 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -98,7 +98,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) __free_pages(pte, PTE_ORDER); } -#define __pte_free_tlb(tlb,pte) \ +#define __pte_free_tlb(tlb,pte,address) \ do { \ pgtable_page_dtor(pte); \ tlb_remove_page((tlb), pte); \ @@ -111,7 +111,7 @@ do { \ * inside the pgd, so has no extra memory associated with it. */ #define pmd_free(mm, x) do { } while (0) -#define __pmd_free_tlb(tlb, x) do { } while (0) +#define __pmd_free_tlb(tlb, x, addr) do { } while (0) #endif @@ -132,7 +132,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) free_pages((unsigned long)pmd, PMD_ORDER); } -#define __pmd_free_tlb(tlb, x) pmd_free((tlb)->mm, x) +#define __pmd_free_tlb(tlb, x, addr) pmd_free((tlb)->mm, x) #endif diff --git a/arch/mn10300/include/asm/pgalloc.h b/arch/mn10300/include/asm/pgalloc.h index ec057e1bd4cf..a19f11327cd8 100644 --- a/arch/mn10300/include/asm/pgalloc.h +++ b/arch/mn10300/include/asm/pgalloc.h @@ -51,6 +51,6 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte) } -#define __pte_free_tlb(tlb, pte) tlb_remove_page((tlb), (pte)) +#define __pte_free_tlb(tlb, pte, addr) tlb_remove_page((tlb), (pte)) #endif /* _ASM_PGALLOC_H */ diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h index 383b1db310ee..07924903989e 100644 --- a/arch/parisc/include/asm/tlb.h +++ b/arch/parisc/include/asm/tlb.h @@ -21,7 +21,7 @@ do { if (!(tlb)->fullmm) \ #include -#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd) -#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte) +#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) +#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) #endif diff --git a/arch/powerpc/include/asm/pgalloc-32.h b/arch/powerpc/include/asm/pgalloc-32.h index 0815eb40acae..c9500d666a1d 100644 --- a/arch/powerpc/include/asm/pgalloc-32.h +++ b/arch/powerpc/include/asm/pgalloc-32.h @@ -16,7 +16,7 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); */ /* #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) */ #define pmd_free(mm, x) do { } while (0) -#define __pmd_free_tlb(tlb,x) do { } while (0) +#define __pmd_free_tlb(tlb,x,a) do { } while (0) /* #define 
pgd_populate(mm, pmd, pte) BUG() */ #ifndef CONFIG_BOOKE diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h index afda2bdd860f..e6f069c4f713 100644 --- a/arch/powerpc/include/asm/pgalloc-64.h +++ b/arch/powerpc/include/asm/pgalloc-64.h @@ -118,11 +118,11 @@ static inline void pgtable_free(pgtable_free_t pgf) kmem_cache_free(pgtable_cache[cachenum], p); } -#define __pmd_free_tlb(tlb, pmd) \ +#define __pmd_free_tlb(tlb, pmd,addr) \ pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) #ifndef CONFIG_PPC_64K_PAGES -#define __pud_free_tlb(tlb, pud) \ +#define __pud_free_tlb(tlb, pud, addr) \ pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ PUD_CACHE_NUM, PUD_TABLE_SIZE-1)) #endif /* CONFIG_PPC_64K_PAGES */ diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index 5d8480265a77..1730e5e298d6 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -38,14 +38,14 @@ static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum, extern void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); #ifdef CONFIG_SMP -#define __pte_free_tlb(tlb,ptepage) \ +#define __pte_free_tlb(tlb,ptepage,address) \ do { \ pgtable_page_dtor(ptepage); \ pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ - PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)); \ + PTE_NONCACHE_NUM, PTE_TABLE_SIZE-1)); \ } while (0) #else -#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, (pte)) +#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, (pte)) #endif diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 9920d6a7cf29..c46ef2ffa3d9 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -305,7 +305,7 @@ static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud, pmd = pmd_offset(pud, start); pud_clear(pud); - pmd_free_tlb(tlb, pmd); + pmd_free_tlb(tlb, pmd, start); } static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, @@ -348,7 +348,7 @@ static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, pud = pud_offset(pgd, start); pgd_clear(pgd); - pud_free_tlb(tlb, pud); + pud_free_tlb(tlb, pud, start); } /* diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 3d8a96d39d9d..81150b053689 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -96,7 +96,8 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) * pte_free_tlb frees a pte table and clears the CRSTE for the * page table from the tlb. */ -static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte) +static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, + unsigned long address) { if (!tlb->fullmm) { tlb->array[tlb->nr_ptes++] = pte; @@ -113,7 +114,8 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte) * as the pgd. pmd_free_tlb checks the asce_limit against 2GB * to avoid the double free of the pmd in this case. */ -static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) +static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long address) { #ifdef __s390x__ if (tlb->mm->context.asce_limit <= (1UL << 31)) @@ -134,7 +136,8 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) * as the pgd. pud_free_tlb checks the asce_limit against 4TB * to avoid the double free of the pud in this case. 
*/ -static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) +static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, + unsigned long address) { #ifdef __s390x__ if (tlb->mm->context.asce_limit <= (1UL << 42)) diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h index 84dd2db7104c..89a482750a5b 100644 --- a/arch/sh/include/asm/pgalloc.h +++ b/arch/sh/include/asm/pgalloc.h @@ -73,7 +73,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) quicklist_free_page(QUICK_PT, NULL, pte); } -#define __pte_free_tlb(tlb,pte) \ +#define __pte_free_tlb(tlb,pte,addr) \ do { \ pgtable_page_dtor(pte); \ tlb_remove_page((tlb), (pte)); \ @@ -85,7 +85,7 @@ do { \ */ #define pmd_free(mm, x) do { } while (0) -#define __pmd_free_tlb(tlb,x) do { } while (0) +#define __pmd_free_tlb(tlb,x,addr) do { } while (0) static inline void check_pgt_cache(void) { diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 9c16f737074a..da8fe7ab8728 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -91,9 +91,9 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) } #define tlb_remove_page(tlb,page) free_page_and_swap_cache(page) -#define pte_free_tlb(tlb, ptep) pte_free((tlb)->mm, ptep) -#define pmd_free_tlb(tlb, pmdp) pmd_free((tlb)->mm, pmdp) -#define pud_free_tlb(tlb, pudp) pud_free((tlb)->mm, pudp) +#define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep) +#define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp) +#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) #define tlb_migrate_finish(mm) do { } while (0) diff --git a/arch/sparc/include/asm/pgalloc_32.h b/arch/sparc/include/asm/pgalloc_32.h index 681582d26969..ca2b34456c4b 100644 --- a/arch/sparc/include/asm/pgalloc_32.h +++ b/arch/sparc/include/asm/pgalloc_32.h @@ -44,8 +44,8 @@ BTFIXUPDEF_CALL(pmd_t *, pmd_alloc_one, struct mm_struct *, unsigned long) BTFIXUPDEF_CALL(void, free_pmd_fast, pmd_t *) #define free_pmd_fast(pmd) BTFIXUP_CALL(free_pmd_fast)(pmd) -#define pmd_free(mm, pmd) free_pmd_fast(pmd) -#define __pmd_free_tlb(tlb, pmd) pmd_free((tlb)->mm, pmd) +#define pmd_free(mm, pmd) free_pmd_fast(pmd) +#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) BTFIXUPDEF_CALL(void, pmd_populate, pmd_t *, struct page *) #define pmd_populate(MM, PMD, PTE) BTFIXUP_CALL(pmd_populate)(PMD, PTE) @@ -62,7 +62,7 @@ BTFIXUPDEF_CALL(void, free_pte_fast, pte_t *) #define pte_free_kernel(mm, pte) BTFIXUP_CALL(free_pte_fast)(pte) BTFIXUPDEF_CALL(void, pte_free, pgtable_t ) -#define pte_free(mm, pte) BTFIXUP_CALL(pte_free)(pte) -#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte) +#define pte_free(mm, pte) BTFIXUP_CALL(pte_free)(pte) +#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) #endif /* _SPARC_PGALLOC_H */ diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h index ee38e731bfa6..dca406b9b6fc 100644 --- a/arch/sparc/include/asm/tlb_64.h +++ b/arch/sparc/include/asm/tlb_64.h @@ -100,9 +100,9 @@ static inline void tlb_remove_page(struct mmu_gather *mp, struct page *page) } #define tlb_remove_tlb_entry(mp,ptep,addr) do { } while (0) -#define pte_free_tlb(mp, ptepage) pte_free((mp)->mm, ptepage) -#define pmd_free_tlb(mp, pmdp) pmd_free((mp)->mm, pmdp) -#define pud_free_tlb(tlb,pudp) __pud_free_tlb(tlb,pudp) +#define pte_free_tlb(mp, ptepage, addr) pte_free((mp)->mm, ptepage) +#define pmd_free_tlb(mp, pmdp, addr) pmd_free((mp)->mm, pmdp) +#define pud_free_tlb(tlb,pudp, addr) 
__pud_free_tlb(tlb,pudp,addr) #define tlb_migrate_finish(mm) do { } while (0) #define tlb_start_vma(tlb, vma) do { } while (0) diff --git a/arch/um/include/asm/pgalloc.h b/arch/um/include/asm/pgalloc.h index 718984359f8c..32c8ce4e1515 100644 --- a/arch/um/include/asm/pgalloc.h +++ b/arch/um/include/asm/pgalloc.h @@ -40,7 +40,7 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte) __free_page(pte); } -#define __pte_free_tlb(tlb,pte) \ +#define __pte_free_tlb(tlb,pte, address) \ do { \ pgtable_page_dtor(pte); \ tlb_remove_page((tlb),(pte)); \ @@ -53,7 +53,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) free_page((unsigned long)pmd); } -#define __pmd_free_tlb(tlb,x) tlb_remove_page((tlb),virt_to_page(x)) +#define __pmd_free_tlb(tlb,x, address) tlb_remove_page((tlb),virt_to_page(x)) #endif #define check_pgt_cache() do { } while (0) diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 5240fa1c5e08..660caedac9eb 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -116,11 +116,11 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) -#define pte_free_tlb(tlb, ptep) __pte_free_tlb(tlb, ptep) +#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr) -#define pud_free_tlb(tlb, pudp) __pud_free_tlb(tlb, pudp) +#define pud_free_tlb(tlb, pudp, addr) __pud_free_tlb(tlb, pudp, addr) -#define pmd_free_tlb(tlb, pmdp) __pmd_free_tlb(tlb, pmdp) +#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr) #define tlb_migrate_finish(mm) do {} while (0) diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index dd14c54ac718..0e8c2a0fd922 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -46,7 +46,13 @@ static inline void pte_free(struct mm_struct *mm, struct page *pte) __free_page(pte); } -extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte); +extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte); + +static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, + unsigned long address) +{ + ___pte_free_tlb(tlb, pte); +} static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) @@ -78,7 +84,13 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) free_page((unsigned long)pmd); } -extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); +extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); + +static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long adddress) +{ + ___pmd_free_tlb(tlb, pmd); +} #ifdef CONFIG_X86_PAE extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd); @@ -108,7 +120,14 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) free_page((unsigned long)pud); } -extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); +extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); + +static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, + unsigned long address) +{ + ___pud_free_tlb(tlb, pud); +} + #endif /* PAGETABLE_LEVELS > 3 */ #endif /* PAGETABLE_LEVELS > 2 */ diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8e43bdd45456..af8f9650058c 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -25,7 +25,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) return pte; } -void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) +void 
___pte_free_tlb(struct mmu_gather *tlb, struct page *pte) { pgtable_page_dtor(pte); paravirt_release_pte(page_to_pfn(pte)); @@ -33,14 +33,14 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte) } #if PAGETABLE_LEVELS > 2 -void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) +void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd) { paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT); tlb_remove_page(tlb, virt_to_page(pmd)); } #if PAGETABLE_LEVELS > 3 -void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) +void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud) { paravirt_release_pud(__pa(pud) >> PAGE_SHIFT); tlb_remove_page(tlb, virt_to_page(pud)); diff --git a/arch/xtensa/include/asm/tlb.h b/arch/xtensa/include/asm/tlb.h index 31c220faca02..0d766f9c1083 100644 --- a/arch/xtensa/include/asm/tlb.h +++ b/arch/xtensa/include/asm/tlb.h @@ -42,6 +42,6 @@ #include -#define __pte_free_tlb(tlb, pte) pte_free((tlb)->mm, pte) +#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) #endif /* _XTENSA_TLB_H */ diff --git a/include/asm-generic/4level-fixup.h b/include/asm-generic/4level-fixup.h index 9d40e879f99e..77ff547730af 100644 --- a/include/asm-generic/4level-fixup.h +++ b/include/asm-generic/4level-fixup.h @@ -27,9 +27,9 @@ #define pud_page_vaddr(pud) pgd_page_vaddr(pud) #undef pud_free_tlb -#define pud_free_tlb(tlb, x) do { } while (0) +#define pud_free_tlb(tlb, x, addr) do { } while (0) #define pud_free(mm, x) do { } while (0) -#define __pud_free_tlb(tlb, x) do { } while (0) +#define __pud_free_tlb(tlb, x, addr) do { } while (0) #undef pud_addr_end #define pud_addr_end(addr, end) (end) diff --git a/include/asm-generic/pgtable-nopmd.h b/include/asm-generic/pgtable-nopmd.h index a7cdc48e8b78..725612b793ce 100644 --- a/include/asm-generic/pgtable-nopmd.h +++ b/include/asm-generic/pgtable-nopmd.h @@ -59,7 +59,7 @@ static inline pmd_t * pmd_offset(pud_t * pud, unsigned long address) static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { } -#define __pmd_free_tlb(tlb, x) do { } while (0) +#define __pmd_free_tlb(tlb, x, a) do { } while (0) #undef pmd_addr_end #define pmd_addr_end(addr, end) (end) diff --git a/include/asm-generic/pgtable-nopud.h b/include/asm-generic/pgtable-nopud.h index 87cf449a6df3..810431d8351b 100644 --- a/include/asm-generic/pgtable-nopud.h +++ b/include/asm-generic/pgtable-nopud.h @@ -52,7 +52,7 @@ static inline pud_t * pud_offset(pgd_t * pgd, unsigned long address) */ #define pud_alloc_one(mm, address) NULL #define pud_free(mm, x) do { } while (0) -#define __pud_free_tlb(tlb, x) do { } while (0) +#define __pud_free_tlb(tlb, x, a) do { } while (0) #undef pud_addr_end #define pud_addr_end(addr, end) (end) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index f490e43a90b9..e43f9766259f 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -123,24 +123,24 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) -#define pte_free_tlb(tlb, ptep) \ +#define pte_free_tlb(tlb, ptep, address) \ do { \ tlb->need_flush = 1; \ - __pte_free_tlb(tlb, ptep); \ + __pte_free_tlb(tlb, ptep, address); \ } while (0) #ifndef __ARCH_HAS_4LEVEL_HACK -#define pud_free_tlb(tlb, pudp) \ +#define pud_free_tlb(tlb, pudp, address) \ do { \ tlb->need_flush = 1; \ - __pud_free_tlb(tlb, pudp); \ + __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif -#define pmd_free_tlb(tlb, pmdp) \ +#define pmd_free_tlb(tlb, pmdp, address) \ do { \ tlb->need_flush = 
1; \ - __pmd_free_tlb(tlb, pmdp); \ + __pmd_free_tlb(tlb, pmdp, address); \ } while (0) #define tlb_migrate_finish(mm) do {} while (0) diff --git a/mm/memory.c b/mm/memory.c index 65216194eb8d..aede2ce3aba4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -135,11 +135,12 @@ void pmd_clear_bad(pmd_t *pmd) * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. */ -static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd) +static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, + unsigned long addr) { pgtable_t token = pmd_pgtable(*pmd); pmd_clear(pmd); - pte_free_tlb(tlb, token); + pte_free_tlb(tlb, token, addr); tlb->mm->nr_ptes--; } @@ -157,7 +158,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, next = pmd_addr_end(addr, end); if (pmd_none_or_clear_bad(pmd)) continue; - free_pte_range(tlb, pmd); + free_pte_range(tlb, pmd, addr); } while (pmd++, addr = next, addr != end); start &= PUD_MASK; @@ -173,7 +174,7 @@ static inline void free_pmd_range(struct mmu_gather *tlb, pud_t *pud, pmd = pmd_offset(pud, start); pud_clear(pud); - pmd_free_tlb(tlb, pmd); + pmd_free_tlb(tlb, pmd, start); } static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, @@ -206,7 +207,7 @@ static inline void free_pud_range(struct mmu_gather *tlb, pgd_t *pgd, pud = pud_offset(pgd, start); pgd_clear(pgd); - pud_free_tlb(tlb, pud); + pud_free_tlb(tlb, pud, start); } /* -- cgit v1.2.3 From 463d018323851a608eef52a9427b0585005c647f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 14 Jul 2009 00:33:35 +0200 Subject: cfg80211: make aware of net namespaces In order to make cfg80211/nl80211 aware of network namespaces, we have to do the following things: * del_virtual_intf method takes an interface index rather than a netdev pointer - simply change this * nl80211 uses init_net a lot, it changes to use the sender's network namespace * scan requests use the interface index, hold a netdev pointer and reference instead * we want a wiphy and its associated virtual interfaces to be in one netns together, so - we need to be able to change ns for a given interface, so export dev_change_net_namespace() - for each virtual interface set the NETIF_F_NETNS_LOCAL flag, and clear that flag only when the wiphy changes ns, to disallow breaking this invariant * when a network namespace goes away, we need to reparent the wiphy to init_net * cfg80211 users that support creating virtual interfaces must create them in the wiphy's namespace, currently this affects only mac80211 The end result is that you can now switch an entire wiphy into a different network namespace with the new command iw phy# set netns and all virtual interfaces will follow (or the operation fails). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 9 +++ include/net/cfg80211.h | 40 +++++++++- net/core/dev.c | 1 + net/mac80211/cfg.c | 14 +--- net/wireless/core.c | 75 ++++++++++++++++-- net/wireless/core.h | 5 +- net/wireless/nl80211.c | 202 +++++++++++++++++++++++++++++++++--------------- net/wireless/scan.c | 22 ++---- net/wireless/sme.c | 3 +- 9 files changed, 272 insertions(+), 99 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 962e2232a074..cb3dc6027fd9 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -262,6 +262,9 @@ * reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and * %NL80211_ATTR_REASON_CODE attributes are used. 
* + * @NL80211_CMD_SET_WIPHY_NETNS: Set a wiphy's netns. Note that all devices + * associated with this wiphy must be down and will follow. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -336,6 +339,8 @@ enum nl80211_commands { NL80211_CMD_ROAM, NL80211_CMD_DISCONNECT, + NL80211_CMD_SET_WIPHY_NETNS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -573,6 +578,8 @@ enum nl80211_commands { * and join_ibss(), key information is in a nested attribute each * with %NL80211_KEY_* sub-attributes * + * @NL80211_ATTR_PID: Process ID of a network namespace. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -701,6 +708,8 @@ enum nl80211_attrs { NL80211_ATTR_KEY, NL80211_ATTR_KEYS, + NL80211_ATTR_PID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a981ca8a5701..0d278777e39c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -542,7 +542,7 @@ struct cfg80211_ssid { * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets * @wiphy: the wiphy this was for - * @ifidx: the interface index + * @dev: the interface */ struct cfg80211_scan_request { struct cfg80211_ssid *ssids; @@ -554,7 +554,7 @@ struct cfg80211_scan_request { /* internal */ struct wiphy *wiphy; - int ifidx; + struct net_device *dev; bool aborted; }; @@ -845,7 +845,8 @@ struct cfg80211_bitrate_mask { * @resume: wiphy device needs to be resumed * * @add_virtual_intf: create a new virtual interface with the given name, - * must set the struct wireless_dev's iftype. + * must set the struct wireless_dev's iftype. Beware: You must create + * the new netdev in the wiphy's network namespace! * * @del_virtual_intf: remove the virtual interface determined by ifindex. 
* @@ -937,7 +938,7 @@ struct cfg80211_ops { int (*add_virtual_intf)(struct wiphy *wiphy, char *name, enum nl80211_iftype type, u32 *flags, struct vif_params *params); - int (*del_virtual_intf)(struct wiphy *wiphy, int ifindex); + int (*del_virtual_intf)(struct wiphy *wiphy, struct net_device *dev); int (*change_virtual_intf)(struct wiphy *wiphy, struct net_device *dev, enum nl80211_iftype type, u32 *flags, @@ -1088,6 +1089,9 @@ struct cfg80211_ops { * @frag_threshold: Fragmentation threshold (dot11FragmentationThreshold); * -1 = fragmentation disabled, only odd values >= 256 used * @rts_threshold: RTS threshold (dot11RTSThreshold); -1 = RTS/CTS disabled + * @net: the network namespace this wiphy currently lives in + * @netnsok: if set to false, do not allow changing the netns of this + * wiphy at all */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1101,6 +1105,8 @@ struct wiphy { bool custom_regulatory; bool strict_regulatory; + bool netnsok; + enum cfg80211_signal_type signal_type; int bss_priv_size; @@ -1139,9 +1145,35 @@ struct wiphy { /* dir in debugfs: ieee80211/ */ struct dentry *debugfsdir; +#ifdef CONFIG_NET_NS + /* the network namespace this phy lives in currently */ + struct net *_net; +#endif + char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); }; +#ifdef CONFIG_NET_NS +static inline struct net *wiphy_net(struct wiphy *wiphy) +{ + return wiphy->_net; +} + +static inline void wiphy_net_set(struct wiphy *wiphy, struct net *net) +{ + wiphy->_net = net; +} +#else +static inline struct net *wiphy_net(struct wiphy *wiphy) +{ + return &init_net; +} + +static inline void wiphy_net_set(struct wiphy *wiphy, struct net *net) +{ +} +#endif + /** * wiphy_priv - return priv from wiphy * diff --git a/net/core/dev.c b/net/core/dev.c index d6c657ee413d..71347668c506 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5344,6 +5344,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char out: return err; } +EXPORT_SYMBOL_GPL(dev_change_net_namespace); static int dev_cpu_callback(struct notifier_block *nfb, unsigned long action, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 52928ad90570..4bbf5007799b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -57,19 +57,9 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name, return 0; } -static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex) +static int ieee80211_del_iface(struct wiphy *wiphy, struct net_device *dev) { - struct net_device *dev; - struct ieee80211_sub_if_data *sdata; - - /* we're under RTNL */ - dev = __dev_get_by_index(&init_net, ifindex); - if (!dev) - return -ENODEV; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - ieee80211_if_remove(sdata); + ieee80211_if_remove(IEEE80211_DEV_TO_SUB_IF(dev)); return 0; } diff --git a/net/wireless/core.c b/net/wireless/core.c index 6891cd0e38d5..442c9f389799 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -106,7 +106,7 @@ __cfg80211_rdev_from_info(struct genl_info *info) if (info->attrs[NL80211_ATTR_IFINDEX]) { ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(genl_info_net(info), ifindex); if (dev) { if (dev->ieee80211_ptr) byifidx = @@ -151,13 +151,13 @@ cfg80211_get_dev_from_info(struct genl_info *info) } struct cfg80211_registered_device * -cfg80211_get_dev_from_ifindex(int ifindex) +cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) { struct cfg80211_registered_device *rdev = ERR_PTR(-ENODEV); 
struct net_device *dev; mutex_lock(&cfg80211_mutex); - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(net, ifindex); if (!dev) goto out; if (dev->ieee80211_ptr) { @@ -222,6 +222,42 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, return 0; } +int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, + struct net *net) +{ + struct wireless_dev *wdev; + int err = 0; + + if (!rdev->wiphy.netnsok) + return -EOPNOTSUPP; + + list_for_each_entry(wdev, &rdev->netdev_list, list) { + wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; + err = dev_change_net_namespace(wdev->netdev, net, "wlan%d"); + if (err) + break; + wdev->netdev->features |= NETIF_F_NETNS_LOCAL; + } + + if (err) { + /* failed -- clean up to old netns */ + net = wiphy_net(&rdev->wiphy); + + list_for_each_entry_continue_reverse(wdev, &rdev->netdev_list, + list) { + wdev->netdev->features &= ~NETIF_F_NETNS_LOCAL; + err = dev_change_net_namespace(wdev->netdev, net, + "wlan%d"); + WARN_ON(err); + wdev->netdev->features |= NETIF_F_NETNS_LOCAL; + } + } + + wiphy_net_set(&rdev->wiphy, net); + + return err; +} + static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) { struct cfg80211_registered_device *rdev = data; @@ -375,6 +411,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.dev.class = &ieee80211_class; rdev->wiphy.dev.platform_data = rdev; + wiphy_net_set(&rdev->wiphy, &init_net); + rdev->rfkill_ops.set_block = cfg80211_rfkill_set_block; rdev->rfkill = rfkill_alloc(dev_name(&rdev->wiphy.dev), &rdev->wiphy.dev, RFKILL_TYPE_WLAN, @@ -615,6 +653,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, spin_lock_init(&wdev->event_lock); mutex_lock(&rdev->devlist_mtx); list_add(&wdev->list, &rdev->netdev_list); + /* can only change netns with wiphy */ + dev->features |= NETIF_F_NETNS_LOCAL; + if (sysfs_create_link(&dev->dev.kobj, &rdev->wiphy.dev.kobj, "phy80211")) { printk(KERN_ERR "wireless: failed to add phy80211 " @@ -705,10 +746,32 @@ static struct notifier_block cfg80211_netdev_notifier = { .notifier_call = cfg80211_netdev_notifier_call, }; -static int cfg80211_init(void) +static void __net_exit cfg80211_pernet_exit(struct net *net) +{ + struct cfg80211_registered_device *rdev; + + rtnl_lock(); + mutex_lock(&cfg80211_mutex); + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (net_eq(wiphy_net(&rdev->wiphy), net)) + WARN_ON(cfg80211_switch_netns(rdev, &init_net)); + } + mutex_unlock(&cfg80211_mutex); + rtnl_unlock(); +} + +static struct pernet_operations cfg80211_pernet_ops = { + .exit = cfg80211_pernet_exit, +}; + +static int __init cfg80211_init(void) { int err; + err = register_pernet_device(&cfg80211_pernet_ops); + if (err) + goto out_fail_pernet; + err = wiphy_sysfs_init(); if (err) goto out_fail_sysfs; @@ -736,9 +799,10 @@ out_fail_nl80211: out_fail_notifier: wiphy_sysfs_exit(); out_fail_sysfs: + unregister_pernet_device(&cfg80211_pernet_ops); +out_fail_pernet: return err; } - subsys_initcall(cfg80211_init); static void cfg80211_exit(void) @@ -748,5 +812,6 @@ static void cfg80211_exit(void) unregister_netdevice_notifier(&cfg80211_netdev_notifier); wiphy_sysfs_exit(); regulatory_exit(); + unregister_pernet_device(&cfg80211_pernet_ops); } module_exit(cfg80211_exit); diff --git a/net/wireless/core.h b/net/wireless/core.h index 2ec8ddbe57de..4276b70cd975 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -170,7 +170,10 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); /* identical to 
cfg80211_get_dev_from_info but only operate on ifindex */ extern struct cfg80211_registered_device * -cfg80211_get_dev_from_ifindex(int ifindex); +cfg80211_get_dev_from_ifindex(struct net *net, int ifindex); + +int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, + struct net *net); static inline void cfg80211_lock_rdev(struct cfg80211_registered_device *rdev) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index da450ef1fc7e..7880a9c4cdda 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -14,8 +14,10 @@ #include #include #include +#include #include #include +#include #include "core.h" #include "nl80211.h" #include "reg.h" @@ -27,24 +29,26 @@ static struct genl_family nl80211_fam = { .hdrsize = 0, /* no private header */ .version = 1, /* no particular meaning now */ .maxattr = NL80211_ATTR_MAX, + .netnsok = true, }; /* internal helper: get rdev and dev */ -static int get_rdev_dev_by_info_ifindex(struct nlattr **attrs, +static int get_rdev_dev_by_info_ifindex(struct genl_info *info, struct cfg80211_registered_device **rdev, struct net_device **dev) { + struct nlattr **attrs = info->attrs; int ifindex; if (!attrs[NL80211_ATTR_IFINDEX]) return -EINVAL; ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); - *dev = dev_get_by_index(&init_net, ifindex); + *dev = dev_get_by_index(genl_info_net(info), ifindex); if (!*dev) return -ENODEV; - *rdev = cfg80211_get_dev_from_ifindex(ifindex); + *rdev = cfg80211_get_dev_from_ifindex(genl_info_net(info), ifindex); if (IS_ERR(*rdev)) { dev_put(*dev); return PTR_ERR(*rdev); @@ -133,6 +137,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, + [NL80211_ATTR_PID] = { .type = NLA_U32 }, }; /* policy for the attributes */ @@ -532,6 +537,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(deauth, DEAUTHENTICATE); CMD(disassoc, DISASSOCIATE); CMD(join_ibss, JOIN_IBSS); + if (dev->wiphy.netnsok) { + i++; + NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); + } #undef CMD @@ -562,6 +571,8 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) mutex_lock(&cfg80211_mutex); list_for_each_entry(dev, &cfg80211_rdev_list, list) { + if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) + continue; if (++idx <= start) continue; if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid, @@ -867,6 +878,8 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * mutex_lock(&cfg80211_mutex); list_for_each_entry(dev, &cfg80211_rdev_list, list) { + if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) + continue; if (wp_idx < wp_start) { wp_idx++; continue; @@ -907,7 +920,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) struct net_device *netdev; int err; - err = get_rdev_dev_by_info_ifindex(info->attrs, &dev, &netdev); + err = get_rdev_dev_by_info_ifindex(info, &dev, &netdev); if (err) return err; @@ -975,7 +988,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1098,26 +1111,25 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) { 
struct cfg80211_registered_device *rdev; - int ifindex, err; + int err; struct net_device *dev; rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; - ifindex = dev->ifindex; - dev_put(dev); if (!rdev->ops->del_virtual_intf) { err = -EOPNOTSUPP; goto out; } - err = rdev->ops->del_virtual_intf(&rdev->wiphy, ifindex); + err = rdev->ops->del_virtual_intf(&rdev->wiphy, dev); out: cfg80211_unlock_rdev(rdev); + dev_put(dev); unlock_rtnl: rtnl_unlock(); return err; @@ -1195,7 +1207,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1274,7 +1286,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1333,7 +1345,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1380,7 +1392,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1429,7 +1441,7 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1516,7 +1528,7 @@ static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -1726,13 +1738,13 @@ static int nl80211_dump_station(struct sk_buff *skb, rtnl_lock(); - netdev = __dev_get_by_index(&init_net, ifidx); + netdev = __dev_get_by_index(sock_net(skb->sk), ifidx); if (!netdev) { err = -ENODEV; goto out_rtnl; } - dev = cfg80211_get_dev_from_ifindex(ifidx); + dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out_rtnl; @@ -1791,7 +1803,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -1829,14 +1841,16 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) /* * Get vlan interface making sure it is on the right wiphy. 
*/ -static int get_vlan(struct nlattr *vlanattr, +static int get_vlan(struct genl_info *info, struct cfg80211_registered_device *rdev, struct net_device **vlan) { + struct nlattr *vlanattr = info->attrs[NL80211_ATTR_STA_VLAN]; *vlan = NULL; if (vlanattr) { - *vlan = dev_get_by_index(&init_net, nla_get_u32(vlanattr)); + *vlan = dev_get_by_index(genl_info_net(info), + nla_get_u32(vlanattr)); if (!*vlan) return -ENODEV; if (!(*vlan)->ieee80211_ptr) @@ -1891,11 +1905,11 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; - err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], rdev, ¶ms.vlan); + err = get_vlan(info, rdev, ¶ms.vlan); if (err) goto out; @@ -2004,11 +2018,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; - err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], rdev, ¶ms.vlan); + err = get_vlan(info, rdev, ¶ms.vlan); if (err) goto out; @@ -2079,7 +2093,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2185,13 +2199,13 @@ static int nl80211_dump_mpath(struct sk_buff *skb, rtnl_lock(); - netdev = __dev_get_by_index(&init_net, ifidx); + netdev = __dev_get_by_index(sock_net(skb->sk), ifidx); if (!netdev) { err = -ENODEV; goto out_rtnl; } - dev = cfg80211_get_dev_from_ifindex(ifidx); + dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out_rtnl; @@ -2255,7 +2269,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2314,7 +2328,7 @@ static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2362,7 +2376,7 @@ static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2404,7 +2418,7 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2455,7 +2469,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2574,7 +2588,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, rtnl_lock(); /* Look up our device */ - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2691,7 +2705,7 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = 
get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -2947,7 +2961,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto out_rtnl; @@ -3069,14 +3083,16 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->ie_len); } - request->ifidx = dev->ifindex; + request->dev = dev; request->wiphy = &rdev->wiphy; rdev->scan_req = request; err = rdev->ops->scan(&rdev->wiphy, dev, request); - if (!err) + if (!err) { nl80211_send_scan_start(rdev, dev); + dev_hold(dev); + } out_free: if (err) { @@ -3198,11 +3214,11 @@ static int nl80211_dump_scan(struct sk_buff *skb, cb->args[0] = ifidx; } - dev = dev_get_by_index(&init_net, ifidx); + dev = dev_get_by_index(sock_net(skb->sk), ifidx); if (!dev) return -ENODEV; - rdev = cfg80211_get_dev_from_ifindex(ifidx); + rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx); if (IS_ERR(rdev)) { err = PTR_ERR(rdev); goto out_put_netdev; @@ -3312,7 +3328,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -3448,7 +3464,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -3531,7 +3547,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -3593,7 +3609,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -3666,7 +3682,7 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -3739,7 +3755,7 @@ static int nl80211_leave_ibss(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -3924,7 +3940,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) return err; rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -4000,7 +4016,7 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) rtnl_lock(); - err = get_rdev_dev_by_info_ifindex(info->attrs, &rdev, &dev); + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); if (err) goto unlock_rtnl; @@ -4024,6 +4040,47 @@ unlock_rtnl: return err; } +static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev; + struct net *net; + int err; + u32 pid; + + if (!info->attrs[NL80211_ATTR_PID]) + return -EINVAL; + + pid = 
nla_get_u32(info->attrs[NL80211_ATTR_PID]); + + rtnl_lock(); + + rdev = cfg80211_get_dev_from_info(info); + if (IS_ERR(rdev)) { + err = PTR_ERR(rdev); + goto out; + } + + net = get_net_ns_by_pid(pid); + if (IS_ERR(net)) { + err = PTR_ERR(net); + goto out; + } + + err = 0; + + /* check if anything to do */ + if (net_eq(wiphy_net(&rdev->wiphy), net)) + goto out_put_net; + + err = cfg80211_switch_netns(rdev, net); + out_put_net: + put_net(net); + out: + cfg80211_unlock_rdev(rdev); + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4257,6 +4314,12 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_WIPHY_NETNS, + .doit = nl80211_wiphy_netns, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", @@ -4288,7 +4351,8 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) return; } - genlmsg_multicast(msg, 0, nl80211_config_mcgrp.id, GFP_KERNEL); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_config_mcgrp.id, GFP_KERNEL); } static int nl80211_add_scan_req(struct sk_buff *msg, @@ -4365,7 +4429,8 @@ void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_scan_mcgrp.id, GFP_KERNEL); } void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, @@ -4383,7 +4448,8 @@ void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_scan_mcgrp.id, GFP_KERNEL); } void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, @@ -4401,7 +4467,8 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_scan_mcgrp.id, GFP_KERNEL); } /* @@ -4450,7 +4517,10 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) return; } - genlmsg_multicast(msg, 0, nl80211_regulatory_mcgrp.id, GFP_KERNEL); + rtnl_lock(); + genlmsg_multicast_allns(msg, 0, nl80211_regulatory_mcgrp.id, + GFP_KERNEL); + rtnl_unlock(); return; @@ -4486,7 +4556,8 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -4553,7 +4624,8 @@ static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -4611,7 +4683,8 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -4651,7 +4724,8 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + 
nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -4691,7 +4765,8 @@ void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, GFP_KERNEL); return; nla_put_failure: @@ -4726,7 +4801,8 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -4766,7 +4842,8 @@ void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev, return; } - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: @@ -4819,7 +4896,10 @@ void nl80211_send_beacon_hint_event(struct wiphy *wiphy, return; } - genlmsg_multicast(msg, 0, nl80211_regulatory_mcgrp.id, GFP_ATOMIC); + rcu_read_lock(); + genlmsg_multicast_allns(msg, 0, nl80211_regulatory_mcgrp.id, + GFP_ATOMIC); + rcu_read_unlock(); return; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index decc59fe0ee8..1b578b8cb1c9 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -32,9 +32,7 @@ void __cfg80211_scan_done(struct work_struct *wk) mutex_lock(&rdev->mtx); request = rdev->scan_req; - dev = dev_get_by_index(&init_net, request->ifidx); - if (!dev) - goto out; + dev = request->dev; /* * This must be before sending the other events! @@ -58,7 +56,6 @@ void __cfg80211_scan_done(struct work_struct *wk) dev_put(dev); - out: cfg80211_unlock_rdev(rdev); wiphy_to_dev(request->wiphy)->scan_req = NULL; kfree(request); @@ -66,17 +63,10 @@ void __cfg80211_scan_done(struct work_struct *wk) void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) { - struct net_device *dev = dev_get_by_index(&init_net, request->ifidx); - if (WARN_ON(!dev)) { - kfree(request); - return; - } - WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); request->aborted = aborted; schedule_work(&wiphy_to_dev(request->wiphy)->scan_done_wk); - dev_put(dev); } EXPORT_SYMBOL(cfg80211_scan_done); @@ -592,7 +582,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (!netif_running(dev)) return -ENETDOWN; - rdev = cfg80211_get_dev_from_ifindex(dev->ifindex); + rdev = cfg80211_get_dev_from_ifindex(dev_net(dev), dev->ifindex); if (IS_ERR(rdev)) return PTR_ERR(rdev); @@ -617,7 +607,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, } creq->wiphy = wiphy; - creq->ifidx = dev->ifindex; + creq->dev = dev; creq->ssids = (void *)(creq + 1); creq->channels = (void *)(creq->ssids + 1); creq->n_channels = n_channels; @@ -654,8 +644,10 @@ int cfg80211_wext_siwscan(struct net_device *dev, if (err) { rdev->scan_req = NULL; kfree(creq); - } else + } else { nl80211_send_scan_start(rdev, dev); + dev_hold(dev); + } out: cfg80211_unlock_rdev(rdev); return err; @@ -948,7 +940,7 @@ int cfg80211_wext_giwscan(struct net_device *dev, if (!netif_running(dev)) return -ENETDOWN; - rdev = cfg80211_get_dev_from_ifindex(dev->ifindex); + rdev = cfg80211_get_dev_from_ifindex(dev_net(dev), dev->ifindex); if (IS_ERR(rdev)) return PTR_ERR(rdev); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 82de2d9795f4..a19741097989 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -86,7 +86,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) wdev->conn->params.ssid_len); 
request->ssids[0].ssid_len = wdev->conn->params.ssid_len; - request->ifidx = wdev->netdev->ifindex; + request->dev = wdev->netdev; request->wiphy = &rdev->wiphy; rdev->scan_req = request; @@ -95,6 +95,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) if (!err) { wdev->conn->state = CFG80211_CONN_SCANNING; nl80211_send_scan_start(rdev, wdev->netdev); + dev_hold(wdev->netdev); } else { rdev->scan_req = NULL; kfree(request); -- cgit v1.2.3 From d1eba248469272ae0618288bccf65b24d017f1d2 Mon Sep 17 00:00:00 2001 From: Sujith Date: Thu, 23 Jul 2009 15:31:31 +0530 Subject: mac80211: Add a few 802.11n defines for AMPDU parameters Signed-off-by: Sujith Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 23343ab0bb03..21556a2d9e7e 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -802,6 +802,31 @@ struct ieee80211_ht_cap { #define IEEE80211_HT_AMPDU_PARM_FACTOR 0x03 #define IEEE80211_HT_AMPDU_PARM_DENSITY 0x1C +/* + * Maximum length of AMPDU that the STA can receive. + * Length = 2 ^ (13 + max_ampdu_length_exp) - 1 (octets) + */ +enum ieee80211_max_ampdu_length_exp { + IEEE80211_HT_MAX_AMPDU_8K = 0, + IEEE80211_HT_MAX_AMPDU_16K = 1, + IEEE80211_HT_MAX_AMPDU_32K = 2, + IEEE80211_HT_MAX_AMPDU_64K = 3 +}; + +#define IEEE80211_HT_MAX_AMPDU_FACTOR 13 + +/* Minimum MPDU start spacing */ +enum ieee80211_min_mpdu_spacing { + IEEE80211_HT_MPDU_DENSITY_NONE = 0, /* No restriction */ + IEEE80211_HT_MPDU_DENSITY_0_25 = 1, /* 1/4 usec */ + IEEE80211_HT_MPDU_DENSITY_0_5 = 2, /* 1/2 usec */ + IEEE80211_HT_MPDU_DENSITY_1 = 3, /* 1 usec */ + IEEE80211_HT_MPDU_DENSITY_2 = 4, /* 2 usec */ + IEEE80211_HT_MPDU_DENSITY_4 = 5, /* 4 usec */ + IEEE80211_HT_MPDU_DENSITY_8 = 6, /* 8 usec */ + IEEE80211_HT_MPDU_DENSITY_16 = 7 /* 16 usec */ +}; + /** * struct ieee80211_ht_info - HT information * -- cgit v1.2.3 From 3fa52056f3a8e755708241d5795e6d3e6f55ad85 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Jul 2009 13:23:09 +0200 Subject: mac80211: fix PS-poll response, race When a station queries us for a PS-poll response, we wrongly queue the frame on the virtual interface's queue rather than the pending queue. Additionally, fix a race condition where we could potentially send multiple frames to the sleeping station due to using a station flag rather than a packet flag. When converting to a packet flag, we can also convert p54 and remove the filter clearing we added for it. (Also remove a now dead function) Signed-off-by: Johannes Berg Reported-by: Bob Copeland Tested-by: Bob Copeland Cc: Christian Lamparter Signed-off-by: John W. 
Linville --- drivers/net/wireless/p54/txrx.c | 2 +- include/net/mac80211.h | 4 ++++ net/mac80211/rx.c | 11 ++++++----- net/mac80211/sta_info.h | 13 ------------- net/mac80211/tx.c | 19 +------------------ 5 files changed, 12 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index 0d589d68e547..c32a0d2fa1f7 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -614,7 +614,7 @@ static void p54_tx_80211_header(struct p54_common *priv, struct sk_buff *skb, if (info->flags & IEEE80211_TX_CTL_ASSIGN_SEQ) *flags |= P54_HDR_FLAG_DATA_OUT_SEQNR; - if (info->flags & IEEE80211_TX_CTL_CLEAR_PS_FILT) + if (info->flags & IEEE80211_TX_CTL_PSPOLL_RESPONSE) *flags |= P54_HDR_FLAG_DATA_OUT_NOCANCEL; *queue = skb_get_queue_mapping(skb) + P54_QUEUE_DATA; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7dd67a1ff4d5..d4e09a06b4a2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -243,6 +243,9 @@ struct ieee80211_bss_conf { * used to indicate that a frame was already retried due to PS * @IEEE80211_TX_INTFL_DONT_ENCRYPT: completely internal to mac80211, * used to indicate frame should not be encrypted + * @IEEE80211_TX_CTL_PSPOLL_RESPONSE: (internal?) + * This frame is a response to a PS-poll frame and should be sent + * although the station is in powersave mode. */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -262,6 +265,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_INTFL_NEED_TXPROCESSING = BIT(14), IEEE80211_TX_INTFL_RETRIED = BIT(15), IEEE80211_TX_INTFL_DONT_ENCRYPT = BIT(16), + IEEE80211_TX_CTL_PSPOLL_RESPONSE = BIT(17), }; /** diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index cb95a3116034..f195705146bd 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -783,7 +783,7 @@ static void ap_sta_ps_start(struct sta_info *sta) struct ieee80211_local *local = sdata->local; atomic_inc(&sdata->bss->num_sta_ps); - set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL); + set_sta_flags(sta, WLAN_STA_PS); drv_sta_notify(local, &sdata->vif, STA_NOTIFY_SLEEP, &sta->sta); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %pM aid %d enters power save mode\n", @@ -799,7 +799,7 @@ static int ap_sta_ps_end(struct sta_info *sta) atomic_dec(&sdata->bss->num_sta_ps); - clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL); + clear_sta_flags(sta, WLAN_STA_PS); drv_sta_notify(local, &sdata->vif, STA_NOTIFY_AWAKE, &sta->sta); if (!skb_queue_empty(&sta->ps_tx_buf)) @@ -1117,14 +1117,15 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) skb_queue_empty(&rx->sta->ps_tx_buf); if (skb) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; /* - * Tell TX path to send one frame even though the STA may + * Tell TX path to send this frame even though the STA may * still remain is PS mode after this frame exchange. 
*/ - set_sta_flags(rx->sta, WLAN_STA_PSPOLL); + info->flags |= IEEE80211_TX_CTL_PSPOLL_RESPONSE; #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "STA %pM aid %d: PS Poll (entries after %d)\n", @@ -1139,7 +1140,7 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) else hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); - dev_queue_xmit(skb); + ieee80211_add_pending_skb(rx->local, skb); if (no_pending_pkts) sta_info_clear_tim_bit(rx->sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 4ecf10a9bd00..ccc3adf962c7 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -30,7 +30,6 @@ * @WLAN_STA_ASSOC_AP: We're associated to that station, it is an AP. * @WLAN_STA_WME: Station is a QoS-STA. * @WLAN_STA_WDS: Station is one of our WDS peers. - * @WLAN_STA_PSPOLL: Station has just PS-polled us. * @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next * frame to this station is transmitted. @@ -47,7 +46,6 @@ enum ieee80211_sta_info_flags { WLAN_STA_ASSOC_AP = 1<<5, WLAN_STA_WME = 1<<6, WLAN_STA_WDS = 1<<7, - WLAN_STA_PSPOLL = 1<<8, WLAN_STA_CLEAR_PS_FILT = 1<<9, WLAN_STA_MFP = 1<<10, WLAN_STA_SUSPEND = 1<<11 @@ -359,17 +357,6 @@ static inline void clear_sta_flags(struct sta_info *sta, const u32 flags) spin_unlock_irqrestore(&sta->flaglock, irqfl); } -static inline void set_and_clear_sta_flags(struct sta_info *sta, - const u32 set, const u32 clear) -{ - unsigned long irqfl; - - spin_lock_irqsave(&sta->flaglock, irqfl); - sta->flags |= set; - sta->flags &= ~clear; - spin_unlock_irqrestore(&sta->flaglock, irqfl); -} - static inline u32 test_sta_flags(struct sta_info *sta, const u32 flags) { u32 ret; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 70ff4f065665..edacad1fb1dc 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -373,7 +373,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) staflags = get_sta_flags(sta); if (unlikely((staflags & WLAN_STA_PS) && - !(staflags & WLAN_STA_PSPOLL))) { + !(info->flags & IEEE80211_TX_CTL_PSPOLL_RESPONSE))) { #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "STA %pM aid %d: PS buffer (entries " "before %d)\n", @@ -412,24 +412,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) sta->sta.addr); } #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ - if (test_and_clear_sta_flags(sta, WLAN_STA_PSPOLL)) { - /* - * The sleeping station with pending data is now snoozing. - * It queried us for its buffered frames and will go back - * to deep sleep once it got everything. - * - * inform the driver, in case the hardware does powersave - * frame filtering and keeps a station blacklist on its own - * (e.g: p54), so that frames can be delivered unimpeded. - * - * Note: It should be safe to disable the filter now. - * As, it is really unlikely that we still have any pending - * frame for this station in the hw's buffers/fifos left, - * that is not rejected with a unsuccessful tx_status yet. - */ - info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; - } return TX_CONTINUE; } -- cgit v1.2.3 From 7cb7f45c7feef43c8f71f5cfedfc0b19be2142f7 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 27 Jul 2009 18:42:38 -0400 Subject: Revert "ACPICA: Remove obsolete acpi_os_validate_address interface" This reverts commit f9ca058430333c9a24c5ca926aa445125f88df18. 
which caused a regression: http://bugzilla.kernel.org/show_bug.cgi?id=13620 Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acobject.h | 1 + drivers/acpi/acpica/dsopcode.c | 24 ++++++++++++++++++++++++ drivers/acpi/acpica/exfldio.c | 6 ++++++ include/acpi/acpiosxf.h | 4 ++++ 4 files changed, 35 insertions(+) (limited to 'include') diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h index 544dcf834922..eb6f038b03d9 100644 --- a/drivers/acpi/acpica/acobject.h +++ b/drivers/acpi/acpica/acobject.h @@ -97,6 +97,7 @@ #define AOPOBJ_OBJECT_INITIALIZED 0x08 #define AOPOBJ_SETUP_COMPLETE 0x10 #define AOPOBJ_SINGLE_DATUM 0x20 +#define AOPOBJ_INVALID 0x40 /* Used if host OS won't allow an op_region address */ /****************************************************************************** * diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c index 584d766e6f12..b79978f7bc71 100644 --- a/drivers/acpi/acpica/dsopcode.c +++ b/drivers/acpi/acpica/dsopcode.c @@ -397,6 +397,30 @@ acpi_status acpi_ds_get_region_arguments(union acpi_operand_object *obj_desc) status = acpi_ds_execute_arguments(node, acpi_ns_get_parent_node(node), extra_desc->extra.aml_length, extra_desc->extra.aml_start); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Validate the region address/length via the host OS */ + + status = acpi_os_validate_address(obj_desc->region.space_id, + obj_desc->region.address, + (acpi_size) obj_desc->region.length, + acpi_ut_get_node_name(node)); + + if (ACPI_FAILURE(status)) { + /* + * Invalid address/length. We will emit an error message and mark + * the region as invalid, so that it will cause an additional error if + * it is ever used. Then return AE_OK. + */ + ACPI_EXCEPTION((AE_INFO, status, + "During address validation of OpRegion [%4.4s]", + node->name.ascii)); + obj_desc->common.flags |= AOPOBJ_INVALID; + status = AE_OK; + } + return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c index d4075b821021..6687be167f5f 100644 --- a/drivers/acpi/acpica/exfldio.c +++ b/drivers/acpi/acpica/exfldio.c @@ -113,6 +113,12 @@ acpi_ex_setup_region(union acpi_operand_object *obj_desc, } } + /* Exit if Address/Length have been disallowed by the host OS */ + + if (rgn_desc->common.flags & AOPOBJ_INVALID) { + return_ACPI_STATUS(AE_AML_ILLEGAL_ADDRESS); + } + /* * Exit now for SMBus address space, it has a non-linear address space * and the request cannot be directly validated diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index 3e798593b17b..ab0b85cf21f3 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -242,6 +242,10 @@ acpi_os_derive_pci_id(acpi_handle rhandle, acpi_status acpi_os_validate_interface(char *interface); acpi_status acpi_osi_invalidate(char* interface); +acpi_status +acpi_os_validate_address(u8 space_id, acpi_physical_address address, + acpi_size length, char *name); + u64 acpi_os_get_timer(void); acpi_status acpi_os_signal(u32 function, void *info); -- cgit v1.2.3 From 27fed4175acf81ddd91d9a4ee2fd298981f60295 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 27 Jul 2009 18:39:45 -0700 Subject: ip: fix logic of reverse path filter sysctl Even though reverse path filter was changed from simple boolean to trinary control, the loose mode only works if both all and device are configured because of this logic error. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index acef2a770b6b..ad27c7da8798 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -82,7 +82,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) #define IN_DEV_MFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), MC_FORWARDING) -#define IN_DEV_RPFILTER(in_dev) IN_DEV_ANDCONF((in_dev), RP_FILTER) +#define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ ACCEPT_SOURCE_ROUTE) #define IN_DEV_BOOTP_RELAY(in_dev) IN_DEV_ANDCONF((in_dev), BOOTP_RELAY) -- cgit v1.2.3 From 3885123da8335dc6b67387e5e626acbffc56f664 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:50 +0900 Subject: swiotlb: remove unused swiotlb_alloc_boot() Nobody uses swiotlb_alloc_boot(). Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/kernel/pci-swiotlb.c | 5 ----- include/linux/swiotlb.h | 2 -- lib/swiotlb.c | 7 +------ 3 files changed, 1 insertion(+), 13 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 6af96ee44200..0ac7cd524788 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -13,11 +13,6 @@ int swiotlb __read_mostly; -void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) -{ - return alloc_bootmem_low_pages(size); -} - void *swiotlb_alloc(unsigned order, unsigned long nslabs) { return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index cb1a6631b8f4..94db70444c17 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -14,7 +14,6 @@ struct scatterlist; */ #define IO_TLB_SEGSIZE 128 - /* * log of the size of each IO TLB slab. The number of slabs is command line * controllable. @@ -24,7 +23,6 @@ struct scatterlist; extern void swiotlb_init(void); -extern void *swiotlb_alloc_boot(size_t bytes, unsigned long nslabs); extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index bffe6d7ef9d9..9edfdd442edc 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -114,11 +114,6 @@ setup_io_tlb_npages(char *str) __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? */ -void * __weak __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) -{ - return alloc_bootmem_low_pages(size); -} - void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs) { return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); @@ -189,7 +184,7 @@ swiotlb_init_with_default_size(size_t default_size) /* * Get IO TLB memory from the low pages */ - io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs); + io_tlb_start = alloc_bootmem_low_pages(bytes); if (!io_tlb_start) panic("Cannot allocate SWIOTLB buffer"); io_tlb_end = io_tlb_start + bytes; -- cgit v1.2.3 From bb52196be37ce154ddc50b1f39496146d181cbe7 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:51 +0900 Subject: swiotlb: remove unused swiotlb_alloc() Nobody uses swiotlb_alloc(). 
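Both definitions of the hook (the x86 one and the __weak default in lib/swiotlb.c) were identical one-line wrappers around the page allocator, so the only call site now allocates directly. A minimal sketch of the equivalent call, using the same GFP flags the removed function used (the wrapper name below is illustrative, not from the patch):

#include <linux/gfp.h>

/* what swiotlb_late_init_with_default_size() now does inline, per the hunk below */
static void *late_io_tlb_alloc(unsigned int order)
{
        return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
}
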
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/kernel/pci-swiotlb.c | 5 ----- include/linux/swiotlb.h | 2 -- lib/swiotlb.c | 8 ++------ 3 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 0ac7cd524788..ea675cfe76fe 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -13,11 +13,6 @@ int swiotlb __read_mostly; -void *swiotlb_alloc(unsigned order, unsigned long nslabs) -{ - return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); -} - dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) { return paddr; diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 94db70444c17..6bc50944040f 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -23,8 +23,6 @@ struct scatterlist; extern void swiotlb_init(void); -extern void *swiotlb_alloc(unsigned order, unsigned long nslabs); - extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t address); extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 9edfdd442edc..3c4c21cdf43d 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -114,11 +114,6 @@ setup_io_tlb_npages(char *str) __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? */ -void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs) -{ - return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); -} - dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) { return paddr; @@ -240,7 +235,8 @@ swiotlb_late_init_with_default_size(size_t default_size) bytes = io_tlb_nslabs << IO_TLB_SHIFT; while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - io_tlb_start = swiotlb_alloc(order, io_tlb_nslabs); + io_tlb_start = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, + order); if (io_tlb_start) break; order--; -- cgit v1.2.3 From cf56e3f2e8a8d5b7bc719980869b0e7985c256f3 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:04:52 +0900 Subject: swiotlb: remove swiotlb_arch_range_needs_mapping Nobody uses swiotlb_arch_range_needs_mapping(). 
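The __weak default always returned 0 and no architecture overrode it, so range_needs_mapping() collapses to the swiotlb_force check and the call sites test that flag directly. A condensed sketch of the decision that remains (the helper name is hypothetical; swiotlb_force and address_needs_mapping() are the existing symbols in lib/swiotlb.c):

/* bounce through the IO TLB only if forced or the address is out of reach */
static int swiotlb_needs_bounce(struct device *hwdev, dma_addr_t dev_addr, size_t size)
{
        return swiotlb_force || address_needs_mapping(hwdev, dev_addr, size);
}
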
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- arch/x86/kernel/pci-swiotlb.c | 5 ----- include/linux/swiotlb.h | 2 -- lib/swiotlb.c | 15 ++------------- 3 files changed, 2 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index ea675cfe76fe..165bd7f93bb1 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -23,11 +23,6 @@ phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) return baddr; } -int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return 0; -} - static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags) { diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 6bc50944040f..a977da24f17c 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -28,8 +28,6 @@ extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t address); -extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); - extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 3c4c21cdf43d..dc1cd1f5369e 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -141,11 +141,6 @@ int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev, return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); } -int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return 0; -} - static void swiotlb_print_info(unsigned long bytes) { phys_addr_t pstart, pend; @@ -312,11 +307,6 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size) return swiotlb_arch_address_needs_mapping(hwdev, addr, size); } -static inline int range_needs_mapping(phys_addr_t paddr, size_t size) -{ - return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr, size); -} - static int is_swiotlb_buffer(char *addr) { return addr >= io_tlb_start && addr < io_tlb_end; @@ -646,8 +636,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * we can safely return the device addr and not worry about bounce * buffering it. */ - if (!address_needs_mapping(dev, dev_addr, size) && - !range_needs_mapping(phys, size)) + if (!address_needs_mapping(dev, dev_addr, size) && !swiotlb_force) return dev_addr; /* @@ -810,7 +799,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, phys_addr_t paddr = sg_phys(sg); dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr); - if (range_needs_mapping(paddr, sg->length) || + if (swiotlb_force || address_needs_mapping(hwdev, dev_addr, sg->length)) { void *map = map_single(hwdev, sg_phys(sg), sg->length, dir); -- cgit v1.2.3 From 8f2502fd8157632909ff335a3c628e7caeec5e03 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:05:00 +0900 Subject: remove is_buffer_dma_capable() is_buffer_dma_capable() was replaced with dma_capable(). is_buffer_dma_capable() tells if a buffer is dma-capable or not. However, it doesn't take a pointer to struct device so it doesn't work for POWERPC. 
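The practical difference is the extra device argument: dma_capable() gets the mask (and, where needed, a bus offset) from the device itself instead of having the caller pass a raw mask, which is what powerpc requires. A rough side-by-side sketch (the removed helper is verbatim from the hunk below; the dma_capable() body is only an approximation of the per-architecture definition on machines with a 1:1 bus mapping):

#include <linux/device.h>
#include <linux/types.h>

/* removed: caller supplies the mask */
static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size)
{
        return addr + size <= mask;
}

/* replacement, approximate shape: the device supplies its own mask */
static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
{
        return dev->dma_mask && addr + size <= *dev->dma_mask;
}
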
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- include/linux/dma-mapping.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 07dfd460d286..c0f6c3cd788c 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -98,11 +98,6 @@ static inline int is_device_dma_capable(struct device *dev) return dev->dma_mask != NULL && *dev->dma_mask != DMA_MASK_NONE; } -static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size) -{ - return addr + size <= mask; -} - #ifdef CONFIG_HAS_DMA #include #else -- cgit v1.2.3 From 862d196b27bd30a5ab8109d5cdb37980d81b1fe9 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 10 Jul 2009 10:05:02 +0900 Subject: swiotlb: use phys_to_dma and dma_to_phys This converts swiotlb to use phys_to_dma and dma_to_phys instead of swiotlb_phys_to_bus() and swiotlb_bus_to_phys(). swiotlb_phys_to_bus() and swiotlb_bus_to_phys() are not necessary so this patch also removes them. Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce --- include/linux/swiotlb.h | 5 ----- lib/swiotlb.c | 22 ++++++---------------- 2 files changed, 6 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index a977da24f17c..73b1f1cec423 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -23,11 +23,6 @@ struct scatterlist; extern void swiotlb_init(void); -extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, - phys_addr_t address); -extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, - dma_addr_t address); - extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, dma_addr_t *dma_handle, gfp_t flags); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index a012d93792b7..9e2fe3e1804c 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -114,21 +114,11 @@ setup_io_tlb_npages(char *str) __setup("swiotlb=", setup_io_tlb_npages); /* make io_tlb_overflow tunable too? 
*/ -dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) -{ - return paddr; -} - -phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) -{ - return baddr; -} - /* Note that this doesn't work with highmem page */ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev, volatile void *address) { - return swiotlb_phys_to_bus(hwdev, virt_to_phys(address)); + return phys_to_dma(hwdev, virt_to_phys(address)); } static void swiotlb_print_info(unsigned long bytes) @@ -567,7 +557,7 @@ void swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dev_addr) { - phys_addr_t paddr = swiotlb_bus_to_phys(hwdev, dev_addr); + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); WARN_ON(irqs_disabled()); if (!is_swiotlb_buffer(paddr)) @@ -612,7 +602,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { phys_addr_t phys = page_to_phys(page) + offset; - dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys); + dma_addr_t dev_addr = phys_to_dma(dev, phys); void *map; BUG_ON(dir == DMA_NONE); @@ -656,7 +646,7 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir) { - phys_addr_t paddr = swiotlb_bus_to_phys(hwdev, dev_addr); + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); @@ -699,7 +689,7 @@ static void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, int dir, int target) { - phys_addr_t paddr = swiotlb_bus_to_phys(hwdev, dev_addr); + phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); @@ -788,7 +778,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, for_each_sg(sgl, sg, nelems, i) { phys_addr_t paddr = sg_phys(sg); - dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr); + dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); if (swiotlb_force || !dma_capable(hwdev, dev_addr, sg->length)) { -- cgit v1.2.3 From 0c193054a4c1cf190d2f23e5e91bd14402e43912 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Mon, 27 Jul 2009 19:09:19 -0400 Subject: nfsd41: hange from page to memory based drc limits NFSD_SLOT_CACHE_SIZE is the size of all encoded operation responses (excluding the sequence operation) that we want to cache. For now, keep NFSD_SLOT_CACHE_SIZE at PAGE_SIZE. It will be reduced when the DRC is changed from page based to memory based. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 28 +++++++++++++--------------- fs/nfsd/nfssvc.c | 13 ++++++------- include/linux/nfsd/nfsd.h | 4 ++-- include/linux/nfsd/state.h | 1 + 4 files changed, 22 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 70cba3fbfa6d..e2b11b1b515c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -414,31 +414,31 @@ gen_sessionid(struct nfsd4_session *ses) /* * Give the client the number of slots it requests bound by - * NFSD_MAX_SLOTS_PER_SESSION and by sv_drc_max_pages. + * NFSD_MAX_SLOTS_PER_SESSION and by nfsd_drc_max_mem. * - * If we run out of pages (sv_drc_pages_used == sv_drc_max_pages) we - * should (up to a point) re-negotiate active sessions and reduce their - * slot usage to make rooom for new connections. For now we just fail the - * create session. + * If we run out of reserved DRC memory we should (up to a point) re-negotiate + * active sessions and reduce their slot usage to make rooom for new + * connections. 
For now we just fail the create session. */ static int set_forechannel_maxreqs(struct nfsd4_channel_attrs *fchan) { - int np; + int mem; if (fchan->maxreqs < 1) return nfserr_inval; else if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; - np = fchan->maxreqs * NFSD_PAGES_PER_SLOT; + mem = fchan->maxreqs * NFSD_SLOT_CACHE_SIZE; spin_lock(&nfsd_drc_lock); - if (np + nfsd_drc_pages_used > nfsd_drc_max_pages) - np = nfsd_drc_max_pages - nfsd_drc_pages_used; - nfsd_drc_pages_used += np; + if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) + mem = ((nfsd_drc_max_mem - nfsd_drc_mem_used) / + NFSD_SLOT_CACHE_SIZE) * NFSD_SLOT_CACHE_SIZE; + nfsd_drc_mem_used += mem; spin_unlock(&nfsd_drc_lock); - fchan->maxreqs = np / NFSD_PAGES_PER_SLOT; + fchan->maxreqs = mem / NFSD_SLOT_CACHE_SIZE; if (fchan->maxreqs == 0) return nfserr_resource; return 0; @@ -465,9 +465,7 @@ static int init_forechannel_attrs(struct svc_rqst *rqstp, fchan->maxresp_sz = maxcount; session_fchan->maxresp_sz = fchan->maxresp_sz; - /* Set the max response cached size our default which is - * a multiple of PAGE_SIZE and small */ - session_fchan->maxresp_cached = NFSD_PAGES_PER_SLOT * PAGE_SIZE; + session_fchan->maxresp_cached = NFSD_SLOT_CACHE_SIZE; fchan->maxresp_cached = session_fchan->maxresp_cached; /* Use the client's maxops if possible */ @@ -585,7 +583,7 @@ free_session(struct kref *kref) nfsd4_release_respages(e->ce_respages, e->ce_resused); } spin_lock(&nfsd_drc_lock); - nfsd_drc_pages_used -= ses->se_fchannel.maxreqs * NFSD_PAGES_PER_SLOT; + nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE; spin_unlock(&nfsd_drc_lock); kfree(ses); } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 9be2a1932f8a..5a280a9cb540 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -74,8 +74,8 @@ struct svc_serv *nfsd_serv; * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage. 
*/ spinlock_t nfsd_drc_lock; -unsigned int nfsd_drc_max_pages; -unsigned int nfsd_drc_pages_used; +unsigned int nfsd_drc_max_mem; +unsigned int nfsd_drc_mem_used; #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) static struct svc_stat nfsd_acl_svcstats; @@ -247,12 +247,11 @@ void nfsd_reset_versions(void) static void set_max_drc(void) { #define NFSD_DRC_SIZE_SHIFT 10 - nfsd_drc_max_pages = nr_free_buffer_pages() - >> NFSD_DRC_SIZE_SHIFT; - nfsd_drc_pages_used = 0; + nfsd_drc_max_mem = (nr_free_buffer_pages() + >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; + nfsd_drc_mem_used = 0; spin_lock_init(&nfsd_drc_lock); - dprintk("%s nfsd_drc_max_pages %u\n", __func__, - nfsd_drc_max_pages); + dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem); } int nfsd_create_serv(void) diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 2571f856908f..2812ed52669d 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -57,8 +57,8 @@ extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; extern spinlock_t nfsd_drc_lock; -extern unsigned int nfsd_drc_max_pages; -extern unsigned int nfsd_drc_pages_used; +extern unsigned int nfsd_drc_max_mem; +extern unsigned int nfsd_drc_mem_used; extern struct seq_operations nfs_exports_op; diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 57ab2ed08459..a6c87d623891 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -96,6 +96,7 @@ struct nfs4_cb_conn { #define NFSD_MAX_SLOTS_PER_SESSION 128 /* Maximum number of pages per slot cache entry */ #define NFSD_PAGES_PER_SLOT 1 +#define NFSD_SLOT_CACHE_SIZE PAGE_SIZE /* Maximum number of operations per session compound */ #define NFSD_MAX_OPS_PER_COMPOUND 16 -- cgit v1.2.3 From 49557cc74c7bdf6a984be227ead9a84b3a26f053 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 23 Jul 2009 19:02:16 -0400 Subject: nfsd41: Use separate DRC for setclientid Instead of trying to share the generic 4.1 reply cache code for the CREATE_SESSION reply cache, it's simpler to handle CREATE_SESSION separately. The nfs41 single slot clientid DRC holds the results of create session processing. CREATE_SESSION can be preceeded by a SEQUENCE operation (an embedded CREATE_SESSION) and the create session single slot cache must be maintained. nfsd4_replay_cache_entry() and nfsd4_store_cache_entry() do not implement the replay of an embedded CREATE_SESSION. The clientid DRC slot does not need the inuse, cachethis or other fields that the multiple slot session cache uses. Replace the clientid DRC cache struct nfs4_slot cache with a new nfsd4_clid_slot cache. Save the xdr struct nfsd4_create_session into the cache at the end of processing, and on a replay, replace the struct for the replay request with the cached version all while under the state lock. nfsd4_proc_compound will handle both the solo and embedded CREATE_SESSION case via the normal use of encode_operation. Errors that do not change the create session cache: A create session NFS4ERR_STALE_CLIENTID error means that a client record (and associated create session slot) could not be found and therefore can't be changed. NFSERR_SEQ_MISORDERED errors do not change the slot cache. All other errors get cached. Remove the clientid DRC specific check in nfs4svc_encode_compoundres to put the session only if cstate.session is set which will now always be true. Signed-off-by: Andy Adamson Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4state.c | 64 +++++++++++++++++++++++++++------------------- fs/nfsd/nfs4xdr.c | 3 +-- include/linux/nfsd/state.h | 21 ++++++++++++++- include/linux/nfsd/xdr4.h | 12 --------- 5 files changed, 60 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d781658e8084..d606c6a427de 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1120,7 +1120,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, BUG_ON(op->status == nfs_ok); encode_op: - /* Only from SEQUENCE or CREATE_SESSION */ + /* Only from SEQUENCE */ if (resp->cstate.status == nfserr_replay_cache) { dprintk("%s NFS4.1 replay from cache\n", __func__); if (nfsd4_not_cached(resp)) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 99df8e7a687b..7729d092c8a5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -653,8 +653,6 @@ static inline void free_client(struct nfs4_client *clp) { shutdown_callback_client(clp); - nfsd4_release_respages(clp->cl_slot.sl_cache_entry.ce_respages, - clp->cl_slot.sl_cache_entry.ce_resused); if (clp->cl_cred.cr_group_info) put_group_info(clp->cl_cred.cr_group_info); kfree(clp->cl_principal); @@ -1293,12 +1291,11 @@ out_copy: exid->clientid.cl_boot = new->cl_clientid.cl_boot; exid->clientid.cl_id = new->cl_clientid.cl_id; - new->cl_slot.sl_seqid = 0; exid->seqid = 1; nfsd4_set_ex_flags(new, exid); dprintk("nfsd4_exchange_id seqid %d flags %x\n", - new->cl_slot.sl_seqid, new->cl_exchange_flags); + new->cl_cs_slot.sl_seqid, new->cl_exchange_flags); status = nfs_ok; out: @@ -1334,15 +1331,35 @@ check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse) return nfserr_seq_misordered; } +/* + * Cache the create session result into the create session single DRC + * slot cache by saving the xdr structure. sl_seqid has been set. + * Do this for solo or embedded create session operations. + */ +static void +nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot, int nfserr) +{ + slot->sl_status = nfserr; + memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses)); +} + +static __be32 +nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, + struct nfsd4_clid_slot *slot) +{ + memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses)); + return slot->sl_status; +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create_session *cr_ses) { u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; - struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfs4_client *conf, *unconf; - struct nfsd4_slot *slot = NULL; + struct nfsd4_clid_slot *cs_slot = NULL; int status = 0; nfs4_lock_state(); @@ -1350,25 +1367,22 @@ nfsd4_create_session(struct svc_rqst *rqstp, conf = find_confirmed_client(&cr_ses->clientid); if (conf) { - slot = &conf->cl_slot; - status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid, - slot->sl_inuse); + cs_slot = &conf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status == nfserr_replay_cache) { dprintk("Got a create_session replay! 
seqid= %d\n", - slot->sl_seqid); - cstate->slot = slot; - cstate->status = status; + cs_slot->sl_seqid); /* Return the cached reply status */ - status = nfsd4_replay_cache_entry(resp, NULL); + status = nfsd4_replay_create_session(cr_ses, cs_slot); goto out; - } else if (cr_ses->seqid != conf->cl_slot.sl_seqid + 1) { + } else if (cr_ses->seqid != cs_slot->sl_seqid + 1) { status = nfserr_seq_misordered; dprintk("Sequence misordered!\n"); dprintk("Expected seqid= %d but got seqid= %d\n", - slot->sl_seqid, cr_ses->seqid); + cs_slot->sl_seqid, cr_ses->seqid); goto out; } - conf->cl_slot.sl_seqid++; + cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || (ip_addr != unconf->cl_addr)) { @@ -1376,16 +1390,15 @@ nfsd4_create_session(struct svc_rqst *rqstp, goto out; } - slot = &unconf->cl_slot; - status = check_slot_seqid(cr_ses->seqid, slot->sl_seqid, - slot->sl_inuse); + cs_slot = &unconf->cl_cs_slot; + status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0); if (status) { /* an unconfirmed replay returns misordered */ status = nfserr_seq_misordered; - goto out; + goto out_cache; } - slot->sl_seqid++; /* from 0 to 1 */ + cs_slot->sl_seqid++; /* from 0 to 1 */ move_to_confirmed(unconf); /* @@ -1406,12 +1419,11 @@ nfsd4_create_session(struct svc_rqst *rqstp, memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); - cr_ses->seqid = slot->sl_seqid; + cr_ses->seqid = cs_slot->sl_seqid; - slot->sl_inuse = true; - cstate->slot = slot; - /* Ensure a page is used for the cache */ - slot->sl_cache_entry.ce_cachethis = 1; +out_cache: + /* cache solo and embedded create sessions under the state lock */ + nfsd4_cache_create_session(cr_ses, cs_slot, status); out: nfs4_unlock_state(); dprintk("%s returns %d\n", __func__, ntohl(status)); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2dcc7feaa6ff..fdf632bf1cfe 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3313,8 +3313,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__); resp->cstate.slot->sl_inuse = 0; } - if (resp->cstate.session) - nfsd4_put_session(resp->cstate.session); + nfsd4_put_session(resp->cstate.session); } return 1; } diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index a6c87d623891..58bb19784e12 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -127,6 +127,25 @@ struct nfsd4_channel_attrs { u32 rdma_attrs; }; +struct nfsd4_create_session { + clientid_t clientid; + struct nfs4_sessionid sessionid; + u32 seqid; + u32 flags; + struct nfsd4_channel_attrs fore_channel; + struct nfsd4_channel_attrs back_channel; + u32 callback_prog; + u32 uid; + u32 gid; +}; + +/* The single slot clientid cache structure */ +struct nfsd4_clid_slot { + u32 sl_seqid; + __be32 sl_status; + struct nfsd4_create_session sl_cr_ses; +}; + struct nfsd4_session { struct kref se_ref; struct list_head se_hash; /* hash by sessionid */ @@ -193,7 +212,7 @@ struct nfs4_client { /* for nfs41 */ struct list_head cl_sessions; - struct nfsd4_slot cl_slot; /* create_session slot */ + struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ u32 cl_exchange_flags; struct nfs4_sessionid cl_sessionid; }; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 2bacf7535069..5e4beb0deb80 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -366,18 +366,6 @@ struct nfsd4_exchange_id { int spa_how; }; -struct nfsd4_create_session 
{ - clientid_t clientid; - struct nfs4_sessionid sessionid; - u32 seqid; - u32 flags; - struct nfsd4_channel_attrs fore_channel; - struct nfsd4_channel_attrs back_channel; - u32 callback_prog; - u32 uid; - u32 gid; -}; - struct nfsd4_sequence { struct nfs4_sessionid sessionid; /* request/response */ u32 seqid; /* request/response */ -- cgit v1.2.3 From 5920dadfb4aec6c1372c5570e71bcd3b4837e63c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 15 Jul 2009 17:11:41 +0900 Subject: libata: accept late unlocking of HPA On certain configurations, HPA isn't or can't be unlocked during probing but it somehow ends up unlocked afterwards. In the following thread, the problem can be reliably reproduced after resuming from STR. The BIOS turns on HPA during boot but forgets to do it during resume. http://thread.gmane.org/gmane.linux.kernel/858310 This patch updates libata revalidation such that it considers native n_sectors. If the device size has increased to match native n_sectors, it's assumed that HPA has been unlocked involuntarily and the device is recognized as the same one. This should be fairly safe while nicely working around the problem. Signed-off-by: Tejun Heo Reported-by: Christof Warlich Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 30 +++++++++++++++++++++++------- include/linux/libata.h | 1 + 2 files changed, 24 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 2c6aedaef718..8ac98ff16d7d 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -1515,6 +1515,7 @@ static int ata_hpa_resize(struct ata_device *dev) return rc; } + dev->n_native_sectors = native_sectors; /* nothing to do? */ if (native_sectors <= sectors || !ata_ignore_hpa) { @@ -4099,6 +4100,7 @@ int ata_dev_revalidate(struct ata_device *dev, unsigned int new_class, unsigned int readid_flags) { u64 n_sectors = dev->n_sectors; + u64 n_native_sectors = dev->n_native_sectors; int rc; if (!ata_dev_enabled(dev)) @@ -4128,16 +4130,30 @@ int ata_dev_revalidate(struct ata_device *dev, unsigned int new_class, /* verify n_sectors hasn't changed */ if (dev->class == ATA_DEV_ATA && n_sectors && dev->n_sectors != n_sectors) { - ata_dev_printk(dev, KERN_INFO, "n_sectors mismatch " + ata_dev_printk(dev, KERN_WARNING, "n_sectors mismatch " "%llu != %llu\n", (unsigned long long)n_sectors, (unsigned long long)dev->n_sectors); - - /* restore original n_sectors */ - dev->n_sectors = n_sectors; - - rc = -ENODEV; - goto fail; + /* + * Something could have caused HPA to be unlocked + * involuntarily. If n_native_sectors hasn't changed + * and the new size matches it, keep the device. 
+ */ + if (dev->n_native_sectors == n_native_sectors && + dev->n_sectors > n_sectors && + dev->n_sectors == n_native_sectors) { + ata_dev_printk(dev, KERN_WARNING, + "new n_sectors matches native, probably " + "late HPA unlock, continuing\n"); + /* keep using the old n_sectors */ + dev->n_sectors = n_sectors; + } else { + /* restore original n_[native]_sectors and fail */ + dev->n_native_sectors = n_native_sectors; + dev->n_sectors = n_sectors; + rc = -ENODEV; + goto fail; + } } return 0; diff --git a/include/linux/libata.h b/include/linux/libata.h index 79b6d7fd4ac2..e5b6e33c6571 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -589,6 +589,7 @@ struct ata_device { #endif /* n_sector is CLEAR_BEGIN, read comment above CLEAR_BEGIN */ u64 n_sectors; /* size of device, if ATA */ + u64 n_native_sectors; /* native size, if ATA */ unsigned int class; /* ATA_DEV_xxx */ unsigned long unpark_deadline; -- cgit v1.2.3 From e024e11070a0a0dc7163ce1ec2da354a638bdbed Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 24 Jun 2009 09:48:08 +1000 Subject: drm/radeon/kms: add initial colortiling support. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds new set/get tiling interfaces where the pitch and macro/micro tiling enables can be set. Along with a flag to decide if this object should have a surface when mapped. The only thing we need to allocate with a mapped surface should be the frontbuffer. Note rotate scanout shouldn't require one, and back/depth shouldn't either, though mesa needs some fixes. It fixes the TTM interfaces along Thomas's suggestions, and I've tested the surface stealing code with two X servers and not seen any lockdep issues. I've stopped tiling the fbcon frontbuffer, as I don't see there being any advantage other than testing, I've left the testing commands in there, just flip the fb_tiled to true in radeon_fb.c Open: Can we integrate endian swapping in with this? Future features: texture tiling - need to relocate texture registers TXOFFSET* with tiling info. This also merges Michel's cleanup surfaces regs at init time patch even though it makes sense on its own, this patch really relies on it. Some PowerMac firmwares set up a tiling surface at the beginning of VRAM which messes us up otherwise. 
that patch is: Signed-off-by: Michel Dänzer Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/atombios_crtc.c | 11 ++- drivers/gpu/drm/radeon/r100.c | 79 ++++++++++++++++- drivers/gpu/drm/radeon/r300.c | 51 ++++++++++- drivers/gpu/drm/radeon/r300_reg.h | 4 +- drivers/gpu/drm/radeon/radeon.h | 32 ++++++- drivers/gpu/drm/radeon/radeon_asic.h | 19 ++++ drivers/gpu/drm/radeon/radeon_device.c | 2 + drivers/gpu/drm/radeon/radeon_fb.c | 12 ++- drivers/gpu/drm/radeon/radeon_gem.c | 41 +++++++++ drivers/gpu/drm/radeon/radeon_kms.c | 2 + drivers/gpu/drm/radeon/radeon_legacy_crtc.c | 15 ++-- drivers/gpu/drm/radeon/radeon_object.c | 130 ++++++++++++++++++++++++++++ drivers/gpu/drm/radeon/radeon_ttm.c | 2 + drivers/gpu/drm/ttm/ttm_bo.c | 5 +- drivers/gpu/drm/ttm/ttm_bo_vm.c | 3 + include/drm/radeon_drm.h | 23 ++++- include/drm/ttm/ttm_bo_driver.h | 15 ++++ 17 files changed, 427 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index e64a199b5ee1..eac26cdb5dae 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -327,7 +327,7 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y, struct drm_gem_object *obj; struct drm_radeon_gem_object *obj_priv; uint64_t fb_location; - uint32_t fb_format, fb_pitch_pixels; + uint32_t fb_format, fb_pitch_pixels, tiling_flags; if (!crtc->fb) return -EINVAL; @@ -364,7 +364,14 @@ int atombios_crtc_set_base(struct drm_crtc *crtc, int x, int y, return -EINVAL; } - /* TODO tiling */ + radeon_object_get_tiling_flags(obj->driver_private, + &tiling_flags, NULL); + if (tiling_flags & RADEON_TILING_MACRO) + fb_format |= AVIVO_D1GRPH_MACRO_ADDRESS_MODE; + + if (tiling_flags & RADEON_TILING_MICRO) + fb_format |= AVIVO_D1GRPH_TILED; + if (radeon_crtc->crtc_id == 0) WREG32(AVIVO_D1VGA_CONTROL, 0); else diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 0d05909f03f6..69bd7cb59972 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -909,6 +909,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p, unsigned idx; bool onereg; int r; + u32 tile_flags = 0; ib = p->ib->ptr; ib_chunk = &p->chunks[p->chunk_ib_idx]; @@ -942,7 +943,20 @@ static int r100_packet0_check(struct radeon_cs_parser *p, } tmp = ib_chunk->kdata[idx] & 0x003fffff; tmp += (((u32)reloc->lobj.gpu_offset) >> 10); - ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp; + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_DST_TILE_MACRO; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reg == RADEON_SRC_PITCH_OFFSET) { + DRM_ERROR("Cannot src blit from microtiled surface\n"); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + tile_flags |= RADEON_DST_TILE_MICRO; + } + + tmp |= tile_flags; + ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp; break; case RADEON_RB3D_DEPTHOFFSET: case RADEON_RB3D_COLOROFFSET: @@ -987,6 +1001,25 @@ static int r100_packet0_check(struct radeon_cs_parser *p, } ib[idx] = ib_chunk->kdata[idx] + ((u32)reloc->lobj.gpu_offset); break; + case R300_RB3D_COLORPITCH0: + case RADEON_RB3D_COLORPITCH: + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_COLOR_TILE_ENABLE; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + tile_flags |= 
RADEON_COLOR_MICROTILE_ENABLE; + + tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); + tmp |= tile_flags; + ib[idx] = tmp; + break; default: /* FIXME: we don't want to allow anyothers packet */ break; @@ -1707,3 +1740,47 @@ int r100_debugfs_mc_info_init(struct radeon_device *rdev) return 0; #endif } + +int r100_set_surface_reg(struct radeon_device *rdev, int reg, + uint32_t tiling_flags, uint32_t pitch, + uint32_t offset, uint32_t obj_size) +{ + int surf_index = reg * 16; + int flags = 0; + + /* r100/r200 divide by 16 */ + if (rdev->family < CHIP_R300) + flags = pitch / 16; + else + flags = pitch / 8; + + if (rdev->family <= CHIP_RS200) { + if ((tiling_flags & (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) + == (RADEON_TILING_MACRO|RADEON_TILING_MICRO)) + flags |= RADEON_SURF_TILE_COLOR_BOTH; + if (tiling_flags & RADEON_TILING_MACRO) + flags |= RADEON_SURF_TILE_COLOR_MACRO; + } else if (rdev->family <= CHIP_RV280) { + if (tiling_flags & (RADEON_TILING_MACRO)) + flags |= R200_SURF_TILE_COLOR_MACRO; + if (tiling_flags & RADEON_TILING_MICRO) + flags |= R200_SURF_TILE_COLOR_MICRO; + } else { + if (tiling_flags & RADEON_TILING_MACRO) + flags |= R300_SURF_TILE_MACRO; + if (tiling_flags & RADEON_TILING_MICRO) + flags |= R300_SURF_TILE_MICRO; + } + + DRM_DEBUG("writing surface %d %d %x %x\n", reg, flags, offset, offset+obj_size-1); + WREG32(RADEON_SURFACE0_INFO + surf_index, flags); + WREG32(RADEON_SURFACE0_LOWER_BOUND + surf_index, offset); + WREG32(RADEON_SURFACE0_UPPER_BOUND + surf_index, offset + obj_size - 1); + return 0; +} + +void r100_clear_surface_reg(struct radeon_device *rdev, int reg) +{ + int surf_index = reg * 16; + WREG32(RADEON_SURFACE0_INFO + surf_index, 0); +} diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 0e0e094da503..28e5777658be 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -30,6 +30,7 @@ #include "drm.h" #include "radeon_reg.h" #include "radeon.h" +#include "radeon_drm.h" /* r300,r350,rv350,rv370,rv380 depends on : */ void r100_hdp_reset(struct radeon_device *rdev); @@ -1023,7 +1024,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p, struct radeon_cs_reloc *reloc; struct r300_cs_track *track; volatile uint32_t *ib; - uint32_t tmp; + uint32_t tmp, tile_flags = 0; unsigned i; int r; @@ -1052,7 +1053,19 @@ static int r300_packet0_check(struct radeon_cs_parser *p, } tmp = ib_chunk->kdata[idx] & 0x003fffff; tmp += (((u32)reloc->lobj.gpu_offset) >> 10); - ib[idx] = (ib_chunk->kdata[idx] & 0xffc00000) | tmp; + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_DST_TILE_MACRO; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reg == RADEON_SRC_PITCH_OFFSET) { + DRM_ERROR("Cannot src blit from microtiled surface\n"); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + tile_flags |= RADEON_DST_TILE_MICRO; + } + tmp |= tile_flags; + ib[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp; break; case R300_RB3D_COLOROFFSET0: case R300_RB3D_COLOROFFSET1: @@ -1141,6 +1154,23 @@ static int r300_packet0_check(struct radeon_cs_parser *p, /* RB3D_COLORPITCH1 */ /* RB3D_COLORPITCH2 */ /* RB3D_COLORPITCH3 */ + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= R300_COLOR_TILE_ENABLE; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + tile_flags |= R300_COLOR_MICROTILE_ENABLE; + + tmp = ib_chunk->kdata[idx] & 
~(0x7 << 16); + tmp |= tile_flags; + ib[idx] = tmp; + i = (reg - 0x4E38) >> 2; track->cb[i].pitch = ib_chunk->kdata[idx] & 0x3FFE; switch (((ib_chunk->kdata[idx] >> 21) & 0xF)) { @@ -1196,6 +1226,23 @@ static int r300_packet0_check(struct radeon_cs_parser *p, break; case 0x4F24: /* ZB_DEPTHPITCH */ + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= R300_DEPTHMACROTILE_ENABLE; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) + tile_flags |= R300_DEPTHMICROTILE_TILED;; + + tmp = ib_chunk->kdata[idx] & ~(0x7 << 16); + tmp |= tile_flags; + ib[idx] = tmp; + track->zb.pitch = ib_chunk->kdata[idx] & 0x3FFC; break; case 0x4104: diff --git a/drivers/gpu/drm/radeon/r300_reg.h b/drivers/gpu/drm/radeon/r300_reg.h index 70f48609515e..4b7afef35a65 100644 --- a/drivers/gpu/drm/radeon/r300_reg.h +++ b/drivers/gpu/drm/radeon/r300_reg.h @@ -27,7 +27,9 @@ #ifndef _R300_REG_H_ #define _R300_REG_H_ - +#define R300_SURF_TILE_MACRO (1<<16) +#define R300_SURF_TILE_MICRO (2<<16) +#define R300_SURF_TILE_BOTH (3<<16) #define R300_MC_INIT_MISC_LAT_TIMER 0x180 diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 7f007185e7f7..af12a2fe3221 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -201,6 +201,14 @@ int radeon_fence_wait_last(struct radeon_device *rdev); struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); void radeon_fence_unref(struct radeon_fence **fence); +/* + * Tiling registers + */ +struct radeon_surface_reg { + struct radeon_object *robj; +}; + +#define RADEON_GEM_MAX_SURFACES 8 /* * Radeon buffer. @@ -213,6 +221,7 @@ struct radeon_object_list { uint64_t gpu_offset; unsigned rdomain; unsigned wdomain; + uint32_t tiling_flags; }; int radeon_object_init(struct radeon_device *rdev); @@ -242,8 +251,15 @@ void radeon_object_list_clean(struct list_head *head); int radeon_object_fbdev_mmap(struct radeon_object *robj, struct vm_area_struct *vma); unsigned long radeon_object_size(struct radeon_object *robj); - - +void radeon_object_clear_surface_reg(struct radeon_object *robj); +int radeon_object_check_tiling(struct radeon_object *robj, bool has_moved, + bool force_drop); +void radeon_object_set_tiling_flags(struct radeon_object *robj, + uint32_t tiling_flags, uint32_t pitch); +void radeon_object_get_tiling_flags(struct radeon_object *robj, uint32_t *tiling_flags, uint32_t *pitch); +void radeon_bo_move_notify(struct ttm_buffer_object *bo, + struct ttm_mem_reg *mem); +void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo); /* * GEM objects. 
*/ @@ -535,6 +551,11 @@ struct radeon_asic { void (*set_memory_clock)(struct radeon_device *rdev, uint32_t mem_clock); void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes); void (*set_clock_gating)(struct radeon_device *rdev, int enable); + + int (*set_surface_reg)(struct radeon_device *rdev, int reg, + uint32_t tiling_flags, uint32_t pitch, + uint32_t offset, uint32_t obj_size); + int (*clear_surface_reg)(struct radeon_device *rdev, int reg); }; union radeon_asic_config { @@ -568,6 +589,10 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); /* @@ -627,6 +652,7 @@ struct radeon_device { bool shutdown; bool suspend; bool need_dma32; + struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES]; }; int radeon_device_init(struct radeon_device *rdev, @@ -801,5 +827,7 @@ static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v) #define radeon_set_memory_clock(rdev, e) (rdev)->asic->set_engine_clock((rdev), (e)) #define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->set_pcie_lanes((rdev), (l)) #define radeon_set_clock_gating(rdev, e) (rdev)->asic->set_clock_gating((rdev), (e)) +#define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->set_surface_reg((rdev), (r), (f), (p), (o), (s))) +#define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->clear_surface_reg((rdev), (r))) #endif diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index e2e567395df8..dd903d329406 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -71,6 +71,10 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t dst_offset, unsigned num_pages, struct radeon_fence *fence); +int r100_set_surface_reg(struct radeon_device *rdev, int reg, + uint32_t tiling_flags, uint32_t pitch, + uint32_t offset, uint32_t obj_size); +int r100_clear_surface_reg(struct radeon_device *rdev, int reg); static struct radeon_asic r100_asic = { .init = &r100_init, @@ -100,6 +104,8 @@ static struct radeon_asic r100_asic = { .set_memory_clock = NULL, .set_pcie_lanes = NULL, .set_clock_gating = &radeon_legacy_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; @@ -128,6 +134,7 @@ int r300_copy_dma(struct radeon_device *rdev, uint64_t dst_offset, unsigned num_pages, struct radeon_fence *fence); + static struct radeon_asic r300_asic = { .init = &r300_init, .errata = &r300_errata, @@ -156,6 +163,8 @@ static struct radeon_asic r300_asic = { .set_memory_clock = NULL, .set_pcie_lanes = &rv370_set_pcie_lanes, .set_clock_gating = &radeon_legacy_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; /* @@ -193,6 +202,8 @@ static struct radeon_asic r420_asic = { .set_memory_clock = &radeon_atom_set_memory_clock, .set_pcie_lanes = &rv370_set_pcie_lanes, .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; @@ -237,6 +248,8 @@ static struct radeon_asic rs400_asic = { .set_memory_clock = NULL, .set_pcie_lanes = NULL, .set_clock_gating = &radeon_legacy_set_clock_gating, + 
.set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; @@ -322,6 +335,8 @@ static struct radeon_asic rs690_asic = { .set_memory_clock = &radeon_atom_set_memory_clock, .set_pcie_lanes = NULL, .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; @@ -367,6 +382,8 @@ static struct radeon_asic rv515_asic = { .set_memory_clock = &radeon_atom_set_memory_clock, .set_pcie_lanes = &rv370_set_pcie_lanes, .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; @@ -405,6 +422,8 @@ static struct radeon_asic r520_asic = { .set_memory_clock = &radeon_atom_set_memory_clock, .set_pcie_lanes = &rv370_set_pcie_lanes, .set_clock_gating = &radeon_atom_set_clock_gating, + .set_surface_reg = r100_set_surface_reg, + .clear_surface_reg = r100_clear_surface_reg, }; /* diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index cdef6eb01baf..f23083bbba3f 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -48,6 +48,8 @@ static void radeon_surface_init(struct radeon_device *rdev) i * (RADEON_SURFACE1_INFO - RADEON_SURFACE0_INFO), 0); } + /* enable surfaces */ + WREG32(RADEON_SURFACE_CNTL, 0); } } diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 260870a29d83..36d2f5588f20 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -471,10 +471,10 @@ static struct notifier_block paniced = { .notifier_call = radeonfb_panic, }; -static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp) +static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp, bool tiled) { int aligned = width; - int align_large = (ASIC_IS_AVIVO(rdev)); + int align_large = (ASIC_IS_AVIVO(rdev)) || tiled; int pitch_mask = 0; switch (bpp / 8) { @@ -512,12 +512,13 @@ int radeonfb_create(struct radeon_device *rdev, u64 fb_gpuaddr; void *fbptr = NULL; unsigned long tmp; + bool fb_tiled = false; /* useful for testing */ mode_cmd.width = surface_width; mode_cmd.height = surface_height; mode_cmd.bpp = 32; /* need to align pitch with crtc limits */ - mode_cmd.pitch = radeon_align_pitch(rdev, mode_cmd.width, mode_cmd.bpp) * ((mode_cmd.bpp + 1) / 8); + mode_cmd.pitch = radeon_align_pitch(rdev, mode_cmd.width, mode_cmd.bpp, fb_tiled) * ((mode_cmd.bpp + 1) / 8); mode_cmd.depth = 24; size = mode_cmd.pitch * mode_cmd.height; @@ -535,6 +536,8 @@ int radeonfb_create(struct radeon_device *rdev, } robj = gobj->driver_private; + if (fb_tiled) + radeon_object_set_tiling_flags(robj, RADEON_TILING_MACRO|RADEON_TILING_SURFACE, mode_cmd.pitch); mutex_lock(&rdev->ddev->struct_mutex); fb = radeon_framebuffer_create(rdev->ddev, &mode_cmd, gobj); if (fb == NULL) { @@ -563,6 +566,9 @@ int radeonfb_create(struct radeon_device *rdev, } rfbdev = info->par; + if (fb_tiled) + radeon_object_check_tiling(robj, 0, 0); + ret = radeon_object_kmap(robj, &fbptr); if (ret) { goto out_unref; diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index eb516034235d..12542087b298 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -285,3 +285,44 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev->struct_mutex); return r; } + +int radeon_gem_set_tiling_ioctl(struct drm_device *dev, 
void *data, + struct drm_file *filp) +{ + struct drm_radeon_gem_set_tiling *args = data; + struct drm_gem_object *gobj; + struct radeon_object *robj; + int r = 0; + + DRM_DEBUG("%d \n", args->handle); + gobj = drm_gem_object_lookup(dev, filp, args->handle); + if (gobj == NULL) + return -EINVAL; + robj = gobj->driver_private; + radeon_object_set_tiling_flags(robj, args->tiling_flags, args->pitch); + mutex_lock(&dev->struct_mutex); + drm_gem_object_unreference(gobj); + mutex_unlock(&dev->struct_mutex); + return r; +} + +int radeon_gem_get_tiling_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + struct drm_radeon_gem_get_tiling *args = data; + struct drm_gem_object *gobj; + struct radeon_object *robj; + int r = 0; + + DRM_DEBUG("\n"); + gobj = drm_gem_object_lookup(dev, filp, args->handle); + if (gobj == NULL) + return -EINVAL; + robj = gobj->driver_private; + radeon_object_get_tiling_flags(robj, &args->tiling_flags, + &args->pitch); + mutex_lock(&dev->struct_mutex); + drm_gem_object_unreference(gobj); + mutex_unlock(&dev->struct_mutex); + return r; +} diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 4612a7c146d1..937a2f1cdb46 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -291,5 +291,7 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF(DRM_RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_CS, radeon_cs_ioctl, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index 14c1a5107fc9..0613790e2a5b 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -235,6 +235,7 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, uint64_t base; uint32_t crtc_offset, crtc_offset_cntl, crtc_tile_x0_y0 = 0; uint32_t crtc_pitch, pitch_pixels; + uint32_t tiling_flags; DRM_DEBUG("\n"); @@ -258,8 +259,12 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, (crtc->fb->bits_per_pixel * 8)); crtc_pitch |= crtc_pitch << 16; - /* TODO tiling */ - if (0) { + radeon_object_get_tiling_flags(obj->driver_private, + &tiling_flags, NULL); + if (tiling_flags & RADEON_TILING_MICRO) + DRM_ERROR("trying to scanout microtiled buffer\n"); + + if (tiling_flags & RADEON_TILING_MACRO) { if (ASIC_IS_R300(rdev)) crtc_offset_cntl |= (R300_CRTC_X_Y_MODE_EN | R300_CRTC_MICRO_TILE_BUFFER_DIS | @@ -275,15 +280,13 @@ int radeon_crtc_set_base(struct drm_crtc *crtc, int x, int y, crtc_offset_cntl &= ~RADEON_CRTC_TILE_EN; } - - /* TODO more tiling */ - if (0) { + if (tiling_flags & RADEON_TILING_MACRO) { if (ASIC_IS_R300(rdev)) { crtc_tile_x0_y0 = x | (y << 16); base &= ~0x7ff; } else { int byteshift = crtc->fb->bits_per_pixel >> 4; - int tile_addr = (((y >> 3) * crtc->fb->width + x) >> (8 - byteshift)) << 11; + int tile_addr = (((y >> 3) * pitch_pixels + x) >> (8 - byteshift)) << 11; base += tile_addr + ((x << byteshift) % 256) + ((y % 8) << 8); crtc_offset_cntl |= (y % 16); } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index bac0d06c52ac..d5b1fd562d88 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ 
b/drivers/gpu/drm/radeon/radeon_object.c @@ -44,6 +44,9 @@ struct radeon_object { uint64_t gpu_addr; void *kptr; bool is_iomem; + uint32_t tiling_flags; + uint32_t pitch; + int surface_reg; }; int radeon_ttm_init(struct radeon_device *rdev); @@ -70,6 +73,7 @@ static void radeon_ttm_object_object_destroy(struct ttm_buffer_object *tobj) robj = container_of(tobj, struct radeon_object, tobj); list_del_init(&robj->list); + radeon_object_clear_surface_reg(robj); kfree(robj); } @@ -141,6 +145,7 @@ int radeon_object_create(struct radeon_device *rdev, } robj->rdev = rdev; robj->gobj = gobj; + robj->surface_reg = -1; INIT_LIST_HEAD(&robj->list); flags = radeon_object_flags_from_domain(domain); @@ -435,6 +440,7 @@ int radeon_object_list_validate(struct list_head *head, void *fence) radeon_object_gpu_addr(robj); } lobj->gpu_offset = robj->gpu_addr; + lobj->tiling_flags = robj->tiling_flags; if (fence) { old_fence = (struct radeon_fence *)robj->tobj.sync_obj; robj->tobj.sync_obj = radeon_fence_ref(fence); @@ -479,3 +485,127 @@ unsigned long radeon_object_size(struct radeon_object *robj) { return robj->tobj.num_pages << PAGE_SHIFT; } + +int radeon_object_get_surface_reg(struct radeon_object *robj) +{ + struct radeon_device *rdev = robj->rdev; + struct radeon_surface_reg *reg; + struct radeon_object *old_object; + int steal; + int i; + + if (!robj->tiling_flags) + return 0; + + if (robj->surface_reg >= 0) { + reg = &rdev->surface_regs[robj->surface_reg]; + i = robj->surface_reg; + goto out; + } + + steal = -1; + for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) { + + reg = &rdev->surface_regs[i]; + if (!reg->robj) + break; + + old_object = reg->robj; + if (old_object->pin_count == 0) + steal = i; + } + + /* if we are all out */ + if (i == RADEON_GEM_MAX_SURFACES) { + if (steal == -1) + return -ENOMEM; + /* find someone with a surface reg and nuke their BO */ + reg = &rdev->surface_regs[steal]; + old_object = reg->robj; + /* blow away the mapping */ + DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object); + ttm_bo_unmap_virtual(&old_object->tobj); + old_object->surface_reg = -1; + i = steal; + } + + robj->surface_reg = i; + reg->robj = robj; + +out: + radeon_set_surface_reg(rdev, i, robj->tiling_flags, robj->pitch, + robj->tobj.mem.mm_node->start << PAGE_SHIFT, + robj->tobj.num_pages << PAGE_SHIFT); + return 0; +} + +void radeon_object_clear_surface_reg(struct radeon_object *robj) +{ + struct radeon_device *rdev = robj->rdev; + struct radeon_surface_reg *reg; + + if (robj->surface_reg == -1) + return; + + reg = &rdev->surface_regs[robj->surface_reg]; + radeon_clear_surface_reg(rdev, robj->surface_reg); + + reg->robj = NULL; + robj->surface_reg = -1; +} + +void radeon_object_set_tiling_flags(struct radeon_object *robj, + uint32_t tiling_flags, uint32_t pitch) +{ + robj->tiling_flags = tiling_flags; + robj->pitch = pitch; +} + +void radeon_object_get_tiling_flags(struct radeon_object *robj, + uint32_t *tiling_flags, + uint32_t *pitch) +{ + if (tiling_flags) + *tiling_flags = robj->tiling_flags; + if (pitch) + *pitch = robj->pitch; +} + +int radeon_object_check_tiling(struct radeon_object *robj, bool has_moved, + bool force_drop) +{ + if (!(robj->tiling_flags & RADEON_TILING_SURFACE)) + return 0; + + if (force_drop) { + radeon_object_clear_surface_reg(robj); + return 0; + } + + if (robj->tobj.mem.mem_type != TTM_PL_VRAM) { + if (!has_moved) + return 0; + + if (robj->surface_reg >= 0) + radeon_object_clear_surface_reg(robj); + return 0; + } + + if ((robj->surface_reg >= 0) && !has_moved) + return 0; + 
+ return radeon_object_get_surface_reg(robj); +} + +void radeon_bo_move_notify(struct ttm_buffer_object *bo, + struct ttm_mem_reg *mem) +{ + struct radeon_object *robj = container_of(bo, struct radeon_object, tobj); + radeon_object_check_tiling(robj, 0, 1); +} + +void radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo) +{ + struct radeon_object *robj = container_of(bo, struct radeon_object, tobj); + radeon_object_check_tiling(robj, 0, 0); +} diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 4ca9aa9203d0..37e1cbcce3a9 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -429,6 +429,8 @@ static struct ttm_bo_driver radeon_bo_driver = { .sync_obj_flush = &radeon_sync_obj_flush, .sync_obj_unref = &radeon_sync_obj_unref, .sync_obj_ref = &radeon_sync_obj_ref, + .move_notify = &radeon_bo_move_notify, + .fault_reserve_notify = &radeon_bo_fault_reserve_notify, }; int radeon_ttm_init(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index e55e7972c897..6538d4236989 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -43,7 +43,6 @@ #define TTM_BO_HASH_ORDER 13 static int ttm_bo_setup_vm(struct ttm_buffer_object *bo); -static void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo); static int ttm_bo_swapout(struct ttm_mem_shrink *shrink); static inline uint32_t ttm_bo_type_flags(unsigned type) @@ -307,6 +306,9 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, } + if (bdev->driver->move_notify) + bdev->driver->move_notify(bo, mem); + if (!(old_man->flags & TTM_MEMTYPE_FLAG_FIXED) && !(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) ret = ttm_bo_move_ttm(bo, evict, no_wait, mem); @@ -1451,6 +1453,7 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo) unmap_mapping_range(bdev->dev_mapping, offset, holelen, 1); } +EXPORT_SYMBOL(ttm_bo_unmap_virtual); static void ttm_bo_vm_insert_rb(struct ttm_buffer_object *bo) { diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 40b75032ea47..41c907f6c560 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -101,6 +101,9 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_NOPAGE; } + if (bdev->driver->fault_reserve_notify) + bdev->driver->fault_reserve_notify(bo); + /* * Wait for buffer data in transit, due to a pipelined * move. 
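
For orientation, a hedged sketch of how a TTM-based driver is expected to wire the two notify hooks radeon installs above: move_notify fires when a buffer changes placement, and fault_reserve_notify fires once the buffer has been reserved in the fault path. Only the callback signatures come from this patch; the mydrv_* structure and helpers below are invented for illustration and stand in for driver-private per-location state such as radeon's surface registers.

	/* assumes the ttm_bo_driver definitions added later in this patch */

	struct mydrv_bo {
		struct ttm_buffer_object tbo;	/* embeds the TTM object */
		/* driver-private tiling state would live here */
	};

	/* driver-private helpers, assumed to exist for this sketch */
	static void mydrv_bo_drop_location_state(struct mydrv_bo *mbo);
	static void mydrv_bo_setup_location_state(struct mydrv_bo *mbo);

	static void mydrv_bo_move_notify(struct ttm_buffer_object *bo,
					 struct ttm_mem_reg *new_mem)
	{
		struct mydrv_bo *mbo = container_of(bo, struct mydrv_bo, tbo);

		/* Placement is changing: drop state tied to the old location
		 * (radeon clears the buffer's surface register here). */
		mydrv_bo_drop_location_state(mbo);
	}

	static void mydrv_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
	{
		struct mydrv_bo *mbo = container_of(bo, struct mydrv_bo, tbo);

		/* The reserved buffer is about to be mapped into user space:
		 * re-establish whatever the mapping needs (radeon re-acquires
		 * a surface register for tiled buffers sitting in VRAM). */
		mydrv_bo_setup_location_state(mbo);
	}

	static struct ttm_bo_driver mydrv_bo_driver = {
		/* ... existing sync_obj and move hooks ... */
		.move_notify		= &mydrv_bo_move_notify,
		.fault_reserve_notify	= &mydrv_bo_fault_reserve_notify,
	};
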
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index 41862e9a4c20..af4b4826997e 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -506,6 +506,8 @@ typedef struct { #define DRM_RADEON_GEM_WAIT_IDLE 0x24 #define DRM_RADEON_CS 0x26 #define DRM_RADEON_INFO 0x27 +#define DRM_RADEON_GEM_SET_TILING 0x28 +#define DRM_RADEON_GEM_GET_TILING 0x29 #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) @@ -544,7 +546,8 @@ typedef struct { #define DRM_IOCTL_RADEON_GEM_WAIT_IDLE DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_GEM_WAIT_IDLE, struct drm_radeon_gem_wait_idle) #define DRM_IOCTL_RADEON_CS DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_CS, struct drm_radeon_cs) #define DRM_IOCTL_RADEON_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info) - +#define DRM_IOCTL_RADEON_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling) +#define DRM_IOCTL_RADEON_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling) typedef struct drm_radeon_init { enum { @@ -796,6 +799,24 @@ struct drm_radeon_gem_create { uint32_t flags; }; +#define RADEON_TILING_MACRO 0x1 +#define RADEON_TILING_MICRO 0x2 +#define RADEON_TILING_SWAP 0x4 +#define RADEON_TILING_SURFACE 0x8 /* this object requires a surface + * when mapped - i.e. front buffer */ + +struct drm_radeon_gem_set_tiling { + uint32_t handle; + uint32_t tiling_flags; + uint32_t pitch; +}; + +struct drm_radeon_gem_get_tiling { + uint32_t handle; + uint32_t tiling_flags; + uint32_t pitch; +}; + struct drm_radeon_gem_mmap { uint32_t handle; uint32_t pad; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index ea83dd23a4d7..a68829db381a 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -354,6 +354,14 @@ struct ttm_bo_driver { int (*sync_obj_flush) (void *sync_obj, void *sync_arg); void (*sync_obj_unref) (void **sync_obj); void *(*sync_obj_ref) (void *sync_obj); + + /* hook to notify driver about a driver move so it + * can do tiling things */ + void (*move_notify)(struct ttm_buffer_object *bo, + struct ttm_mem_reg *new_mem); + /* notify the driver we are taking a fault on this BO + * and have reserved it */ + void (*fault_reserve_notify)(struct ttm_buffer_object *bo); }; #define TTM_NUM_MEM_TYPES 8 @@ -653,6 +661,13 @@ extern int ttm_bo_device_init(struct ttm_bo_device *bdev, struct ttm_bo_driver *driver, uint64_t file_page_offset, bool need_dma32); +/** + * ttm_bo_unmap_virtual + * + * @bo: tear down the virtual mappings for this BO + */ +extern void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo); + /** * ttm_bo_reserve: * -- cgit v1.2.3 From dddac6a7b445de95515f64fdf82fe5dc36c02f26 Mon Sep 17 00:00:00 2001 From: Alan Jenkins Date: Wed, 29 Jul 2009 21:07:55 +0200 Subject: PM / Hibernate: Replace bdget call with simple atomic_inc of i_count Create bdgrab(). This function copies an existing reference to a block_device. It is safe to call from any context. Hibernation code wishes to copy a reference to the active swap device. Right now it calls bdget() under a spinlock, but this is wrong because bdget() can sleep. It doesn't need a full bdget() because we already hold a reference to active swap devices (and the spinlock protects against swapoff). Fixes http://bugzilla.kernel.org/show_bug.cgi?id=13827 Signed-off-by: Alan Jenkins Signed-off-by: Rafael J. 
Wysocki --- fs/block_dev.c | 10 ++++++++++ include/linux/fs.h | 1 + mm/swapfile.c | 4 ++-- 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/block_dev.c b/fs/block_dev.c index 3a6d4fb2a329..94dfda24c06e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -564,6 +564,16 @@ struct block_device *bdget(dev_t dev) EXPORT_SYMBOL(bdget); +/** + * bdgrab -- Grab a reference to an already referenced block device + * @bdev: Block device to grab a reference to. + */ +struct block_device *bdgrab(struct block_device *bdev) +{ + atomic_inc(&bdev->bd_inode->i_count); + return bdev; +} + long nr_blockdev_pages(void) { struct block_device *bdev; diff --git a/include/linux/fs.h b/include/linux/fs.h index 0872372184fe..a36ffa5a77a4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1946,6 +1946,7 @@ extern void putname(const char *name); extern int register_blkdev(unsigned int, const char *); extern void unregister_blkdev(unsigned int, const char *); extern struct block_device *bdget(dev_t); +extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); diff --git a/mm/swapfile.c b/mm/swapfile.c index d1ade1a48ee7..8ffdc0d23c53 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -753,7 +753,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) if (!bdev) { if (bdev_p) - *bdev_p = bdget(sis->bdev->bd_dev); + *bdev_p = bdgrab(sis->bdev); spin_unlock(&swap_lock); return i; @@ -765,7 +765,7 @@ int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p) struct swap_extent, list); if (se->start_block == offset) { if (bdev_p) - *bdev_p = bdget(sis->bdev->bd_dev); + *bdev_p = bdgrab(sis->bdev); spin_unlock(&swap_lock); bdput(bdev); -- cgit v1.2.3 From e043e42bdb66885b3ac10d27a01ccb9972e2b0a3 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Wed, 29 Jul 2009 12:15:56 -0700 Subject: pty: avoid forcing 'low_latency' tty flag We really don't want to mark the pty as a low-latency device, because as Alan points out, the ->write method can be called from an IRQ (ppp?), and that means we can't use ->low_latency=1 as we take mutexes in the low_latency case. So rather than using low_latency to force the written data to be pushed to the ldisc handling at 'write()' time, just make the reader side (or the poll function) do the flush when it checks whether there is data to be had. This also fixes the problem with lost data in an emacs compile buffer (bugzilla 13815), and we can thus revert the low_latency pty hack (commit 3a54297478e6578f96fd54bf4daa1751130aca86: "pty: quickfix for the pty ENXIO timing problems"). 
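
As a hedged illustration of the reader-side pattern this change adopts: instead of the pty write path pushing data into the line discipline directly (which is what low_latency forced, and which can happen in atomic context), the consumer drains the flip buffers itself, in process context, right before it checks for data. The sketch is not taken from the patch; my_ldisc_input_available() and its read-queue helper are hypothetical, and only tty_flush_to_ldisc() is the helper the diff below introduces.

	/* assumed helper for this sketch */
	static int my_ldisc_read_queue_has_data(struct tty_struct *tty);

	static int my_ldisc_input_available(struct tty_struct *tty)
	{
		/* Pull anything still queued in the tty flip buffers into the
		 * line discipline now, in process context, where the ldisc
		 * locks may be taken safely. */
		tty_flush_to_ldisc(tty);

		/* then consult the ldisc's own read queue */
		return my_ldisc_read_queue_has_data(tty);
	}

n_tty does exactly this by calling the new helper at the top of input_available_p(), which serves both read() and poll().
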
Signed-off-by: OGAWA Hirofumi Tested-by: Aneesh Kumar K.V [ Modified to do the tty_flush_to_ldisc() inside input_available_p() so that it triggers for both read and poll() - Linus] Signed-off-by: Linus Torvalds --- drivers/char/n_tty.c | 1 + drivers/char/pty.c | 2 -- drivers/char/tty_buffer.c | 13 +++++++++++++ include/linux/tty.h | 1 + 4 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index ff47907ff1bf..973be2f44195 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -1583,6 +1583,7 @@ static int n_tty_open(struct tty_struct *tty) static inline int input_available_p(struct tty_struct *tty, int amt) { + tty_flush_to_ldisc(tty); if (tty->icanon) { if (tty->canon_data) return 1; diff --git a/drivers/char/pty.c b/drivers/char/pty.c index 3850a68f265a..6e6942c45f5b 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -52,7 +52,6 @@ static void pty_close(struct tty_struct *tty, struct file *filp) return; tty->link->packet = 0; set_bit(TTY_OTHER_CLOSED, &tty->link->flags); - tty_flip_buffer_push(tty->link); wake_up_interruptible(&tty->link->read_wait); wake_up_interruptible(&tty->link->write_wait); if (tty->driver->subtype == PTY_TYPE_MASTER) { @@ -208,7 +207,6 @@ static int pty_open(struct tty_struct *tty, struct file *filp) clear_bit(TTY_OTHER_CLOSED, &tty->link->flags); set_bit(TTY_THROTTLED, &tty->flags); retval = 0; - tty->low_latency = 1; out: return retval; } diff --git a/drivers/char/tty_buffer.c b/drivers/char/tty_buffer.c index 810ee25d66a4..3108991c5c8b 100644 --- a/drivers/char/tty_buffer.c +++ b/drivers/char/tty_buffer.c @@ -461,6 +461,19 @@ static void flush_to_ldisc(struct work_struct *work) tty_ldisc_deref(disc); } +/** + * tty_flush_to_ldisc + * @tty: tty to push + * + * Push the terminal flip buffers to the line discipline. + * + * Must not be called from IRQ context. + */ +void tty_flush_to_ldisc(struct tty_struct *tty) +{ + flush_to_ldisc(&tty->buf.work.work); +} + /** * tty_flip_buffer_push - terminal * @tty: tty to push diff --git a/include/linux/tty.h b/include/linux/tty.h index 1488d8c81aac..e8c6c9136c97 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -394,6 +394,7 @@ extern void __do_SAK(struct tty_struct *tty); extern void disassociate_ctty(int priv); extern void no_tty(void); extern void tty_flip_buffer_push(struct tty_struct *tty); +extern void tty_flush_to_ldisc(struct tty_struct *tty); extern void tty_buffer_free_all(struct tty_struct *tty); extern void tty_buffer_flush(struct tty_struct *tty); extern void tty_buffer_init(struct tty_struct *tty); -- cgit v1.2.3 From 0e82ffe3b90bcad72cfe80e4379946b8fb0691ca Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Jul 2009 12:01:50 +0200 Subject: cfg80211: combine iwfreq implementations Until now we implemented iwfreq for managed mode, we needed to keep the implementations separate, but now that we have all versions implemented we can combine them and export just one handler. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwmc3200wifi/wext.c | 34 +-------------- include/net/cfg80211.h | 20 +++------ net/mac80211/wext.c | 73 +------------------------------- net/wireless/core.h | 3 ++ net/wireless/ibss.c | 5 +-- net/wireless/nl80211.c | 2 + net/wireless/wext-compat.c | 54 ++++++++++++++++++++++- net/wireless/wext-compat.h | 21 +++++++++ net/wireless/wext-sme.c | 5 +-- 9 files changed, 91 insertions(+), 126 deletions(-) create mode 100644 net/wireless/wext-compat.h (limited to 'include') diff --git a/drivers/net/wireless/iwmc3200wifi/wext.c b/drivers/net/wireless/iwmc3200wifi/wext.c index c3c90d5963bf..8058e9991c30 100644 --- a/drivers/net/wireless/iwmc3200wifi/wext.c +++ b/drivers/net/wireless/iwmc3200wifi/wext.c @@ -27,36 +27,6 @@ #include "iwm.h" #include "commands.h" -static int iwm_wext_siwfreq(struct net_device *dev, - struct iw_request_info *info, - struct iw_freq *freq, char *extra) -{ - struct iwm_priv *iwm = ndev_to_iwm(dev); - - switch (iwm->conf.mode) { - case UMAC_MODE_IBSS: - return cfg80211_ibss_wext_siwfreq(dev, info, freq, extra); - default: - return -EOPNOTSUPP; - } -} - -static int iwm_wext_giwfreq(struct net_device *dev, - struct iw_request_info *info, - struct iw_freq *freq, char *extra) -{ - struct iwm_priv *iwm = ndev_to_iwm(dev); - - switch (iwm->conf.mode) { - case UMAC_MODE_IBSS: - return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); - case UMAC_MODE_BSS: - return cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); - default: - return -EOPNOTSUPP; - } -} - static int iwm_wext_siwap(struct net_device *dev, struct iw_request_info *info, struct sockaddr *ap_addr, char *extra) { @@ -125,8 +95,8 @@ static const iw_handler iwm_handlers[] = (iw_handler) cfg80211_wext_giwname, /* SIOCGIWNAME */ (iw_handler) NULL, /* SIOCSIWNWID */ (iw_handler) NULL, /* SIOCGIWNWID */ - (iw_handler) iwm_wext_siwfreq, /* SIOCSIWFREQ */ - (iw_handler) iwm_wext_giwfreq, /* SIOCGIWFREQ */ + (iw_handler) cfg80211_wext_siwfreq, /* SIOCSIWFREQ */ + (iw_handler) cfg80211_wext_giwfreq, /* SIOCGIWFREQ */ (iw_handler) cfg80211_wext_siwmode, /* SIOCSIWMODE */ (iw_handler) cfg80211_wext_giwmode, /* SIOCGIWMODE */ (iw_handler) NULL, /* SIOCSIWSENS */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0d278777e39c..5d249c4bf225 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1595,12 +1595,6 @@ int cfg80211_wext_siwmlme(struct net_device *dev, int cfg80211_wext_giwrange(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra); -int cfg80211_ibss_wext_siwfreq(struct net_device *dev, - struct iw_request_info *info, - struct iw_freq *freq, char *extra); -int cfg80211_ibss_wext_giwfreq(struct net_device *dev, - struct iw_request_info *info, - struct iw_freq *freq, char *extra); int cfg80211_ibss_wext_siwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid); @@ -1614,12 +1608,6 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, struct iw_request_info *info, struct sockaddr *ap_addr, char *extra); -int cfg80211_mgd_wext_siwfreq(struct net_device *dev, - struct iw_request_info *info, - struct iw_freq *freq, char *extra); -int cfg80211_mgd_wext_giwfreq(struct net_device *dev, - struct iw_request_info *info, - struct iw_freq *freq, char *extra); int cfg80211_mgd_wext_siwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid); @@ -1642,8 +1630,12 @@ int cfg80211_wext_giwauth(struct net_device *dev, struct iw_request_info *info, struct iw_param 
*data, char *extra); -struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy, - struct iw_freq *freq); +int cfg80211_wext_siwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra); +int cfg80211_wext_giwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra); int cfg80211_wext_siwrate(struct net_device *dev, struct iw_request_info *info, diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 5acb8140ee58..7cd9aa79ef52 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -27,75 +27,6 @@ #include "aes_ccm.h" -static int ieee80211_ioctl_siwfreq(struct net_device *dev, - struct iw_request_info *info, - struct iw_freq *freq, char *extra) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; - struct ieee80211_channel *chan; - - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - return cfg80211_ibss_wext_siwfreq(dev, info, freq, extra); - else if (sdata->vif.type == NL80211_IFTYPE_STATION) - return cfg80211_mgd_wext_siwfreq(dev, info, freq, extra); - - /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ - if (freq->e == 0) { - if (freq->m < 0) - return -EINVAL; - else - chan = ieee80211_get_channel(local->hw.wiphy, - ieee80211_channel_to_frequency(freq->m)); - } else { - int i, div = 1000000; - for (i = 0; i < freq->e; i++) - div /= 10; - if (div <= 0) - return -EINVAL; - chan = ieee80211_get_channel(local->hw.wiphy, freq->m / div); - } - - if (!chan) - return -EINVAL; - - if (chan->flags & IEEE80211_CHAN_DISABLED) - return -EINVAL; - - /* - * no change except maybe auto -> fixed, ignore the HT - * setting so you can fix a channel you're on already - */ - if (local->oper_channel == chan) - return 0; - - local->oper_channel = chan; - local->oper_channel_type = NL80211_CHAN_NO_HT; - ieee80211_hw_config(local, 0); - - return 0; -} - - -static int ieee80211_ioctl_giwfreq(struct net_device *dev, - struct iw_request_info *info, - struct iw_freq *freq, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); - else if (sdata->vif.type == NL80211_IFTYPE_STATION) - return cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); - - freq->m = local->oper_channel->center_freq; - freq->e = 6; - - return 0; -} - - static int ieee80211_ioctl_siwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid) @@ -173,8 +104,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) cfg80211_wext_giwname, /* SIOCGIWNAME */ (iw_handler) NULL, /* SIOCSIWNWID */ (iw_handler) NULL, /* SIOCGIWNWID */ - (iw_handler) ieee80211_ioctl_siwfreq, /* SIOCSIWFREQ */ - (iw_handler) ieee80211_ioctl_giwfreq, /* SIOCGIWFREQ */ + (iw_handler) cfg80211_wext_siwfreq, /* SIOCSIWFREQ */ + (iw_handler) cfg80211_wext_giwfreq, /* SIOCGIWFREQ */ (iw_handler) cfg80211_wext_siwmode, /* SIOCSIWMODE */ (iw_handler) cfg80211_wext_giwmode, /* SIOCGIWMODE */ (iw_handler) NULL, /* SIOCSIWSENS */ diff --git a/net/wireless/core.h b/net/wireless/core.h index 4276b70cd975..6d903c1d721d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -66,6 +66,9 @@ struct cfg80211_registered_device { struct work_struct conn_work; struct work_struct event_work; + /* current channel */ + struct ieee80211_channel *channel; + #ifdef 
CONFIG_CFG80211_DEBUGFS /* Debugfs entries */ struct wiphy_debugfsdentries { diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 8b65e212ae49..de9ac49cd907 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -7,6 +7,7 @@ #include #include #include +#include "wext-compat.h" #include "nl80211.h" @@ -312,8 +313,6 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, return err; } -/* temporary symbol - mark GPL - in the future the handler won't be */ -EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwfreq); int cfg80211_ibss_wext_giwfreq(struct net_device *dev, struct iw_request_info *info, @@ -342,8 +341,6 @@ int cfg80211_ibss_wext_giwfreq(struct net_device *dev, /* no channel if not joining */ return -EINVAL; } -/* temporary symbol - mark GPL - in the future the handler won't be */ -EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_giwfreq); int cfg80211_ibss_wext_siwessid(struct net_device *dev, struct iw_request_info *info, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c951eb2b07d5..0cd548267d4a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -757,6 +757,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) channel_type); if (result) goto bad_res; + + rdev->channel = chan; } changed = 0; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index c7351a98e660..fc2e7768967d 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -14,6 +14,7 @@ #include #include #include +#include "wext-compat.h" #include "core.h" int cfg80211_wext_giwname(struct net_device *dev, @@ -300,7 +301,6 @@ struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy, return ERR_PTR(-EINVAL); return chan; } -EXPORT_SYMBOL_GPL(cfg80211_wext_freq); int cfg80211_wext_siwrts(struct net_device *dev, struct iw_request_info *info, @@ -759,6 +759,58 @@ int cfg80211_wext_giwencode(struct net_device *dev, } EXPORT_SYMBOL_GPL(cfg80211_wext_giwencode); +int cfg80211_wext_siwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct ieee80211_channel *chan; + int err; + + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + return cfg80211_mgd_wext_siwfreq(dev, info, freq, extra); + case NL80211_IFTYPE_ADHOC: + return cfg80211_ibss_wext_siwfreq(dev, info, freq, extra); + default: + chan = cfg80211_wext_freq(wdev->wiphy, freq); + if (!chan) + return -EINVAL; + if (IS_ERR(chan)) + return PTR_ERR(chan); + err = rdev->ops->set_channel(wdev->wiphy, chan, + NL80211_CHAN_NO_HT); + if (err) + return err; + rdev->channel = chan; + return 0; + } +} +EXPORT_SYMBOL_GPL(cfg80211_wext_siwfreq); + +int cfg80211_wext_giwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + return cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); + case NL80211_IFTYPE_ADHOC: + return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); + default: + if (!rdev->channel) + return -EINVAL; + freq->m = rdev->channel->center_freq; + freq->e = 6; + return 0; + } +} +EXPORT_SYMBOL_GPL(cfg80211_wext_giwfreq); + int cfg80211_wext_siwtxpower(struct net_device *dev, struct iw_request_info *info, union iwreq_data *data, char *extra) diff --git a/net/wireless/wext-compat.h 
b/net/wireless/wext-compat.h new file mode 100644 index 000000000000..23a6b5a83f2d --- /dev/null +++ b/net/wireless/wext-compat.h @@ -0,0 +1,21 @@ +#ifndef __WEXT_COMPAT +#define __WEXT_COMPAT + +int cfg80211_ibss_wext_siwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra); +int cfg80211_ibss_wext_giwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra); + +int cfg80211_mgd_wext_siwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra); +int cfg80211_mgd_wext_giwfreq(struct net_device *dev, + struct iw_request_info *info, + struct iw_freq *freq, char *extra); + +struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy, + struct iw_freq *freq); + +#endif /* __WEXT_COMPAT */ diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 4c689fd865b0..509279a1cfb2 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -8,6 +8,7 @@ #include #include #include +#include "wext-compat.h" #include "nl80211.h" int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, @@ -108,8 +109,6 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, cfg80211_unlock_rdev(rdev); return err; } -/* temporary symbol - mark GPL - in the future the handler won't be */ -EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_siwfreq); int cfg80211_mgd_wext_giwfreq(struct net_device *dev, struct iw_request_info *info, @@ -138,8 +137,6 @@ int cfg80211_mgd_wext_giwfreq(struct net_device *dev, /* no channel if not joining */ return -EINVAL; } -/* temporary symbol - mark GPL - in the future the handler won't be */ -EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_giwfreq); int cfg80211_mgd_wext_siwessid(struct net_device *dev, struct iw_request_info *info, -- cgit v1.2.3 From 562e482265ac4d660d9f0114419591d62f44361d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Jul 2009 12:01:51 +0200 Subject: cfg80211: combine IWAP handlers Since we now have IWAP handlers for all modes, we can combine them into one. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwmc3200wifi/wext.c | 34 ++------------------- include/net/cfg80211.h | 24 ++++----------- net/mac80211/wext.c | 41 ++----------------------- net/wireless/ibss.c | 4 --- net/wireless/wext-compat.c | 52 +++++++++++++++++++++++++++----- net/wireless/wext-compat.h | 12 ++++++++ net/wireless/wext-sme.c | 4 --- 7 files changed, 66 insertions(+), 105 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwmc3200wifi/wext.c b/drivers/net/wireless/iwmc3200wifi/wext.c index 8058e9991c30..5319b16474e3 100644 --- a/drivers/net/wireless/iwmc3200wifi/wext.c +++ b/drivers/net/wireless/iwmc3200wifi/wext.c @@ -27,36 +27,6 @@ #include "iwm.h" #include "commands.h" -static int iwm_wext_siwap(struct net_device *dev, struct iw_request_info *info, - struct sockaddr *ap_addr, char *extra) -{ - struct iwm_priv *iwm = ndev_to_iwm(dev); - - switch (iwm->conf.mode) { - case UMAC_MODE_IBSS: - return cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); - case UMAC_MODE_BSS: - return cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); - default: - return -EOPNOTSUPP; - } -} - -static int iwm_wext_giwap(struct net_device *dev, struct iw_request_info *info, - struct sockaddr *ap_addr, char *extra) -{ - struct iwm_priv *iwm = ndev_to_iwm(dev); - - switch (iwm->conf.mode) { - case UMAC_MODE_IBSS: - return cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); - case UMAC_MODE_BSS: - return cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); - default: - return -EOPNOTSUPP; - } -} - static int iwm_wext_siwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid) @@ -111,8 +81,8 @@ static const iw_handler iwm_handlers[] = (iw_handler) NULL, /* SIOCGIWSPY */ (iw_handler) NULL, /* SIOCSIWTHRSPY */ (iw_handler) NULL, /* SIOCGIWTHRSPY */ - (iw_handler) iwm_wext_siwap, /* SIOCSIWAP */ - (iw_handler) iwm_wext_giwap, /* SIOCGIWAP */ + (iw_handler) cfg80211_wext_siwap, /* SIOCSIWAP */ + (iw_handler) cfg80211_wext_giwap, /* SIOCGIWAP */ (iw_handler) NULL, /* SIOCSIWMLME */ (iw_handler) NULL, /* SIOCGIWAPLIST */ (iw_handler) cfg80211_wext_siwscan, /* SIOCSIWSCAN */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5d249c4bf225..3348c16e1f35 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1601,12 +1601,6 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, int cfg80211_ibss_wext_giwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid); -int cfg80211_ibss_wext_siwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *ap_addr, char *extra); -int cfg80211_ibss_wext_giwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *ap_addr, char *extra); int cfg80211_mgd_wext_siwessid(struct net_device *dev, struct iw_request_info *info, @@ -1614,12 +1608,6 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, int cfg80211_mgd_wext_giwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid); -int cfg80211_mgd_wext_siwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *ap_addr, char *extra); -int cfg80211_mgd_wext_giwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *ap_addr, char *extra); int cfg80211_wext_siwgenie(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra); @@ -1686,12 +1674,12 @@ int cfg80211_wext_giwpower(struct net_device *dev, struct iw_request_info *info, struct iw_param *wrq, char *extra); 
-int cfg80211_wds_wext_siwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *addr, char *extra); -int cfg80211_wds_wext_giwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *addr, char *extra); +int cfg80211_wext_siwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra); +int cfg80211_wext_giwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra); /* * callbacks for asynchronous cfg80211 methods, notification diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 7cd9aa79ef52..72866c8b8c3d 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -59,43 +59,6 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, } -static int ieee80211_ioctl_siwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *ap_addr, char *extra) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - return cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); - - if (sdata->vif.type == NL80211_IFTYPE_STATION) - return cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); - - if (sdata->vif.type == NL80211_IFTYPE_WDS) - return cfg80211_wds_wext_siwap(dev, info, ap_addr, extra); - return -EOPNOTSUPP; -} - - -static int ieee80211_ioctl_giwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *ap_addr, char *extra) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - return cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); - - if (sdata->vif.type == NL80211_IFTYPE_STATION) - return cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); - - if (sdata->vif.type == NL80211_IFTYPE_WDS) - return cfg80211_wds_wext_giwap(dev, info, ap_addr, extra); - - return -EOPNOTSUPP; -} - - /* Structures to export the Wireless Handlers */ static const iw_handler ieee80211_handler[] = @@ -120,8 +83,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* SIOCGIWSPY */ (iw_handler) NULL, /* SIOCSIWTHRSPY */ (iw_handler) NULL, /* SIOCGIWTHRSPY */ - (iw_handler) ieee80211_ioctl_siwap, /* SIOCSIWAP */ - (iw_handler) ieee80211_ioctl_giwap, /* SIOCGIWAP */ + (iw_handler) cfg80211_wext_siwap, /* SIOCSIWAP */ + (iw_handler) cfg80211_wext_giwap, /* SIOCGIWAP */ (iw_handler) cfg80211_wext_siwmlme, /* SIOCSIWMLME */ (iw_handler) NULL, /* SIOCGIWAPLIST */ (iw_handler) cfg80211_wext_siwscan, /* SIOCSIWSCAN */ diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index de9ac49cd907..f955225ed911 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -466,8 +466,6 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, return err; } -/* temporary symbol - mark GPL - in the future the handler won't be */ -EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwap); int cfg80211_ibss_wext_giwap(struct net_device *dev, struct iw_request_info *info, @@ -493,6 +491,4 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, return 0; } -/* temporary symbol - mark GPL - in the future the handler won't be */ -EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_giwap); #endif diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index fc2e7768967d..c27774bd0107 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1149,9 +1149,9 @@ int cfg80211_wext_giwpower(struct net_device *dev, } EXPORT_SYMBOL_GPL(cfg80211_wext_giwpower); -int cfg80211_wds_wext_siwap(struct net_device *dev, - struct 
iw_request_info *info, - struct sockaddr *addr, char *extra) +static int cfg80211_wds_wext_siwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); @@ -1177,11 +1177,10 @@ int cfg80211_wds_wext_siwap(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wds_wext_siwap); -int cfg80211_wds_wext_giwap(struct net_device *dev, - struct iw_request_info *info, - struct sockaddr *addr, char *extra) +static int cfg80211_wds_wext_giwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -1193,7 +1192,6 @@ int cfg80211_wds_wext_giwap(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(cfg80211_wds_wext_giwap); int cfg80211_wext_siwrate(struct net_device *dev, struct iw_request_info *info, @@ -1327,3 +1325,41 @@ struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) return &wstats; } EXPORT_SYMBOL_GPL(cfg80211_wireless_stats); + +int cfg80211_wext_siwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + switch (wdev->iftype) { + case NL80211_IFTYPE_ADHOC: + return cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); + case NL80211_IFTYPE_STATION: + return cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); + case NL80211_IFTYPE_WDS: + return cfg80211_wds_wext_siwap(dev, info, ap_addr, extra); + default: + return -EOPNOTSUPP; + } +} +EXPORT_SYMBOL_GPL(cfg80211_wext_siwap); + +int cfg80211_wext_giwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + switch (wdev->iftype) { + case NL80211_IFTYPE_ADHOC: + return cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); + case NL80211_IFTYPE_STATION: + return cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); + case NL80211_IFTYPE_WDS: + return cfg80211_wds_wext_giwap(dev, info, ap_addr, extra); + default: + return -EOPNOTSUPP; + } +} +EXPORT_SYMBOL_GPL(cfg80211_wext_giwap); diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h index 23a6b5a83f2d..51028ebf19ae 100644 --- a/net/wireless/wext-compat.h +++ b/net/wireless/wext-compat.h @@ -7,6 +7,12 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, int cfg80211_ibss_wext_giwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra); +int cfg80211_ibss_wext_siwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra); +int cfg80211_ibss_wext_giwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra); int cfg80211_mgd_wext_siwfreq(struct net_device *dev, struct iw_request_info *info, @@ -14,6 +20,12 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, int cfg80211_mgd_wext_giwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra); +int cfg80211_mgd_wext_siwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra); +int cfg80211_mgd_wext_giwap(struct net_device *dev, + struct iw_request_info *info, + struct sockaddr *ap_addr, char *extra); struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq); diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 
509279a1cfb2..1aa31cc55113 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -273,8 +273,6 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, cfg80211_unlock_rdev(wiphy_to_dev(wdev->wiphy)); return err; } -/* temporary symbol - mark GPL - in the future the handler won't be */ -EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_siwap); int cfg80211_mgd_wext_giwap(struct net_device *dev, struct iw_request_info *info, @@ -299,8 +297,6 @@ int cfg80211_mgd_wext_giwap(struct net_device *dev, return 0; } -/* temporary symbol - mark GPL - in the future the handler won't be */ -EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_giwap); int cfg80211_wext_siwgenie(struct net_device *dev, struct iw_request_info *info, -- cgit v1.2.3 From 1f9298f96082692bdfe73af6fc2167f627f21647 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 27 Jul 2009 12:01:52 +0200 Subject: cfg80211: combine IWESSID handlers Since we now have handlers IWESSID for all modes, we can combine them into one. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwmc3200wifi/wext.c | 36 ++------------------------------ include/net/cfg80211.h | 20 ++++++------------ net/mac80211/wext.c | 35 ++----------------------------- net/wireless/ibss.c | 4 ---- net/wireless/wext-compat.c | 34 ++++++++++++++++++++++++++++++ net/wireless/wext-compat.h | 12 +++++++++++ net/wireless/wext-sme.c | 4 ---- 7 files changed, 56 insertions(+), 89 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwmc3200wifi/wext.c b/drivers/net/wireless/iwmc3200wifi/wext.c index 5319b16474e3..9196024a2890 100644 --- a/drivers/net/wireless/iwmc3200wifi/wext.c +++ b/drivers/net/wireless/iwmc3200wifi/wext.c @@ -27,38 +27,6 @@ #include "iwm.h" #include "commands.h" -static int iwm_wext_siwessid(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *ssid) -{ - struct iwm_priv *iwm = ndev_to_iwm(dev); - - switch (iwm->conf.mode) { - case UMAC_MODE_IBSS: - return cfg80211_ibss_wext_siwessid(dev, info, data, ssid); - case UMAC_MODE_BSS: - return cfg80211_mgd_wext_siwessid(dev, info, data, ssid); - default: - return -EOPNOTSUPP; - } -} - -static int iwm_wext_giwessid(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *ssid) -{ - struct iwm_priv *iwm = ndev_to_iwm(dev); - - switch (iwm->conf.mode) { - case UMAC_MODE_IBSS: - return cfg80211_ibss_wext_giwessid(dev, info, data, ssid); - case UMAC_MODE_BSS: - return cfg80211_mgd_wext_giwessid(dev, info, data, ssid); - default: - return -EOPNOTSUPP; - } -} - static const iw_handler iwm_handlers[] = { (iw_handler) NULL, /* SIOCSIWCOMMIT */ @@ -87,8 +55,8 @@ static const iw_handler iwm_handlers[] = (iw_handler) NULL, /* SIOCGIWAPLIST */ (iw_handler) cfg80211_wext_siwscan, /* SIOCSIWSCAN */ (iw_handler) cfg80211_wext_giwscan, /* SIOCGIWSCAN */ - (iw_handler) iwm_wext_siwessid, /* SIOCSIWESSID */ - (iw_handler) iwm_wext_giwessid, /* SIOCGIWESSID */ + (iw_handler) cfg80211_wext_siwessid, /* SIOCSIWESSID */ + (iw_handler) cfg80211_wext_giwessid, /* SIOCGIWESSID */ (iw_handler) NULL, /* SIOCSIWNICKN */ (iw_handler) NULL, /* SIOCGIWNICKN */ (iw_handler) NULL, /* -- hole -- */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3348c16e1f35..e1b92358242b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1595,19 +1595,6 @@ int cfg80211_wext_siwmlme(struct net_device *dev, int cfg80211_wext_giwrange(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra); -int 
cfg80211_ibss_wext_siwessid(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *ssid); -int cfg80211_ibss_wext_giwessid(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *ssid); - -int cfg80211_mgd_wext_siwessid(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *ssid); -int cfg80211_mgd_wext_giwessid(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *ssid); int cfg80211_wext_siwgenie(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra); @@ -1624,7 +1611,12 @@ int cfg80211_wext_siwfreq(struct net_device *dev, int cfg80211_wext_giwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra); - +int cfg80211_wext_siwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid); +int cfg80211_wext_giwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid); int cfg80211_wext_siwrate(struct net_device *dev, struct iw_request_info *info, struct iw_param *rate, char *extra); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 72866c8b8c3d..aa250c3e8fda 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -27,37 +27,6 @@ #include "aes_ccm.h" -static int ieee80211_ioctl_siwessid(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *ssid) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - return cfg80211_ibss_wext_siwessid(dev, info, data, ssid); - else if (sdata->vif.type == NL80211_IFTYPE_STATION) - return cfg80211_mgd_wext_siwessid(dev, info, data, ssid); - - return -EOPNOTSUPP; -} - - -static int ieee80211_ioctl_giwessid(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *ssid) -{ - struct ieee80211_sub_if_data *sdata; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) - return cfg80211_ibss_wext_giwessid(dev, info, data, ssid); - else if (sdata->vif.type == NL80211_IFTYPE_STATION) - return cfg80211_mgd_wext_giwessid(dev, info, data, ssid); - - return -EOPNOTSUPP; -} - /* Structures to export the Wireless Handlers */ @@ -89,8 +58,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* SIOCGIWAPLIST */ (iw_handler) cfg80211_wext_siwscan, /* SIOCSIWSCAN */ (iw_handler) cfg80211_wext_giwscan, /* SIOCGIWSCAN */ - (iw_handler) ieee80211_ioctl_siwessid, /* SIOCSIWESSID */ - (iw_handler) ieee80211_ioctl_giwessid, /* SIOCGIWESSID */ + (iw_handler) cfg80211_wext_siwessid, /* SIOCSIWESSID */ + (iw_handler) cfg80211_wext_giwessid, /* SIOCGIWESSID */ (iw_handler) NULL, /* SIOCSIWNICKN */ (iw_handler) NULL, /* SIOCGIWNICKN */ (iw_handler) NULL, /* -- hole -- */ diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index f955225ed911..4d7a084b35e2 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -381,8 +381,6 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, return err; } -/* temporary symbol - mark GPL - in the future the handler won't be */ -EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_siwessid); int cfg80211_ibss_wext_giwessid(struct net_device *dev, struct iw_request_info *info, @@ -410,8 +408,6 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev, return 0; } -/* temporary symbol - mark GPL - in the future the handler won't be */ -EXPORT_SYMBOL_GPL(cfg80211_ibss_wext_giwessid); int 
cfg80211_ibss_wext_siwap(struct net_device *dev, struct iw_request_info *info, diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index c27774bd0107..083e4c33d95d 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1363,3 +1363,37 @@ int cfg80211_wext_giwap(struct net_device *dev, } } EXPORT_SYMBOL_GPL(cfg80211_wext_giwap); + +int cfg80211_wext_siwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + switch (wdev->iftype) { + case NL80211_IFTYPE_ADHOC: + return cfg80211_ibss_wext_siwessid(dev, info, data, ssid); + case NL80211_IFTYPE_STATION: + return cfg80211_mgd_wext_siwessid(dev, info, data, ssid); + default: + return -EOPNOTSUPP; + } +} +EXPORT_SYMBOL_GPL(cfg80211_wext_siwessid); + +int cfg80211_wext_giwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + + switch (wdev->iftype) { + case NL80211_IFTYPE_ADHOC: + return cfg80211_ibss_wext_giwessid(dev, info, data, ssid); + case NL80211_IFTYPE_STATION: + return cfg80211_mgd_wext_giwessid(dev, info, data, ssid); + default: + return -EOPNOTSUPP; + } +} +EXPORT_SYMBOL_GPL(cfg80211_wext_giwessid); diff --git a/net/wireless/wext-compat.h b/net/wireless/wext-compat.h index 51028ebf19ae..c0310d93c2e5 100644 --- a/net/wireless/wext-compat.h +++ b/net/wireless/wext-compat.h @@ -13,6 +13,12 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, int cfg80211_ibss_wext_giwap(struct net_device *dev, struct iw_request_info *info, struct sockaddr *ap_addr, char *extra); +int cfg80211_ibss_wext_siwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid); +int cfg80211_ibss_wext_giwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid); int cfg80211_mgd_wext_siwfreq(struct net_device *dev, struct iw_request_info *info, @@ -26,6 +32,12 @@ int cfg80211_mgd_wext_siwap(struct net_device *dev, int cfg80211_mgd_wext_giwap(struct net_device *dev, struct iw_request_info *info, struct sockaddr *ap_addr, char *extra); +int cfg80211_mgd_wext_siwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid); +int cfg80211_mgd_wext_giwessid(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *ssid); struct ieee80211_channel *cfg80211_wext_freq(struct wiphy *wiphy, struct iw_freq *freq); diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 1aa31cc55113..7bacbd1c2af6 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -192,8 +192,6 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, cfg80211_unlock_rdev(wiphy_to_dev(wdev->wiphy)); return err; } -/* temporary symbol - mark GPL - in the future the handler won't be */ -EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_siwessid); int cfg80211_mgd_wext_giwessid(struct net_device *dev, struct iw_request_info *info, @@ -218,8 +216,6 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, return 0; } -/* temporary symbol - mark GPL - in the future the handler won't be */ -EXPORT_SYMBOL_GPL(cfg80211_mgd_wext_giwessid); int cfg80211_mgd_wext_siwap(struct net_device *dev, struct iw_request_info *info, -- cgit v1.2.3 From 096b7fe012d66ed55e98bc8022405ede0cc80e96 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 29 Jul 2009 15:04:04 -0700 Subject: cgroups: fix pid namespace bug The bug was introduced by 
commit cc31edceee04a7b87f2be48f9489ebb72d264844 ("cgroups: convert tasks file to use a seq_file with shared pid array"). We cache a pid array for all threads that are opening the same "tasks" file, but the pids in the array are always from the namespace of the last process that opened the file, so all other threads will read pids from that namespace instead of their own namespaces. To fix it, we maintain a list of pid arrays, which is keyed by pid_ns. The list will be of length 1 at most time. Reported-by: Paul Menage Idea-by: Paul Menage Signed-off-by: Li Zefan Reviewed-by: Serge Hallyn Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 11 +++--- kernel/cgroup.c | 96 +++++++++++++++++++++++++++++++++++++------------- 2 files changed, 76 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 665fa70e4094..20411d2876f8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -179,14 +179,11 @@ struct cgroup { */ struct list_head release_list; - /* pids_mutex protects the fields below */ + /* pids_mutex protects pids_list and cached pid arrays. */ struct rw_semaphore pids_mutex; - /* Array of process ids in the cgroup */ - pid_t *tasks_pids; - /* How many files are using the current tasks_pids array */ - int pids_use_count; - /* Length of the current tasks_pids array */ - int pids_length; + + /* Linked list of struct cgroup_pids */ + struct list_head pids_list; /* For RCU-protected deletion */ struct rcu_head rcu_head; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3737a682cdf5..250dac05680f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -47,6 +47,7 @@ #include #include #include +#include #include @@ -960,6 +961,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->children); INIT_LIST_HEAD(&cgrp->css_sets); INIT_LIST_HEAD(&cgrp->release_list); + INIT_LIST_HEAD(&cgrp->pids_list); init_rwsem(&cgrp->pids_mutex); } static void init_cgroup_root(struct cgroupfs_root *root) @@ -2201,12 +2203,30 @@ err: return ret; } +/* + * Cache pids for all threads in the same pid namespace that are + * opening the same "tasks" file. + */ +struct cgroup_pids { + /* The node in cgrp->pids_list */ + struct list_head list; + /* The cgroup those pids belong to */ + struct cgroup *cgrp; + /* The namepsace those pids belong to */ + struct pid_namespace *ns; + /* Array of process ids in the cgroup */ + pid_t *tasks_pids; + /* How many files are using the this tasks_pids array */ + int use_count; + /* Length of the current tasks_pids array */ + int length; +}; + static int cmppid(const void *a, const void *b) { return *(pid_t *)a - *(pid_t *)b; } - /* * seq_file methods for the "tasks" file. The seq_file position is the * next pid to display; the seq_file iterator is a pointer to the pid @@ -2221,45 +2241,47 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos) * after a seek to the start). 
Use a binary-search to find the * next pid to display, if any */ - struct cgroup *cgrp = s->private; + struct cgroup_pids *cp = s->private; + struct cgroup *cgrp = cp->cgrp; int index = 0, pid = *pos; int *iter; down_read(&cgrp->pids_mutex); if (pid) { - int end = cgrp->pids_length; + int end = cp->length; while (index < end) { int mid = (index + end) / 2; - if (cgrp->tasks_pids[mid] == pid) { + if (cp->tasks_pids[mid] == pid) { index = mid; break; - } else if (cgrp->tasks_pids[mid] <= pid) + } else if (cp->tasks_pids[mid] <= pid) index = mid + 1; else end = mid; } } /* If we're off the end of the array, we're done */ - if (index >= cgrp->pids_length) + if (index >= cp->length) return NULL; /* Update the abstract position to be the actual pid that we found */ - iter = cgrp->tasks_pids + index; + iter = cp->tasks_pids + index; *pos = *iter; return iter; } static void cgroup_tasks_stop(struct seq_file *s, void *v) { - struct cgroup *cgrp = s->private; + struct cgroup_pids *cp = s->private; + struct cgroup *cgrp = cp->cgrp; up_read(&cgrp->pids_mutex); } static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos) { - struct cgroup *cgrp = s->private; + struct cgroup_pids *cp = s->private; int *p = v; - int *end = cgrp->tasks_pids + cgrp->pids_length; + int *end = cp->tasks_pids + cp->length; /* * Advance to the next pid in the array. If this goes off the @@ -2286,26 +2308,33 @@ static struct seq_operations cgroup_tasks_seq_operations = { .show = cgroup_tasks_show, }; -static void release_cgroup_pid_array(struct cgroup *cgrp) +static void release_cgroup_pid_array(struct cgroup_pids *cp) { + struct cgroup *cgrp = cp->cgrp; + down_write(&cgrp->pids_mutex); - BUG_ON(!cgrp->pids_use_count); - if (!--cgrp->pids_use_count) { - kfree(cgrp->tasks_pids); - cgrp->tasks_pids = NULL; - cgrp->pids_length = 0; + BUG_ON(!cp->use_count); + if (!--cp->use_count) { + list_del(&cp->list); + put_pid_ns(cp->ns); + kfree(cp->tasks_pids); + kfree(cp); } up_write(&cgrp->pids_mutex); } static int cgroup_tasks_release(struct inode *inode, struct file *file) { - struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); + struct seq_file *seq; + struct cgroup_pids *cp; if (!(file->f_mode & FMODE_READ)) return 0; - release_cgroup_pid_array(cgrp); + seq = file->private_data; + cp = seq->private; + + release_cgroup_pid_array(cp); return seq_release(inode, file); } @@ -2324,6 +2353,8 @@ static struct file_operations cgroup_tasks_operations = { static int cgroup_tasks_open(struct inode *unused, struct file *file) { struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); + struct pid_namespace *ns = current->nsproxy->pid_ns; + struct cgroup_pids *cp; pid_t *pidarray; int npids; int retval; @@ -2350,20 +2381,37 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file) * array if necessary */ down_write(&cgrp->pids_mutex); - kfree(cgrp->tasks_pids); - cgrp->tasks_pids = pidarray; - cgrp->pids_length = npids; - cgrp->pids_use_count++; + + list_for_each_entry(cp, &cgrp->pids_list, list) { + if (ns == cp->ns) + goto found; + } + + cp = kzalloc(sizeof(*cp), GFP_KERNEL); + if (!cp) { + up_write(&cgrp->pids_mutex); + kfree(pidarray); + return -ENOMEM; + } + cp->cgrp = cgrp; + cp->ns = ns; + get_pid_ns(ns); + list_add(&cp->list, &cgrp->pids_list); +found: + kfree(cp->tasks_pids); + cp->tasks_pids = pidarray; + cp->length = npids; + cp->use_count++; up_write(&cgrp->pids_mutex); file->f_op = &cgroup_tasks_operations; retval = seq_open(file, &cgroup_tasks_seq_operations); if (retval) { - 
release_cgroup_pid_array(cgrp); + release_cgroup_pid_array(cp); return retval; } - ((struct seq_file *)file->private_data)->private = cgrp; + ((struct seq_file *)file->private_data)->private = cp; return 0; } -- cgit v1.2.3 From 887032670d47366a8c8f25396ea7c14b7b2cc620 Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Wed, 29 Jul 2009 15:04:06 -0700 Subject: cgroup avoid permanent sleep at rmdir After commit ec64f51545fffbc4cb968f0cea56341a4b07e85a ("cgroup: fix frequent -EBUSY at rmdir"), cgroup's rmdir (especially against memcg) doesn't return -EBUSY by temporary ref counts. That commit expects all refs after pre_destroy() is temporary but...it wasn't. Then, rmdir can wait permanently. This patch tries to fix that and change followings. - set CGRP_WAIT_ON_RMDIR flag before pre_destroy(). - clear CGRP_WAIT_ON_RMDIR flag when the subsys finds racy case. if there are sleeping ones, wakes them up. - rmdir() sleeps only when CGRP_WAIT_ON_RMDIR flag is set. Tested-by: Daisuke Nishimura Reported-by: Daisuke Nishimura Reviewed-by: Paul Menage Acked-by: Balbir Sigh Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 17 ++++++++++++++++ kernel/cgroup.c | 55 ++++++++++++++++++++++++++++++++++---------------- mm/memcontrol.c | 23 ++++++++++++++++++--- 3 files changed, 75 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 20411d2876f8..90bba9e62286 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -362,6 +362,23 @@ int cgroup_task_count(const struct cgroup *cgrp); /* Return true if cgrp is a descendant of the task's cgroup */ int cgroup_is_descendant(const struct cgroup *cgrp, struct task_struct *task); +/* + * When the subsys has to access css and may add permanent refcnt to css, + * it should take care of racy conditions with rmdir(). Following set of + * functions, is for stop/restart rmdir if necessary. + * Because these will call css_get/put, "css" should be alive css. + * + * cgroup_exclude_rmdir(); + * ...do some jobs which may access arbitrary empty cgroup + * cgroup_release_and_wakeup_rmdir(); + * + * When someone removes a cgroup while cgroup_exclude_rmdir() holds it, + * it sleeps and cgroup_release_and_wakeup_rmdir() will wake him up. + */ + +void cgroup_exclude_rmdir(struct cgroup_subsys_state *css); +void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css); + /* * Control Group subsystem type. * See Documentation/cgroups/cgroups.txt for details diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 250dac05680f..b6eadfe30e7b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -735,16 +735,28 @@ static void cgroup_d_remove_dir(struct dentry *dentry) * reference to css->refcnt. In general, this refcnt is expected to goes down * to zero, soon. 
* - * CGRP_WAIT_ON_RMDIR flag is modified under cgroup's inode->i_mutex; + * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; */ DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); -static void cgroup_wakeup_rmdir_waiters(const struct cgroup *cgrp) +static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) { - if (unlikely(test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))) + if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))) wake_up_all(&cgroup_rmdir_waitq); } +void cgroup_exclude_rmdir(struct cgroup_subsys_state *css) +{ + css_get(css); +} + +void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css) +{ + cgroup_wakeup_rmdir_waiter(css->cgroup); + css_put(css); +} + + static int rebind_subsystems(struct cgroupfs_root *root, unsigned long final_bits) { @@ -1359,7 +1371,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) * wake up rmdir() waiter. the rmdir should fail since the cgroup * is no longer empty. */ - cgroup_wakeup_rmdir_waiters(cgrp); + cgroup_wakeup_rmdir_waiter(cgrp); return 0; } @@ -2743,34 +2755,43 @@ again: } mutex_unlock(&cgroup_mutex); + /* + * In general, subsystem has no css->refcnt after pre_destroy(). But + * in racy cases, subsystem may have to get css->refcnt after + * pre_destroy() and it makes rmdir return with -EBUSY. This sometimes + * make rmdir return -EBUSY too often. To avoid that, we use waitqueue + * for cgroup's rmdir. CGRP_WAIT_ON_RMDIR is for synchronizing rmdir + * and subsystem's reference count handling. Please see css_get/put + * and css_tryget() and cgroup_wakeup_rmdir_waiter() implementation. + */ + set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); + /* * Call pre_destroy handlers of subsys. Notify subsystems * that rmdir() request comes. */ ret = cgroup_call_pre_destroy(cgrp); - if (ret) + if (ret) { + clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); return ret; + } mutex_lock(&cgroup_mutex); parent = cgrp->parent; if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) { + clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); mutex_unlock(&cgroup_mutex); return -EBUSY; } - /* - * css_put/get is provided for subsys to grab refcnt to css. In typical - * case, subsystem has no reference after pre_destroy(). But, under - * hierarchy management, some *temporal* refcnt can be hold. - * To avoid returning -EBUSY to a user, waitqueue is used. If subsys - * is really busy, it should return -EBUSY at pre_destroy(). wake_up - * is called when css_put() is called and refcnt goes down to 0. - */ - set_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); prepare_to_wait(&cgroup_rmdir_waitq, &wait, TASK_INTERRUPTIBLE); - if (!cgroup_clear_css_refs(cgrp)) { mutex_unlock(&cgroup_mutex); - schedule(); + /* + * Because someone may call cgroup_wakeup_rmdir_waiter() before + * prepare_to_wait(), we need to check this flag. + */ + if (test_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)) + schedule(); finish_wait(&cgroup_rmdir_waitq, &wait); clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags); if (signal_pending(current)) @@ -3342,7 +3363,7 @@ void __css_put(struct cgroup_subsys_state *css) set_bit(CGRP_RELEASABLE, &cgrp->flags); check_for_release(cgrp); } - cgroup_wakeup_rmdir_waiters(cgrp); + cgroup_wakeup_rmdir_waiter(cgrp); } rcu_read_unlock(); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e717964cb5a0..fd4529d86de5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1207,6 +1207,12 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, ret = 0; out: unlock_page_cgroup(pc); + /* + * We charges against "to" which may not have any tasks. 
Then, "to" + * can be under rmdir(). But in current implementation, caller of + * this function is just force_empty() and it's garanteed that + * "to" is never removed. So, we don't check rmdir status here. + */ return ret; } @@ -1428,6 +1434,7 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, return; if (!ptr) return; + cgroup_exclude_rmdir(&ptr->css); pc = lookup_page_cgroup(page); mem_cgroup_lru_del_before_commit_swapcache(page); __mem_cgroup_commit_charge(ptr, pc, ctype); @@ -1457,8 +1464,12 @@ __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, } rcu_read_unlock(); } - /* add this page(page_cgroup) to the LRU we want. */ - + /* + * At swapin, we may charge account against cgroup which has no tasks. + * So, rmdir()->pre_destroy() can be called while we do this charge. + * In that case, we need to call pre_destroy() again. check it here. + */ + cgroup_release_and_wakeup_rmdir(&ptr->css); } void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) @@ -1664,7 +1675,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, if (!mem) return; - + cgroup_exclude_rmdir(&mem->css); /* at migration success, oldpage->mapping is NULL. */ if (oldpage->mapping) { target = oldpage; @@ -1704,6 +1715,12 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem, */ if (ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) mem_cgroup_uncharge_page(target); + /* + * At migration, we may charge account against cgroup which has no tasks + * So, rmdir()->pre_destroy() can be called while we do this charge. + * In that case, we need to call pre_destroy() again. check it here. + */ + cgroup_release_and_wakeup_rmdir(&mem->css); } /* -- cgit v1.2.3 From f5a55efa140f5e9c9dd0f398fef54f20cdb74ec9 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Wed, 29 Jul 2009 15:04:12 -0700 Subject: pps.h needs Found with make headers_check /usr/include/linux/pps.h:52: found __[us]{8,16,32,64} type without #include Signed-off-by: Dave Jones Cc: Rodolfo Giometti Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pps.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pps.h b/include/linux/pps.h index cfe5c7214ec6..0194ab06177b 100644 --- a/include/linux/pps.h +++ b/include/linux/pps.h @@ -22,6 +22,8 @@ #ifndef _PPS_H_ #define _PPS_H_ +#include + #define PPS_VERSION "5.3.6" #define PPS_MAX_SOURCES 16 /* should be enough... */ -- cgit v1.2.3 From 534acc057b5a08ec33fa57cdd2f5a09ef124e7f2 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 29 Jul 2009 15:04:18 -0700 Subject: lib: flexible array implementation Once a structure goes over PAGE_SIZE*2, we see occasional allocation failures. Some people have chosen to switch over to things like vmalloc() that will let them keep array-like access to such a large structures. But, vmalloc() has plenty of downsides. Here's an alternative. I think it's what Andrew was suggesting here: http://lkml.org/lkml/2009/7/2/518 I call it a flexible array. It does all of its work in PAGE_SIZE bits, so never does an order>0 allocation. The base level has PAGE_SIZE-2*sizeof(int) bytes of storage for pointers to the second level. So, with a 32-bit arch, you get about 4MB (4183112 bytes) of total storage when the objects pack nicely into a page. It is half that on 64-bit because the pointers are twice the size. There's a table detailing this in the code. 
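Before the kerneldoc overview, here is a rough usage sketch (illustration only, not part of the patch; "struct foo", foo_store_example() and the index 1234 are invented for the example): allocate a base, put() copies elements in, get() hands back a pointer to the stored copy, and free() tears the whole thing down.

/*
 * Illustrative only: a minimal sketch against the API added below.
 * Nothing here is part of the patch itself.
 */
#include <linux/flex_array.h>
#include <linux/slab.h>
#include <linux/kernel.h>

struct foo {
	unsigned long cookie;
	int state;
};

static int foo_store_example(void)
{
	struct flex_array *fa;
	struct foo tmp = { .cookie = 42, .state = 1 };
	struct foo *stored;
	int err;

	/* Only the PAGE_SIZE base structure is allocated here. */
	fa = flex_array_alloc(sizeof(struct foo), 10000, GFP_KERNEL);
	if (!fa)
		return -ENOMEM;

	/* put() copies tmp in, allocating the second-level page on demand. */
	err = flex_array_put(fa, 1234, &tmp, GFP_KERNEL);
	if (err)
		goto out;

	/* get() returns a pointer to the copy held in the part page. */
	stored = flex_array_get(fa, 1234);
	if (stored)
		pr_info("cookie=%lu state=%d\n", stored->cookie, stored->state);
out:
	/* Frees the second-level pages and then the base structure. */
	flex_array_free(fa);
	return err;
}
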
There are kerneldocs for the functions, but here's an overview: flex_array_alloc() - dynamically allocate a base structure flex_array_free() - free the array and all of the second-level pages flex_array_free_parts() - free the second-level pages, but not the base (for static bases) flex_array_put() - copy into the array at the given index flex_array_get() - copy out of the array at the given index flex_array_prealloc() - preallocate the second-level pages between the given indexes to guarantee no allocs will occur at put() time. We could also potentially just pass the "element_size" into each of the API functions instead of storing it internally. That would get us one more base pointer on 32-bit. I've been testing this by running it in userspace. The header and patch that I've been using are here, as well as the little script I'm using to generate the size table which goes in the kerneldocs. http://sr71.net/~dave/linux/flexarray/ [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Dave Hansen Reviewed-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/flex_array.h | 47 ++++++++ lib/Makefile | 2 +- lib/flex_array.c | 269 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 317 insertions(+), 1 deletion(-) create mode 100644 include/linux/flex_array.h create mode 100644 lib/flex_array.c (limited to 'include') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h new file mode 100644 index 000000000000..23c1ec79a31b --- /dev/null +++ b/include/linux/flex_array.h @@ -0,0 +1,47 @@ +#ifndef _FLEX_ARRAY_H +#define _FLEX_ARRAY_H + +#include +#include + +#define FLEX_ARRAY_PART_SIZE PAGE_SIZE +#define FLEX_ARRAY_BASE_SIZE PAGE_SIZE + +struct flex_array_part; + +/* + * This is meant to replace cases where an array-like + * structure has gotten too big to fit into kmalloc() + * and the developer is getting tempted to use + * vmalloc(). 
+ */ + +struct flex_array { + union { + struct { + int element_size; + int total_nr_elements; + struct flex_array_part *parts[0]; + }; + /* + * This little trick makes sure that + * sizeof(flex_array) == PAGE_SIZE + */ + char padding[FLEX_ARRAY_BASE_SIZE]; + }; +}; + +#define FLEX_ARRAY_INIT(size, total) { { {\ + .element_size = (size), \ + .total_nr_elements = (total), \ +} } } + +struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags); +int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags); +void flex_array_free(struct flex_array *fa); +void flex_array_free_parts(struct flex_array *fa); +int flex_array_put(struct flex_array *fa, int element_nr, void *src, + gfp_t flags); +void *flex_array_get(struct flex_array *fa, int element_nr); + +#endif /* _FLEX_ARRAY_H */ diff --git a/lib/Makefile b/lib/Makefile index b6d1857bbf08..2e78277eff9d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o + is_single_threaded.o plist.o decompress.o flex_array.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/flex_array.c b/lib/flex_array.c new file mode 100644 index 000000000000..0e7894ce8882 --- /dev/null +++ b/lib/flex_array.c @@ -0,0 +1,269 @@ +/* + * Flexible array managed in PAGE_SIZE parts + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2009 + * + * Author: Dave Hansen + */ + +#include +#include +#include + +struct flex_array_part { + char elements[FLEX_ARRAY_PART_SIZE]; +}; + +static inline int __elements_per_part(int element_size) +{ + return FLEX_ARRAY_PART_SIZE / element_size; +} + +static inline int bytes_left_in_base(void) +{ + int element_offset = offsetof(struct flex_array, parts); + int bytes_left = FLEX_ARRAY_BASE_SIZE - element_offset; + return bytes_left; +} + +static inline int nr_base_part_ptrs(void) +{ + return bytes_left_in_base() / sizeof(struct flex_array_part *); +} + +/* + * If a user requests an allocation which is small + * enough, we may simply use the space in the + * flex_array->parts[] array to store the user + * data. + */ +static inline int elements_fit_in_base(struct flex_array *fa) +{ + int data_size = fa->element_size * fa->total_nr_elements; + if (data_size <= bytes_left_in_base()) + return 1; + return 0; +} + +/** + * flex_array_alloc - allocate a new flexible array + * @element_size: the size of individual elements in the array + * @total: total number of elements that this should hold + * + * Note: all locking must be provided by the caller. + * + * @total is used to size internal structures. 
If the user ever + * accesses any array indexes >=@total, it will produce errors. + * + * The maximum number of elements is defined as: the number of + * elements that can be stored in a page times the number of + * page pointers that we can fit in the base structure or (using + * integer math): + * + * (PAGE_SIZE/element_size) * (PAGE_SIZE-8)/sizeof(void *) + * + * Here's a table showing example capacities. Note that the maximum + * index that the get/put() functions is just nr_objects-1. This + * basically means that you get 4MB of storage on 32-bit and 2MB on + * 64-bit. + * + * + * Element size | Objects | Objects | + * PAGE_SIZE=4k | 32-bit | 64-bit | + * ---------------------------------| + * 1 bytes | 4186112 | 2093056 | + * 2 bytes | 2093056 | 1046528 | + * 3 bytes | 1395030 | 697515 | + * 4 bytes | 1046528 | 523264 | + * 32 bytes | 130816 | 65408 | + * 33 bytes | 126728 | 63364 | + * 2048 bytes | 2044 | 1022 | + * 2049 bytes | 1022 | 511 | + * void * | 1046528 | 261632 | + * + * Since 64-bit pointers are twice the size, we lose half the + * capacity in the base structure. Also note that no effort is made + * to efficiently pack objects across page boundaries. + */ +struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags) +{ + struct flex_array *ret; + int max_size = nr_base_part_ptrs() * __elements_per_part(element_size); + + /* max_size will end up 0 if element_size > PAGE_SIZE */ + if (total > max_size) + return NULL; + ret = kzalloc(sizeof(struct flex_array), flags); + if (!ret) + return NULL; + ret->element_size = element_size; + ret->total_nr_elements = total; + return ret; +} + +static int fa_element_to_part_nr(struct flex_array *fa, int element_nr) +{ + return element_nr / __elements_per_part(fa->element_size); +} + +/** + * flex_array_free_parts - just free the second-level pages + * @src: address of data to copy into the array + * @element_nr: index of the position in which to insert + * the new element. + * + * This is to be used in cases where the base 'struct flex_array' + * has been statically allocated and should not be free. + */ +void flex_array_free_parts(struct flex_array *fa) +{ + int part_nr; + int max_part = nr_base_part_ptrs(); + + if (elements_fit_in_base(fa)) + return; + for (part_nr = 0; part_nr < max_part; part_nr++) + kfree(fa->parts[part_nr]); +} + +void flex_array_free(struct flex_array *fa) +{ + flex_array_free_parts(fa); + kfree(fa); +} + +static int fa_index_inside_part(struct flex_array *fa, int element_nr) +{ + return element_nr % __elements_per_part(fa->element_size); +} + +static int index_inside_part(struct flex_array *fa, int element_nr) +{ + int part_offset = fa_index_inside_part(fa, element_nr); + return part_offset * fa->element_size; +} + +static struct flex_array_part * +__fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) +{ + struct flex_array_part *part = fa->parts[part_nr]; + if (!part) { + /* + * This leaves the part pages uninitialized + * and with potentially random data, just + * as if the user had kmalloc()'d the whole. + * __GFP_ZERO can be used to zero it. + */ + part = kmalloc(FLEX_ARRAY_PART_SIZE, flags); + if (!part) + return NULL; + fa->parts[part_nr] = part; + } + return part; +} + +/** + * flex_array_put - copy data into the array at @element_nr + * @src: address of data to copy into the array + * @element_nr: index of the position in which to insert + * the new element. + * + * Note that this *copies* the contents of @src into + * the array. 
If you are trying to store an array of + * pointers, make sure to pass in &ptr instead of ptr. + * + * Locking must be provided by the caller. + */ +int flex_array_put(struct flex_array *fa, int element_nr, void *src, gfp_t flags) +{ + int part_nr = fa_element_to_part_nr(fa, element_nr); + struct flex_array_part *part; + void *dst; + + if (element_nr >= fa->total_nr_elements) + return -ENOSPC; + if (elements_fit_in_base(fa)) + part = (struct flex_array_part *)&fa->parts[0]; + else + part = __fa_get_part(fa, part_nr, flags); + if (!part) + return -ENOMEM; + dst = &part->elements[index_inside_part(fa, element_nr)]; + memcpy(dst, src, fa->element_size); + return 0; +} + +/** + * flex_array_prealloc - guarantee that array space exists + * @start: index of first array element for which space is allocated + * @end: index of last (inclusive) element for which space is allocated + * + * This will guarantee that no future calls to flex_array_put() + * will allocate memory. It can be used if you are expecting to + * be holding a lock or in some atomic context while writing + * data into the array. + * + * Locking must be provided by the caller. + */ +int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags) +{ + int start_part; + int end_part; + int part_nr; + struct flex_array_part *part; + + if (start >= fa->total_nr_elements || end >= fa->total_nr_elements) + return -ENOSPC; + if (elements_fit_in_base(fa)) + return 0; + start_part = fa_element_to_part_nr(fa, start); + end_part = fa_element_to_part_nr(fa, end); + for (part_nr = start_part; part_nr <= end_part; part_nr++) { + part = __fa_get_part(fa, part_nr, flags); + if (!part) + return -ENOMEM; + } + return 0; +} + +/** + * flex_array_get - pull data back out of the array + * @element_nr: index of the element to fetch from the array + * + * Returns a pointer to the data at index @element_nr. Note + * that this is a copy of the data that was passed in. If you + * are using this to store pointers, you'll get back &ptr. + * + * Locking must be provided by the caller. + */ +void *flex_array_get(struct flex_array *fa, int element_nr) +{ + int part_nr = fa_element_to_part_nr(fa, element_nr); + struct flex_array_part *part; + int index; + + if (element_nr >= fa->total_nr_elements) + return NULL; + if (!fa->parts[part_nr]) + return NULL; + if (elements_fit_in_base(fa)) + part = (struct flex_array_part *)&fa->parts[0]; + else + part = fa->parts[part_nr]; + index = index_inside_part(fa, element_nr); + return &part->elements[index_inside_part(fa, element_nr)]; +} -- cgit v1.2.3 From 812ed032cdc8138b7546eecc996879756b92d801 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 29 Jul 2009 15:04:19 -0700 Subject: uio: mark uio.h functions __KERNEL__ only To avoid userspace build failures such as: .../linux/uio.h:37: error: expected `=', `,', `;', `asm' or `__attribute__' before `iov_length' .../linux/uio.h:47: error: expected declaration specifiers or `...' before `size_t' move uio functions inside a __KERNEL__ block. 
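As a rough sketch of the resulting layout (simplified for illustration, not the verbatim include/linux/uio.h, and with a stand-in guard name), userspace continues to see struct iovec and the UIO_* constants, while kernel-only declarations sit behind the __KERNEL__ guard:

#ifndef _EXAMPLE_UIO_H		/* stand-in guard name for illustration */
#define _EXAMPLE_UIO_H

struct iovec {			/* visible to userspace */
	void __user *iov_base;
	__kernel_size_t iov_len;
};

#define UIO_MAXIOV	1024	/* visible to userspace */

#ifdef __KERNEL__
struct kvec {			/* kernel-only; never holds a userland pointer */
	void *iov_base;
	size_t iov_len;
};
/* iov_length(), iov_shorten() and friends also live inside this block. */
#endif /* __KERNEL__ */

#endif
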
Signed-off-by: Jiri Slaby Acked-by: Sam Ravnborg Cc: Alexander Viro Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uio.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index b7fe13883bdb..98c114323a8b 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -19,15 +19,6 @@ struct iovec __kernel_size_t iov_len; /* Must be size_t (1003.1g) */ }; -#ifdef __KERNEL__ - -struct kvec { - void *iov_base; /* and that should *never* hold a userland pointer */ - size_t iov_len; -}; - -#endif - /* * UIO_MAXIOV shall be at least 16 1003.1g (5.4.1.1) */ @@ -35,6 +26,13 @@ struct kvec { #define UIO_FASTIOV 8 #define UIO_MAXIOV 1024 +#ifdef __KERNEL__ + +struct kvec { + void *iov_base; /* and that should *never* hold a userland pointer */ + size_t iov_len; +}; + /* * Total number of bytes covered by an iovec. * @@ -53,5 +51,6 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) } unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); +#endif #endif -- cgit v1.2.3 From 2e04ef76916d1e29a077ea9d0f2003c8fd86724d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 30 Jul 2009 16:03:45 -0600 Subject: lguest: fix comment style I don't really notice it (except to begrudge the extra vertical space), but Ingo does. And he pointed out that one excuse of lguest is as a teaching tool, it should set a good example. Signed-off-by: Rusty Russell Cc: Ingo Molnar --- Documentation/lguest/lguest.c | 540 ++++++++++++++++++++++------------ arch/x86/include/asm/lguest.h | 3 +- arch/x86/include/asm/lguest_hcall.h | 10 +- arch/x86/lguest/boot.c | 428 +++++++++++++++++---------- arch/x86/lguest/i386_head.S | 110 ++++--- drivers/lguest/core.c | 114 ++++--- drivers/lguest/hypercalls.c | 141 ++++++--- drivers/lguest/interrupts_and_traps.c | 288 ++++++++++++------ drivers/lguest/lg.h | 23 +- drivers/lguest/lguest_device.c | 150 ++++++---- drivers/lguest/lguest_user.c | 137 ++++++--- drivers/lguest/page_tables.c | 427 ++++++++++++++++++--------- drivers/lguest/segments.c | 106 ++++--- drivers/lguest/x86/core.c | 372 +++++++++++++++-------- drivers/lguest/x86/switcher_32.S | 18 +- include/linux/lguest.h | 36 ++- include/linux/lguest_launcher.h | 18 +- 17 files changed, 1906 insertions(+), 1015 deletions(-) (limited to 'include') diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 45d7d6dcae7a..aa66a52b73e9 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -1,7 +1,9 @@ -/*P:100 This is the Launcher code, a simple program which lays out the - * "physical" memory for the new Guest by mapping the kernel image and - * the virtual devices, then opens /dev/lguest to tell the kernel - * about the Guest and control it. :*/ +/*P:100 + * This is the Launcher code, a simple program which lays out the "physical" + * memory for the new Guest by mapping the kernel image and the virtual + * devices, then opens /dev/lguest to tell the kernel about the Guest and + * control it. +:*/ #define _LARGEFILE64_SOURCE #define _GNU_SOURCE #include @@ -46,13 +48,15 @@ #include "linux/virtio_rng.h" #include "linux/virtio_ring.h" #include "asm/bootparam.h" -/*L:110 We can ignore the 39 include files we need for this program, but I do - * want to draw attention to the use of kernel-style types. 
+/*L:110 + * We can ignore the 39 include files we need for this program, but I do want + * to draw attention to the use of kernel-style types. * * As Linus said, "C is a Spartan language, and so should your naming be." I * like these abbreviations, so we define them here. Note that u64 is always * unsigned long long, which works on all Linux systems: this means that we can - * use %llu in printf for any u64. */ + * use %llu in printf for any u64. + */ typedef unsigned long long u64; typedef uint32_t u32; typedef uint16_t u16; @@ -69,8 +73,10 @@ typedef uint8_t u8; /* This will occupy 3 pages: it must be a power of 2. */ #define VIRTQUEUE_NUM 256 -/*L:120 verbose is both a global flag and a macro. The C preprocessor allows - * this, and although I wouldn't recommend it, it works quite nicely here. */ +/*L:120 + * verbose is both a global flag and a macro. The C preprocessor allows + * this, and although I wouldn't recommend it, it works quite nicely here. + */ static bool verbose; #define verbose(args...) \ do { if (verbose) printf(args); } while(0) @@ -100,8 +106,7 @@ struct device_list /* A single linked list of devices. */ struct device *dev; - /* And a pointer to the last device for easy append and also for - * configuration appending. */ + /* And a pointer to the last device for easy append. */ struct device *lastdev; }; @@ -168,20 +173,24 @@ static char **main_args; /* The original tty settings to restore on exit. */ static struct termios orig_term; -/* We have to be careful with barriers: our devices are all run in separate +/* + * We have to be careful with barriers: our devices are all run in separate * threads and so we need to make sure that changes visible to the Guest happen - * in precise order. */ + * in precise order. + */ #define wmb() __asm__ __volatile__("" : : : "memory") #define mb() __asm__ __volatile__("" : : : "memory") -/* Convert an iovec element to the given type. +/* + * Convert an iovec element to the given type. * * This is a fairly ugly trick: we need to know the size of the type and * alignment requirement to check the pointer is kosher. It's also nice to * have the name of the type in case we report failure. * * Typing those three things all the time is cumbersome and error prone, so we - * have a macro which sets them all up and passes to the real function. */ + * have a macro which sets them all up and passes to the real function. + */ #define convert(iov, type) \ ((type *)_convert((iov), sizeof(type), __alignof__(type), #type)) @@ -198,8 +207,10 @@ static void *_convert(struct iovec *iov, size_t size, size_t align, /* Wrapper for the last available index. Makes it easier to change. */ #define lg_last_avail(vq) ((vq)->last_avail_idx) -/* The virtio configuration space is defined to be little-endian. x86 is - * little-endian too, but it's nice to be explicit so we have these helpers. */ +/* + * The virtio configuration space is defined to be little-endian. x86 is + * little-endian too, but it's nice to be explicit so we have these helpers. + */ #define cpu_to_le16(v16) (v16) #define cpu_to_le32(v32) (v32) #define cpu_to_le64(v64) (v64) @@ -241,11 +252,12 @@ static u8 *get_feature_bits(struct device *dev) + dev->num_vq * sizeof(struct lguest_vqconfig); } -/*L:100 The Launcher code itself takes us out into userspace, that scary place - * where pointers run wild and free! Unfortunately, like most userspace - * programs, it's quite boring (which is why everyone likes to hack on the - * kernel!). 
Perhaps if you make up an Lguest Drinking Game at this point, it - * will get you through this section. Or, maybe not. +/*L:100 + * The Launcher code itself takes us out into userspace, that scary place where + * pointers run wild and free! Unfortunately, like most userspace programs, + * it's quite boring (which is why everyone likes to hack on the kernel!). + * Perhaps if you make up an Lguest Drinking Game at this point, it will get + * you through this section. Or, maybe not. * * The Launcher sets up a big chunk of memory to be the Guest's "physical" * memory and stores it in "guest_base". In other words, Guest physical == @@ -253,7 +265,8 @@ static u8 *get_feature_bits(struct device *dev) * * This can be tough to get your head around, but usually it just means that we * use these trivial conversion functions when the Guest gives us it's - * "physical" addresses: */ + * "physical" addresses: + */ static void *from_guest_phys(unsigned long addr) { return guest_base + addr; @@ -268,7 +281,8 @@ static unsigned long to_guest_phys(const void *addr) * Loading the Kernel. * * We start with couple of simple helper routines. open_or_die() avoids - * error-checking code cluttering the callers: */ + * error-checking code cluttering the callers: + */ static int open_or_die(const char *name, int flags) { int fd = open(name, flags); @@ -283,8 +297,10 @@ static void *map_zeroed_pages(unsigned int num) int fd = open_or_die("/dev/zero", O_RDONLY); void *addr; - /* We use a private mapping (ie. if we write to the page, it will be - * copied). */ + /* + * We use a private mapping (ie. if we write to the page, it will be + * copied). + */ addr = mmap(NULL, getpagesize() * num, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0); if (addr == MAP_FAILED) @@ -305,20 +321,24 @@ static void *get_pages(unsigned int num) return addr; } -/* This routine is used to load the kernel or initrd. It tries mmap, but if +/* + * This routine is used to load the kernel or initrd. It tries mmap, but if * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries), - * it falls back to reading the memory in. */ + * it falls back to reading the memory in. + */ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) { ssize_t r; - /* We map writable even though for some segments are marked read-only. + /* + * We map writable even though for some segments are marked read-only. * The kernel really wants to be writable: it patches its own * instructions. * * MAP_PRIVATE means that the page won't be copied until a write is * done to it. This allows us to share untouched memory between - * Guests. */ + * Guests. + */ if (mmap(addr, len, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED) return; @@ -329,7 +349,8 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) err(1, "Reading offset %lu len %lu gave %zi", offset, len, r); } -/* This routine takes an open vmlinux image, which is in ELF, and maps it into +/* + * This routine takes an open vmlinux image, which is in ELF, and maps it into * the Guest memory. ELF = Embedded Linking Format, which is the format used * by all modern binaries on Linux including the kernel. * @@ -337,23 +358,28 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len) * address. We use the physical address; the Guest will map itself to the * virtual address. * - * We return the starting address. */ + * We return the starting address. 
+ */ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) { Elf32_Phdr phdr[ehdr->e_phnum]; unsigned int i; - /* Sanity checks on the main ELF header: an x86 executable with a - * reasonable number of correctly-sized program headers. */ + /* + * Sanity checks on the main ELF header: an x86 executable with a + * reasonable number of correctly-sized program headers. + */ if (ehdr->e_type != ET_EXEC || ehdr->e_machine != EM_386 || ehdr->e_phentsize != sizeof(Elf32_Phdr) || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr)) errx(1, "Malformed elf header"); - /* An ELF executable contains an ELF header and a number of "program" + /* + * An ELF executable contains an ELF header and a number of "program" * headers which indicate which parts ("segments") of the program to - * load where. */ + * load where. + */ /* We read in all the program headers at once: */ if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0) @@ -361,8 +387,10 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr)) err(1, "Reading program headers"); - /* Try all the headers: there are usually only three. A read-only one, - * a read-write one, and a "note" section which we don't load. */ + /* + * Try all the headers: there are usually only three. A read-only one, + * a read-write one, and a "note" section which we don't load. + */ for (i = 0; i < ehdr->e_phnum; i++) { /* If this isn't a loadable segment, we ignore it */ if (phdr[i].p_type != PT_LOAD) @@ -380,13 +408,15 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr) return ehdr->e_entry; } -/*L:150 A bzImage, unlike an ELF file, is not meant to be loaded. You're - * supposed to jump into it and it will unpack itself. We used to have to - * perform some hairy magic because the unpacking code scared me. +/*L:150 + * A bzImage, unlike an ELF file, is not meant to be loaded. You're supposed + * to jump into it and it will unpack itself. We used to have to perform some + * hairy magic because the unpacking code scared me. * * Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote * a small patch to jump over the tricky bits in the Guest, so now we just read - * the funky header so we know where in the file to load, and away we go! */ + * the funky header so we know where in the file to load, and away we go! + */ static unsigned long load_bzimage(int fd) { struct boot_params boot; @@ -394,8 +424,10 @@ static unsigned long load_bzimage(int fd) /* Modern bzImages get loaded at 1M. */ void *p = from_guest_phys(0x100000); - /* Go back to the start of the file and read the header. It should be - * a Linux boot header (see Documentation/x86/i386/boot.txt) */ + /* + * Go back to the start of the file and read the header. It should be + * a Linux boot header (see Documentation/x86/i386/boot.txt) + */ lseek(fd, 0, SEEK_SET); read(fd, &boot, sizeof(boot)); @@ -414,9 +446,11 @@ static unsigned long load_bzimage(int fd) return boot.hdr.code32_start; } -/*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels +/*L:140 + * Loading the kernel is easy when it's a "vmlinux", but most kernels * come wrapped up in the self-decompressing "bzImage" format. With a little - * work, we can load those, too. */ + * work, we can load those, too. + */ static unsigned long load_kernel(int fd) { Elf32_Ehdr hdr; @@ -433,24 +467,28 @@ static unsigned long load_kernel(int fd) return load_bzimage(fd); } -/* This is a trivial little helper to align pages. 
Andi Kleen hated it because +/* + * This is a trivial little helper to align pages. Andi Kleen hated it because * it calls getpagesize() twice: "it's dumb code." * * Kernel guys get really het up about optimization, even when it's not - * necessary. I leave this code as a reaction against that. */ + * necessary. I leave this code as a reaction against that. + */ static inline unsigned long page_align(unsigned long addr) { /* Add upwards and truncate downwards. */ return ((addr + getpagesize()-1) & ~(getpagesize()-1)); } -/*L:180 An "initial ram disk" is a disk image loaded into memory along with - * the kernel which the kernel can use to boot from without needing any - * drivers. Most distributions now use this as standard: the initrd contains - * the code to load the appropriate driver modules for the current machine. +/*L:180 + * An "initial ram disk" is a disk image loaded into memory along with the + * kernel which the kernel can use to boot from without needing any drivers. + * Most distributions now use this as standard: the initrd contains the code to + * load the appropriate driver modules for the current machine. * * Importantly, James Morris works for RedHat, and Fedora uses initrds for its - * kernels. He sent me this (and tells me when I break it). */ + * kernels. He sent me this (and tells me when I break it). + */ static unsigned long load_initrd(const char *name, unsigned long mem) { int ifd; @@ -462,12 +500,16 @@ static unsigned long load_initrd(const char *name, unsigned long mem) if (fstat(ifd, &st) < 0) err(1, "fstat() on initrd '%s'", name); - /* We map the initrd at the top of memory, but mmap wants it to be - * page-aligned, so we round the size up for that. */ + /* + * We map the initrd at the top of memory, but mmap wants it to be + * page-aligned, so we round the size up for that. + */ len = page_align(st.st_size); map_at(ifd, from_guest_phys(mem - len), 0, st.st_size); - /* Once a file is mapped, you can close the file descriptor. It's a - * little odd, but quite useful. */ + /* + * Once a file is mapped, you can close the file descriptor. It's a + * little odd, but quite useful. + */ close(ifd); verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len); @@ -476,8 +518,10 @@ static unsigned long load_initrd(const char *name, unsigned long mem) } /*:*/ -/* Simple routine to roll all the commandline arguments together with spaces - * between them. */ +/* + * Simple routine to roll all the commandline arguments together with spaces + * between them. + */ static void concat(char *dst, char *args[]) { unsigned int i, len = 0; @@ -494,10 +538,12 @@ static void concat(char *dst, char *args[]) dst[len] = '\0'; } -/*L:185 This is where we actually tell the kernel to initialize the Guest. We +/*L:185 + * This is where we actually tell the kernel to initialize the Guest. We * saw the arguments it expects when we looked at initialize() in lguest_user.c: * the base of Guest "physical" memory, the top physical page to allow and the - * entry point for the Guest. */ + * entry point for the Guest. + */ static void tell_kernel(unsigned long start) { unsigned long args[] = { LHREQ_INITIALIZE, @@ -522,20 +568,26 @@ static void tell_kernel(unsigned long start) static void *_check_pointer(unsigned long addr, unsigned int size, unsigned int line) { - /* We have to separately check addr and addr+size, because size could - * be huge and addr + size might wrap around. 
*/ + /* + * We have to separately check addr and addr+size, because size could + * be huge and addr + size might wrap around. + */ if (addr >= guest_limit || addr + size >= guest_limit) errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr); - /* We return a pointer for the caller's convenience, now we know it's - * safe to use. */ + /* + * We return a pointer for the caller's convenience, now we know it's + * safe to use. + */ return from_guest_phys(addr); } /* A macro which transparently hands the line number to the real function. */ #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__) -/* Each buffer in the virtqueues is actually a chain of descriptors. This +/* + * Each buffer in the virtqueues is actually a chain of descriptors. This * function returns the next descriptor in the chain, or vq->vring.num if we're - * at the end. */ + * at the end. + */ static unsigned next_desc(struct vring_desc *desc, unsigned int i, unsigned int max) { @@ -576,12 +628,14 @@ static void trigger_irq(struct virtqueue *vq) err(1, "Triggering irq %i", vq->config.irq); } -/* This looks in the virtqueue and for the first available buffer, and converts +/* + * This looks in the virtqueue and for the first available buffer, and converts * it to an iovec for convenient access. Since descriptors consist of some * number of output then some number of input descriptors, it's actually two * iovecs, but we pack them into one and note how many of each there were. * - * This function returns the descriptor number found. */ + * This function returns the descriptor number found. + */ static unsigned wait_for_vq_desc(struct virtqueue *vq, struct iovec iov[], unsigned int *out_num, unsigned int *in_num) @@ -599,8 +653,10 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, /* OK, now we need to know about added descriptors. */ vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY; - /* They could have slipped one in as we were doing that: make - * sure it's written, then check again. */ + /* + * They could have slipped one in as we were doing that: make + * sure it's written, then check again. + */ mb(); if (last_avail != vq->vring.avail->idx) { vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY; @@ -620,8 +676,10 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, errx(1, "Guest moved used index from %u to %u", last_avail, vq->vring.avail->idx); - /* Grab the next descriptor number they're advertising, and increment - * the index we've seen. */ + /* + * Grab the next descriptor number they're advertising, and increment + * the index we've seen. + */ head = vq->vring.avail->ring[last_avail % vq->vring.num]; lg_last_avail(vq)++; @@ -636,8 +694,10 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, desc = vq->vring.desc; i = head; - /* If this is an indirect entry, then this buffer contains a descriptor - * table which we handle as if it's any normal descriptor chain. */ + /* + * If this is an indirect entry, then this buffer contains a descriptor + * table which we handle as if it's any normal descriptor chain. + */ if (desc[i].flags & VRING_DESC_F_INDIRECT) { if (desc[i].len % sizeof(struct vring_desc)) errx(1, "Invalid size for indirect buffer table"); @@ -656,8 +716,10 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, if (desc[i].flags & VRING_DESC_F_WRITE) (*in_num)++; else { - /* If it's an output descriptor, they're all supposed - * to come before any input descriptors. */ + /* + * If it's an output descriptor, they're all supposed + * to come before any input descriptors. 
+ */ if (*in_num) errx(1, "Descriptor has out after in"); (*out_num)++; @@ -671,14 +733,18 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq, return head; } -/* After we've used one of their buffers, we tell them about it. We'll then - * want to send them an interrupt, using trigger_irq(). */ +/* + * After we've used one of their buffers, we tell them about it. We'll then + * want to send them an interrupt, using trigger_irq(). + */ static void add_used(struct virtqueue *vq, unsigned int head, int len) { struct vring_used_elem *used; - /* The virtqueue contains a ring of used buffers. Get a pointer to the - * next entry in that used ring. */ + /* + * The virtqueue contains a ring of used buffers. Get a pointer to the + * next entry in that used ring. + */ used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num]; used->id = head; used->len = len; @@ -698,7 +764,8 @@ static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len) /* * The Console * - * We associate some data with the console for our exit hack. */ + * We associate some data with the console for our exit hack. + */ struct console_abort { /* How many times have they hit ^C? */ @@ -725,20 +792,24 @@ static void console_input(struct virtqueue *vq) if (len <= 0) { /* Ran out of input? */ warnx("Failed to get console input, ignoring console."); - /* For simplicity, dying threads kill the whole Launcher. So - * just nap here. */ + /* + * For simplicity, dying threads kill the whole Launcher. So + * just nap here. + */ for (;;) pause(); } add_used_and_trigger(vq, head, len); - /* Three ^C within one second? Exit. + /* + * Three ^C within one second? Exit. * * This is such a hack, but works surprisingly well. Each ^C has to * be in a buffer by itself, so they can't be too fast. But we check * that we get three within about a second, so they can't be too - * slow. */ + * slow. + */ if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) { abort->count = 0; return; @@ -809,8 +880,7 @@ static bool will_block(int fd) return select(fd+1, &fdset, NULL, NULL, &zero) != 1; } -/* This is where we handle packets coming in from the tun device to our - * Guest. */ +/* This handles packets coming in from the tun device to our Guest. */ static void net_input(struct virtqueue *vq) { int len; @@ -842,8 +912,10 @@ static int do_thread(void *_vq) return 0; } -/* When a child dies, we kill our entire process group with SIGTERM. This - * also has the side effect that the shell restores the console for us! */ +/* + * When a child dies, we kill our entire process group with SIGTERM. This + * also has the side effect that the shell restores the console for us! + */ static void kill_launcher(int signal) { kill(0, SIGTERM); @@ -880,9 +952,10 @@ static void reset_device(struct device *dev) static void create_thread(struct virtqueue *vq) { - /* Create stack for thread and run it. Since stack grows - * upwards, we point the stack pointer to the end of this - * region. */ + /* + * Create stack for thread and run it. Since the stack grows upwards, + * we point the stack pointer to the end of this region. + */ char *stack = malloc(32768); unsigned long args[] = { LHREQ_EVENTFD, vq->config.pfn*getpagesize(), 0 }; @@ -981,8 +1054,11 @@ static void handle_output(unsigned long addr) } } - /* Early console write is done using notify on a nul-terminated string - * in Guest memory. */ + /* + * Early console write is done using notify on a nul-terminated string + * in Guest memory. It's also great for hacking debugging messages + * into a Guest. 
+ */ if (addr >= guest_limit) errx(1, "Bad NOTIFY %#lx", addr); @@ -998,10 +1074,12 @@ static void handle_output(unsigned long addr) * routines to allocate and manage them. */ -/* The layout of the device page is a "struct lguest_device_desc" followed by a +/* + * The layout of the device page is a "struct lguest_device_desc" followed by a * number of virtqueue descriptors, then two sets of feature bits, then an * array of configuration bytes. This routine returns the configuration - * pointer. */ + * pointer. + */ static u8 *device_config(const struct device *dev) { return (void *)(dev->desc + 1) @@ -1009,9 +1087,11 @@ static u8 *device_config(const struct device *dev) + dev->feature_len * 2; } -/* This routine allocates a new "struct lguest_device_desc" from descriptor +/* + * This routine allocates a new "struct lguest_device_desc" from descriptor * table page just above the Guest's normal memory. It returns a pointer to - * that descriptor. */ + * that descriptor. + */ static struct lguest_device_desc *new_dev_desc(u16 type) { struct lguest_device_desc d = { .type = type }; @@ -1032,8 +1112,10 @@ static struct lguest_device_desc *new_dev_desc(u16 type) return memcpy(p, &d, sizeof(d)); } -/* Each device descriptor is followed by the description of its virtqueues. We - * specify how many descriptors the virtqueue is to have. */ +/* + * Each device descriptor is followed by the description of its virtqueues. We + * specify how many descriptors the virtqueue is to have. + */ static void add_virtqueue(struct device *dev, unsigned int num_descs, void (*service)(struct virtqueue *)) { @@ -1061,10 +1143,12 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, /* Initialize the vring. */ vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN); - /* Append virtqueue to this device's descriptor. We use + /* + * Append virtqueue to this device's descriptor. We use * device_config() to get the end of the device's current virtqueues; * we check that we haven't added any config or feature information - * yet, otherwise we'd be overwriting them. */ + * yet, otherwise we'd be overwriting them. + */ assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); memcpy(device_config(dev), &vq->config, sizeof(vq->config)); dev->num_vq++; @@ -1072,14 +1156,18 @@ static void add_virtqueue(struct device *dev, unsigned int num_descs, verbose("Virtqueue page %#lx\n", to_guest_phys(p)); - /* Add to tail of list, so dev->vq is first vq, dev->vq->next is - * second. */ + /* + * Add to tail of list, so dev->vq is first vq, dev->vq->next is + * second. + */ for (i = &dev->vq; *i; i = &(*i)->next); *i = vq; } -/* The first half of the feature bitmask is for us to advertise features. The - * second half is for the Guest to accept features. */ +/* + * The first half of the feature bitmask is for us to advertise features. The + * second half is for the Guest to accept features. + */ static void add_feature(struct device *dev, unsigned bit) { u8 *features = get_feature_bits(dev); @@ -1093,9 +1181,11 @@ static void add_feature(struct device *dev, unsigned bit) features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); } -/* This routine sets the configuration fields for an existing device's +/* + * This routine sets the configuration fields for an existing device's * descriptor. It only works for the last device, but that's OK because that's - * how we use it. */ + * how we use it. 
+ */ static void set_config(struct device *dev, unsigned len, const void *conf) { /* Check we haven't overflowed our single page. */ @@ -1110,10 +1200,12 @@ static void set_config(struct device *dev, unsigned len, const void *conf) assert(dev->desc->config_len == len); } -/* This routine does all the creation and setup of a new device, including +/* + * This routine does all the creation and setup of a new device, including * calling new_dev_desc() to allocate the descriptor and device memory. * - * See what I mean about userspace being boring? */ + * See what I mean about userspace being boring? + */ static struct device *new_device(const char *name, u16 type) { struct device *dev = malloc(sizeof(*dev)); @@ -1126,10 +1218,12 @@ static struct device *new_device(const char *name, u16 type) dev->num_vq = 0; dev->running = false; - /* Append to device list. Prepending to a single-linked list is + /* + * Append to device list. Prepending to a single-linked list is * easier, but the user expects the devices to be arranged on the bus * in command-line order. The first network device on the command line - * is eth0, the first block device /dev/vda, etc. */ + * is eth0, the first block device /dev/vda, etc. + */ if (devices.lastdev) devices.lastdev->next = dev; else @@ -1139,8 +1233,10 @@ static struct device *new_device(const char *name, u16 type) return dev; } -/* Our first setup routine is the console. It's a fairly simple device, but - * UNIX tty handling makes it uglier than it could be. */ +/* + * Our first setup routine is the console. It's a fairly simple device, but + * UNIX tty handling makes it uglier than it could be. + */ static void setup_console(void) { struct device *dev; @@ -1148,8 +1244,10 @@ static void setup_console(void) /* If we can save the initial standard input settings... */ if (tcgetattr(STDIN_FILENO, &orig_term) == 0) { struct termios term = orig_term; - /* Then we turn off echo, line buffering and ^C etc. We want a - * raw input stream to the Guest. */ + /* + * Then we turn off echo, line buffering and ^C etc: We want a + * raw input stream to the Guest. + */ term.c_lflag &= ~(ISIG|ICANON|ECHO); tcsetattr(STDIN_FILENO, TCSANOW, &term); } @@ -1160,10 +1258,12 @@ static void setup_console(void) dev->priv = malloc(sizeof(struct console_abort)); ((struct console_abort *)dev->priv)->count = 0; - /* The console needs two virtqueues: the input then the output. When + /* + * The console needs two virtqueues: the input then the output. When * they put something the input queue, we make sure we're listening to * stdin. When they put something in the output queue, we write it to - * stdout. */ + * stdout. + */ add_virtqueue(dev, VIRTQUEUE_NUM, console_input); add_virtqueue(dev, VIRTQUEUE_NUM, console_output); @@ -1171,7 +1271,8 @@ static void setup_console(void) } /*:*/ -/*M:010 Inter-guest networking is an interesting area. Simplest is to have a +/*M:010 + * Inter-guest networking is an interesting area. Simplest is to have a * --sharenet= option which opens or creates a named pipe. This can be * used to send packets to another guest in a 1:1 manner. * @@ -1185,7 +1286,8 @@ static void setup_console(void) * multiple inter-guest channels behind one interface, although it would * require some manner of hotplugging new virtio channels. * - * Finally, we could implement a virtio network switch in the kernel. :*/ + * Finally, we could implement a virtio network switch in the kernel. 
+:*/ static u32 str2ip(const char *ipaddr) { @@ -1210,11 +1312,13 @@ static void str2mac(const char *macaddr, unsigned char mac[6]) mac[5] = m[5]; } -/* This code is "adapted" from libbridge: it attaches the Host end of the +/* + * This code is "adapted" from libbridge: it attaches the Host end of the * network device to the bridge device specified by the command line. * * This is yet another James Morris contribution (I'm an IP-level guy, so I - * dislike bridging), and I just try not to break it. */ + * dislike bridging), and I just try not to break it. + */ static void add_to_bridge(int fd, const char *if_name, const char *br_name) { int ifidx; @@ -1234,9 +1338,11 @@ static void add_to_bridge(int fd, const char *if_name, const char *br_name) err(1, "can't add %s to bridge %s", if_name, br_name); } -/* This sets up the Host end of the network device with an IP address, brings +/* + * This sets up the Host end of the network device with an IP address, brings * it up so packets will flow, the copies the MAC address into the hwaddr - * pointer. */ + * pointer. + */ static void configure_device(int fd, const char *tapif, u32 ipaddr) { struct ifreq ifr; @@ -1263,10 +1369,12 @@ static int get_tun_device(char tapif[IFNAMSIZ]) /* Start with this zeroed. Messy but sure. */ memset(&ifr, 0, sizeof(ifr)); - /* We open the /dev/net/tun device and tell it we want a tap device. A + /* + * We open the /dev/net/tun device and tell it we want a tap device. A * tap device is like a tun device, only somehow different. To tell * the truth, I completely blundered my way through this code, but it - * works now! */ + * works now! + */ netfd = open_or_die("/dev/net/tun", O_RDWR); ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; strcpy(ifr.ifr_name, "tap%d"); @@ -1277,18 +1385,22 @@ static int get_tun_device(char tapif[IFNAMSIZ]) TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0) err(1, "Could not set features for tun device"); - /* We don't need checksums calculated for packets coming in this - * device: trust us! */ + /* + * We don't need checksums calculated for packets coming in this + * device: trust us! + */ ioctl(netfd, TUNSETNOCSUM, 1); memcpy(tapif, ifr.ifr_name, IFNAMSIZ); return netfd; } -/*L:195 Our network is a Host<->Guest network. This can either use bridging or +/*L:195 + * Our network is a Host<->Guest network. This can either use bridging or * routing, but the principle is the same: it uses the "tun" device to inject * packets into the Host as if they came in from a normal network card. We - * just shunt packets between the Guest and the tun device. */ + * just shunt packets between the Guest and the tun device. + */ static void setup_tun_net(char *arg) { struct device *dev; @@ -1305,13 +1417,14 @@ static void setup_tun_net(char *arg) dev = new_device("net", VIRTIO_ID_NET); dev->priv = net_info; - /* Network devices need a receive and a send queue, just like - * console. */ + /* Network devices need a recv and a send queue, just like console. */ add_virtqueue(dev, VIRTQUEUE_NUM, net_input); add_virtqueue(dev, VIRTQUEUE_NUM, net_output); - /* We need a socket to perform the magic network ioctls to bring up the - * tap interface, connect to the bridge etc. Any socket will do! */ + /* + * We need a socket to perform the magic network ioctls to bring up the + * tap interface, connect to the bridge etc. Any socket will do! 
+ */ ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); if (ipfd < 0) err(1, "opening IP socket"); @@ -1366,7 +1479,8 @@ static void setup_tun_net(char *arg) devices.device_num, tapif, arg); } -/* Our block (disk) device should be really simple: the Guest asks for a block +/* + * Our block (disk) device should be really simple: the Guest asks for a block * number and we read or write that position in the file. Unfortunately, that * was amazingly slow: the Guest waits until the read is finished before * running anything else, even if it could have been doing useful work. @@ -1374,7 +1488,9 @@ static void setup_tun_net(char *arg) * We could use async I/O, except it's reputed to suck so hard that characters * actually go missing from your code when you try to use it. * - * So we farm the I/O out to thread, and communicate with it via a pipe. */ + * So this was one reason why lguest now does all virtqueue servicing in + * separate threads: it's more efficient and more like a real device. + */ /* This hangs off device->priv. */ struct vblk_info @@ -1412,9 +1528,11 @@ static void blk_request(struct virtqueue *vq) /* Get the next request. */ head = wait_for_vq_desc(vq, iov, &out_num, &in_num); - /* Every block request should contain at least one output buffer + /* + * Every block request should contain at least one output buffer * (detailing the location on disk and the type of request) and one - * input buffer (to hold the result). */ + * input buffer (to hold the result). + */ if (out_num == 0 || in_num == 0) errx(1, "Bad virtblk cmd %u out=%u in=%u", head, out_num, in_num); @@ -1423,33 +1541,41 @@ static void blk_request(struct virtqueue *vq) in = convert(&iov[out_num+in_num-1], u8); off = out->sector * 512; - /* The block device implements "barriers", where the Guest indicates + /* + * The block device implements "barriers", where the Guest indicates * that it wants all previous writes to occur before this write. We * don't have a way of asking our kernel to do a barrier, so we just - * synchronize all the data in the file. Pretty poor, no? */ + * synchronize all the data in the file. Pretty poor, no? + */ if (out->type & VIRTIO_BLK_T_BARRIER) fdatasync(vblk->fd); - /* In general the virtio block driver is allowed to try SCSI commands. - * It'd be nice if we supported eject, for example, but we don't. */ + /* + * In general the virtio block driver is allowed to try SCSI commands. + * It'd be nice if we supported eject, for example, but we don't. + */ if (out->type & VIRTIO_BLK_T_SCSI_CMD) { fprintf(stderr, "Scsi commands unsupported\n"); *in = VIRTIO_BLK_S_UNSUPP; wlen = sizeof(*in); } else if (out->type & VIRTIO_BLK_T_OUT) { - /* Write */ - - /* Move to the right location in the block file. This can fail - * if they try to write past end. */ + /* + * Write + * + * Move to the right location in the block file. This can fail + * if they try to write past end. + */ if (lseek64(vblk->fd, off, SEEK_SET) != off) err(1, "Bad seek to sector %llu", out->sector); ret = writev(vblk->fd, iov+1, out_num-1); verbose("WRITE to sector %llu: %i\n", out->sector, ret); - /* Grr... Now we know how long the descriptor they sent was, we + /* + * Grr... Now we know how long the descriptor they sent was, we * make sure they didn't try to write over the end of the block - * file (possibly extending it). */ + * file (possibly extending it). 
+ */ if (ret > 0 && off + ret > vblk->len) { /* Trim it back to the correct length */ ftruncate64(vblk->fd, vblk->len); @@ -1459,10 +1585,12 @@ static void blk_request(struct virtqueue *vq) wlen = sizeof(*in); *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR); } else { - /* Read */ - - /* Move to the right location in the block file. This can fail - * if they try to read past end. */ + /* + * Read + * + * Move to the right location in the block file. This can fail + * if they try to read past end. + */ if (lseek64(vblk->fd, off, SEEK_SET) != off) err(1, "Bad seek to sector %llu", out->sector); @@ -1477,10 +1605,12 @@ static void blk_request(struct virtqueue *vq) } } - /* OK, so we noted that it was pretty poor to use an fdatasync as a + /* + * OK, so we noted that it was pretty poor to use an fdatasync as a * barrier. But Christoph Hellwig points out that we need a sync * *afterwards* as well: "Barriers specify no reordering to the front - * or the back." And Jens Axboe confirmed it, so here we are: */ + * or the back." And Jens Axboe confirmed it, so here we are: + */ if (out->type & VIRTIO_BLK_T_BARRIER) fdatasync(vblk->fd); @@ -1494,7 +1624,7 @@ static void setup_block_file(const char *filename) struct vblk_info *vblk; struct virtio_blk_config conf; - /* The device responds to return from I/O thread. */ + /* Creat the device. */ dev = new_device("block", VIRTIO_ID_BLOCK); /* The device has one virtqueue, where the Guest places requests. */ @@ -1513,8 +1643,10 @@ static void setup_block_file(const char *filename) /* Tell Guest how many sectors this device has. */ conf.capacity = cpu_to_le64(vblk->len / 512); - /* Tell Guest not to put in too many descriptors at once: two are used - * for the in and out elements. */ + /* + * Tell Guest not to put in too many descriptors at once: two are used + * for the in and out elements. + */ add_feature(dev, VIRTIO_BLK_F_SEG_MAX); conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2); @@ -1525,16 +1657,18 @@ static void setup_block_file(const char *filename) ++devices.device_num, le64_to_cpu(conf.capacity)); } -struct rng_info { - int rfd; -}; - -/* Our random number generator device reads from /dev/random into the Guest's +/*L:211 + * Our random number generator device reads from /dev/random into the Guest's * input buffers. The usual case is that the Guest doesn't want random numbers * and so has no buffers although /dev/random is still readable, whereas * console is the reverse. * - * The same logic applies, however. */ + * The same logic applies, however. + */ +struct rng_info { + int rfd; +}; + static void rng_input(struct virtqueue *vq) { int len; @@ -1547,9 +1681,11 @@ static void rng_input(struct virtqueue *vq) if (out_num) errx(1, "Output buffers in rng?"); - /* This is why we convert to iovecs: the readv() call uses them, and so + /* + * This is why we convert to iovecs: the readv() call uses them, and so * it reads straight into the Guest's buffer. We loop to make sure we - * fill it. */ + * fill it. + */ while (!iov_empty(iov, in_num)) { len = readv(rng_info->rfd, iov, in_num); if (len <= 0) @@ -1562,15 +1698,18 @@ static void rng_input(struct virtqueue *vq) add_used(vq, head, totlen); } -/* And this creates a "hardware" random number device for the Guest. */ +/*L:199 + * This creates a "hardware" random number device for the Guest. + */ static void setup_rng(void) { struct device *dev; struct rng_info *rng_info = malloc(sizeof(*rng_info)); + /* Our device's privat info simply contains the /dev/random fd. 
*/ rng_info->rfd = open_or_die("/dev/random", O_RDONLY); - /* The device responds to return from I/O thread. */ + /* Create the new device. */ dev = new_device("rng", VIRTIO_ID_RNG); dev->priv = rng_info; @@ -1586,8 +1725,10 @@ static void __attribute__((noreturn)) restart_guest(void) { unsigned int i; - /* Since we don't track all open fds, we simply close everything beyond - * stderr. */ + /* + * Since we don't track all open fds, we simply close everything beyond + * stderr. + */ for (i = 3; i < FD_SETSIZE; i++) close(i); @@ -1598,8 +1739,10 @@ static void __attribute__((noreturn)) restart_guest(void) err(1, "Could not exec %s", main_args[0]); } -/*L:220 Finally we reach the core of the Launcher which runs the Guest, serves - * its input and output, and finally, lays it to rest. */ +/*L:220 + * Finally we reach the core of the Launcher which runs the Guest, serves + * its input and output, and finally, lays it to rest. + */ static void __attribute__((noreturn)) run_guest(void) { for (;;) { @@ -1634,7 +1777,7 @@ static void __attribute__((noreturn)) run_guest(void) * * Are you ready? Take a deep breath and join me in the core of the Host, in * "make Host". - :*/ +:*/ static struct option opts[] = { { "verbose", 0, NULL, 'v' }, @@ -1655,8 +1798,7 @@ static void usage(void) /*L:105 The main routine is where the real work begins: */ int main(int argc, char *argv[]) { - /* Memory, top-level pagetable, code startpoint and size of the - * (optional) initrd. */ + /* Memory, code startpoint and size of the (optional) initrd. */ unsigned long mem = 0, start, initrd_size = 0; /* Two temporaries. */ int i, c; @@ -1668,24 +1810,30 @@ int main(int argc, char *argv[]) /* Save the args: we "reboot" by execing ourselves again. */ main_args = argv; - /* First we initialize the device list. We keep a pointer to the last + /* + * First we initialize the device list. We keep a pointer to the last * device, and the next interrupt number to use for devices (1: - * remember that 0 is used by the timer). */ + * remember that 0 is used by the timer). + */ devices.lastdev = NULL; devices.next_irq = 1; cpu_id = 0; - /* We need to know how much memory so we can set up the device + /* + * We need to know how much memory so we can set up the device * descriptor and memory pages for the devices as we parse the command * line. So we quickly look through the arguments to find the amount - * of memory now. */ + * of memory now. + */ for (i = 1; i < argc; i++) { if (argv[i][0] != '-') { mem = atoi(argv[i]) * 1024 * 1024; - /* We start by mapping anonymous pages over all of + /* + * We start by mapping anonymous pages over all of * guest-physical memory range. This fills it with 0, * and ensures that the Guest won't be killed when it - * tries to access it. */ + * tries to access it. + */ guest_base = map_zeroed_pages(mem / getpagesize() + DEVICE_PAGES); guest_limit = mem; @@ -1718,8 +1866,10 @@ int main(int argc, char *argv[]) usage(); } } - /* After the other arguments we expect memory and kernel image name, - * followed by command line arguments for the kernel. */ + /* + * After the other arguments we expect memory and kernel image name, + * followed by command line arguments for the kernel. 
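map_zeroed_pages() is, at heart, an anonymous zero-filled mmap() covering all of guest-physical memory. A minimal stand-alone sketch of the same idea (plain mmap() standing in for the launcher's helper, and without the extra DEVICE_PAGES slack):

#define _GNU_SOURCE
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

int main(int argc, char *argv[])
{
	unsigned long mem;
	void *guest_base;

	if (argc < 2)
		errx(1, "usage: %s <mem-in-MB>", argv[0]);
	mem = (unsigned long)atoi(argv[1]) << 20;

	/*
	 * Back all of "guest-physical" memory with anonymous zeroed pages up
	 * front, so any access within the limit just works.
	 */
	guest_base = mmap(NULL, mem, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (guest_base == MAP_FAILED)
		err(1, "mapping %lu bytes", mem);

	printf("guest base %p, guest limit %lu\n", guest_base, mem);
	munmap(guest_base, mem);
	return 0;
}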
+ */ if (optind + 2 > argc) usage(); @@ -1737,20 +1887,26 @@ int main(int argc, char *argv[]) /* Map the initrd image if requested (at top of physical memory) */ if (initrd_name) { initrd_size = load_initrd(initrd_name, mem); - /* These are the location in the Linux boot header where the - * start and size of the initrd are expected to be found. */ + /* + * These are the location in the Linux boot header where the + * start and size of the initrd are expected to be found. + */ boot->hdr.ramdisk_image = mem - initrd_size; boot->hdr.ramdisk_size = initrd_size; /* The bootloader type 0xFF means "unknown"; that's OK. */ boot->hdr.type_of_loader = 0xFF; } - /* The Linux boot header contains an "E820" memory map: ours is a - * simple, single region. */ + /* + * The Linux boot header contains an "E820" memory map: ours is a + * simple, single region. + */ boot->e820_entries = 1; boot->e820_map[0] = ((struct e820entry) { 0, mem, E820_RAM }); - /* The boot header contains a command line pointer: we put the command - * line after the boot header. */ + /* + * The boot header contains a command line pointer: we put the command + * line after the boot header. + */ boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1); /* We use a simple helper to copy the arguments separated by spaces. */ concat((char *)(boot + 1), argv+optind+2); @@ -1764,8 +1920,10 @@ int main(int argc, char *argv[]) /* Tell the entry path not to try to reload segment registers. */ boot->hdr.loadflags |= KEEP_SEGMENTS; - /* We tell the kernel to initialize the Guest: this returns the open - * /dev/lguest file descriptor. */ + /* + * We tell the kernel to initialize the Guest: this returns the open + * /dev/lguest file descriptor. + */ tell_kernel(start); /* Ensure that we terminate if a child dies. */ diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index 313389cd50d2..5136dad57cbb 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -17,8 +17,7 @@ /* Pages for switcher itself, then two pages per cpu */ #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) -/* We map at -4M (-2M when PAE is activated) for ease of mapping - * into the guest (one PTE page). */ +/* We map at -4M (-2M for PAE) for ease of mapping (one PTE page). */ #ifdef CONFIG_X86_PAE #define SWITCHER_ADDR 0xFFE00000 #else diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index 33600a66755f..cceb73e12e50 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -30,7 +30,8 @@ #include #include -/*G:030 But first, how does our Guest contact the Host to ask for privileged +/*G:030 + * But first, how does our Guest contact the Host to ask for privileged * operations? There are two ways: the direct way is to make a "hypercall", * to make requests of the Host Itself. * @@ -41,16 +42,15 @@ * * Grossly invalid calls result in Sudden Death at the hands of the vengeful * Host, rather than returning failure. This reflects Winston Churchill's - * definition of a gentleman: "someone who is only rude intentionally". */ -/*:*/ + * definition of a gentleman: "someone who is only rude intentionally". +:*/ /* Can't use our min() macro here: needs to be a constant */ #define LGUEST_IRQS (NR_IRQS < 32 ? 
NR_IRQS: 32) #define LHCALL_RING_SIZE 64 struct hcall_args { - /* These map directly onto eax, ebx, ecx, edx and esi - * in struct lguest_regs */ + /* These map directly onto eax/ebx/ecx/edx/esi in struct lguest_regs */ unsigned long arg0, arg1, arg2, arg3, arg4; }; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index f2bf1f73d468..025c04d18f2b 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -22,7 +22,8 @@ * * So how does the kernel know it's a Guest? We'll see that later, but let's * just say that we end up here where we replace the native functions various - * "paravirt" structures with our Guest versions, then boot like normal. :*/ + * "paravirt" structures with our Guest versions, then boot like normal. +:*/ /* * Copyright (C) 2006, Rusty Russell IBM Corporation. @@ -74,7 +75,8 @@ * * The Guest in our tale is a simple creature: identical to the Host but * behaving in simplified but equivalent ways. In particular, the Guest is the - * same kernel as the Host (or at least, built from the same source code). :*/ + * same kernel as the Host (or at least, built from the same source code). +:*/ struct lguest_data lguest_data = { .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, @@ -85,7 +87,8 @@ struct lguest_data lguest_data = { .syscall_vec = SYSCALL_VECTOR, }; -/*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a +/*G:037 + * async_hcall() is pretty simple: I'm quite proud of it really. We have a * ring buffer of stored hypercalls which the Host will run though next time we * do a normal hypercall. Each entry in the ring has 5 slots for the hypercall * arguments, and a "hcall_status" word which is 0 if the call is ready to go, @@ -94,7 +97,8 @@ struct lguest_data lguest_data = { * If we come around to a slot which hasn't been finished, then the table is * full and we just make the hypercall directly. This has the nice side * effect of causing the Host to run all the stored calls in the ring buffer - * which empties it for next time! */ + * which empties it for next time! + */ static void async_hcall(unsigned long call, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4) @@ -103,9 +107,11 @@ static void async_hcall(unsigned long call, unsigned long arg1, static unsigned int next_call; unsigned long flags; - /* Disable interrupts if not already disabled: we don't want an + /* + * Disable interrupts if not already disabled: we don't want an * interrupt handler making a hypercall while we're already doing - * one! */ + * one! + */ local_irq_save(flags); if (lguest_data.hcall_status[next_call] != 0xFF) { /* Table full, so do normal hcall which will flush table. */ @@ -125,8 +131,9 @@ static void async_hcall(unsigned long call, unsigned long arg1, local_irq_restore(flags); } -/*G:035 Notice the lazy_hcall() above, rather than hcall(). This is our first - * real optimization trick! +/*G:035 + * Notice the lazy_hcall() above, rather than hcall(). This is our first real + * optimization trick! * * When lazy_mode is set, it means we're allowed to defer all hypercalls and do * them as a batch when lazy_mode is eventually turned off. Because hypercalls @@ -136,7 +143,8 @@ static void async_hcall(unsigned long call, unsigned long arg1, * lguest_leave_lazy_mode(). 
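To see the ring discipline of async_hcall() in isolation, here is a toy single-process model. Every name is invented for the sketch: the real ring lives in lguest_data, the Host does the draining, and the real insertion runs with interrupts disabled and a wmb() before the status byte is flipped.

#include <stdio.h>
#include <string.h>

#define RING_SIZE 8

struct call { unsigned long nr, arg1; };

static struct call ring[RING_SIZE];
static unsigned char hcall_status[RING_SIZE];	/* 0xFF = free, 0 = pending */
static unsigned int next_call;

/* Stand-in for the Host running every stored call. */
static void run_pending(void)
{
	for (unsigned int i = 0; i < RING_SIZE; i++)
		if (hcall_status[i] == 0) {
			printf("host runs call %lu(%lu)\n",
			       ring[i].nr, ring[i].arg1);
			hcall_status[i] = 0xFF;
		}
}

static void async_call(unsigned long nr, unsigned long arg1)
{
	if (hcall_status[next_call] != 0xFF) {
		/* Ring full: make a "direct" call, which also drains the ring. */
		run_pending();
		printf("direct call %lu(%lu)\n", nr, arg1);
		return;
	}
	ring[next_call].nr = nr;
	ring[next_call].arg1 = arg1;
	/* Arguments must be in place before the slot is marked pending. */
	hcall_status[next_call] = 0;
	if (++next_call == RING_SIZE)
		next_call = 0;
}

int main(void)
{
	memset(hcall_status, 0xFF, sizeof(hcall_status));
	for (unsigned long i = 1; i <= 10; i++)
		async_call(i, i * 100);
	run_pending();
	return 0;
}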
* * So, when we're in lazy mode, we call async_hcall() to store the call for - * future processing: */ + * future processing: + */ static void lazy_hcall1(unsigned long call, unsigned long arg1) { @@ -208,9 +216,11 @@ static void lguest_end_context_switch(struct task_struct *next) * check there before it tries to deliver an interrupt. */ -/* save_flags() is expected to return the processor state (ie. "flags"). The +/* + * save_flags() is expected to return the processor state (ie. "flags"). The * flags word contains all kind of stuff, but in practice Linux only cares - * about the interrupt flag. Our "save_flags()" just returns that. */ + * about the interrupt flag. Our "save_flags()" just returns that. + */ static unsigned long save_fl(void) { return lguest_data.irq_enabled; @@ -222,13 +232,15 @@ static void irq_disable(void) lguest_data.irq_enabled = 0; } -/* Let's pause a moment. Remember how I said these are called so often? +/* + * Let's pause a moment. Remember how I said these are called so often? * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to * break some rules. In particular, these functions are assumed to save their * own registers if they need to: normal C functions assume they can trash the * eax register. To use normal C functions, we use * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the - * C function, then restores it. */ + * C function, then restores it. + */ PV_CALLEE_SAVE_REGS_THUNK(save_fl); PV_CALLEE_SAVE_REGS_THUNK(irq_disable); /*:*/ @@ -237,18 +249,20 @@ PV_CALLEE_SAVE_REGS_THUNK(irq_disable); extern void lg_irq_enable(void); extern void lg_restore_fl(unsigned long flags); -/*M:003 Note that we don't check for outstanding interrupts when we re-enable - * them (or when we unmask an interrupt). This seems to work for the moment, - * since interrupts are rare and we'll just get the interrupt on the next timer - * tick, but now we can run with CONFIG_NO_HZ, we should revisit this. One way - * would be to put the "irq_enabled" field in a page by itself, and have the - * Host write-protect it when an interrupt comes in when irqs are disabled. - * There will then be a page fault as soon as interrupts are re-enabled. +/*M:003 + * Note that we don't check for outstanding interrupts when we re-enable them + * (or when we unmask an interrupt). This seems to work for the moment, since + * interrupts are rare and we'll just get the interrupt on the next timer tick, + * but now we can run with CONFIG_NO_HZ, we should revisit this. One way would + * be to put the "irq_enabled" field in a page by itself, and have the Host + * write-protect it when an interrupt comes in when irqs are disabled. There + * will then be a page fault as soon as interrupts are re-enabled. * * A better method is to implement soft interrupt disable generally for x86: * instead of disabling interrupts, we set a flag. If an interrupt does come * in, we then disable them for real. This is uncommon, so we could simply use - * a hypercall for interrupt control and not worry about efficiency. :*/ + * a hypercall for interrupt control and not worry about efficiency. +:*/ /*G:034 * The Interrupt Descriptor Table (IDT). @@ -261,10 +275,12 @@ extern void lg_restore_fl(unsigned long flags); static void lguest_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) { - /* The gate_desc structure is 8 bytes long: we hand it to the Host in + /* + * The gate_desc structure is 8 bytes long: we hand it to the Host in * two 32-bit chunks. 
The whole 32-bit kernel used to hand descriptors * around like this; typesafety wasn't a big concern in Linux's early - * years. */ + * years. + */ u32 *desc = (u32 *)g; /* Keep the local copy up to date. */ native_write_idt_entry(dt, entrynum, g); @@ -272,9 +288,11 @@ static void lguest_write_idt_entry(gate_desc *dt, kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); } -/* Changing to a different IDT is very rare: we keep the IDT up-to-date every +/* + * Changing to a different IDT is very rare: we keep the IDT up-to-date every * time it is written, so we can simply loop through all entries and tell the - * Host about them. */ + * Host about them. + */ static void lguest_load_idt(const struct desc_ptr *desc) { unsigned int i; @@ -305,9 +323,11 @@ static void lguest_load_gdt(const struct desc_ptr *desc) kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); } -/* For a single GDT entry which changes, we do the lazy thing: alter our GDT, +/* + * For a single GDT entry which changes, we do the lazy thing: alter our GDT, * then tell the Host to reload the entire thing. This operation is so rare - * that this naive implementation is reasonable. */ + * that this naive implementation is reasonable. + */ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, const void *desc, int type) { @@ -317,29 +337,36 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, dt[entrynum].a, dt[entrynum].b); } -/* OK, I lied. There are three "thread local storage" GDT entries which change +/* + * OK, I lied. There are three "thread local storage" GDT entries which change * on every context switch (these three entries are how glibc implements - * __thread variables). So we have a hypercall specifically for this case. */ + * __thread variables). So we have a hypercall specifically for this case. + */ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) { - /* There's one problem which normal hardware doesn't have: the Host + /* + * There's one problem which normal hardware doesn't have: the Host * can't handle us removing entries we're currently using. So we clear - * the GS register here: if it's needed it'll be reloaded anyway. */ + * the GS register here: if it's needed it'll be reloaded anyway. + */ lazy_load_gs(0); lazy_hcall2(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu); } -/*G:038 That's enough excitement for now, back to ploughing through each of - * the different pv_ops structures (we're about 1/3 of the way through). +/*G:038 + * That's enough excitement for now, back to ploughing through each of the + * different pv_ops structures (we're about 1/3 of the way through). * * This is the Local Descriptor Table, another weird Intel thingy. Linux only * uses this for some strange applications like Wine. We don't do anything - * here, so they'll get an informative and friendly Segmentation Fault. */ + * here, so they'll get an informative and friendly Segmentation Fault. + */ static void lguest_set_ldt(const void *addr, unsigned entries) { } -/* This loads a GDT entry into the "Task Register": that entry points to a +/* + * This loads a GDT entry into the "Task Register": that entry points to a * structure called the Task State Segment. Some comments scattered though the * kernel code indicate that this used for task switching in ages past, along * with blood sacrifice and astrology. 
@@ -347,19 +374,21 @@ static void lguest_set_ldt(const void *addr, unsigned entries) * Now there's nothing interesting in here that we don't get told elsewhere. * But the native version uses the "ltr" instruction, which makes the Host * complain to the Guest about a Segmentation Fault and it'll oops. So we - * override the native version with a do-nothing version. */ + * override the native version with a do-nothing version. + */ static void lguest_load_tr_desc(void) { } -/* The "cpuid" instruction is a way of querying both the CPU identity +/* + * The "cpuid" instruction is a way of querying both the CPU identity * (manufacturer, model, etc) and its features. It was introduced before the * Pentium in 1993 and keeps getting extended by both Intel, AMD and others. * As you might imagine, after a decade and a half this treatment, it is now a * giant ball of hair. Its entry in the current Intel manual runs to 28 pages. * * This instruction even it has its own Wikipedia entry. The Wikipedia entry - * has been translated into 4 languages. I am not making this up! + * has been translated into 5 languages. I am not making this up! * * We could get funky here and identify ourselves as "GenuineLguest", but * instead we just use the real "cpuid" instruction. Then I pretty much turned @@ -371,7 +400,8 @@ static void lguest_load_tr_desc(void) * Replacing the cpuid so we can turn features off is great for the kernel, but * anyone (including userspace) can just use the raw "cpuid" instruction and * the Host won't even notice since it isn't privileged. So we try not to get - * too worked up about it. */ + * too worked up about it. + */ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { @@ -379,43 +409,63 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, native_cpuid(ax, bx, cx, dx); switch (function) { - case 0: /* ID and highest CPUID. Futureproof a little by sticking to - * older ones. */ + /* + * CPUID 0 gives the highest legal CPUID number (and the ID string). + * We futureproof our code a little by sticking to known CPUID values. + */ + case 0: if (*ax > 5) *ax = 5; break; - case 1: /* Basic feature request. */ - /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ + + /* + * CPUID 1 is a basic feature request. + * + * CX: we only allow kernel to see SSE3, CMPXCHG16B and SSSE3 + * DX: SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU and PAE. + */ + case 1: *cx &= 0x00002201; - /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */ *dx &= 0x07808151; - /* The Host can do a nice optimization if it knows that the + /* + * The Host can do a nice optimization if it knows that the * kernel mappings (addresses above 0xC0000000 or whatever * PAGE_OFFSET is set to) haven't changed. But Linux calls * flush_tlb_user() for both user and kernel mappings unless - * the Page Global Enable (PGE) feature bit is set. */ + * the Page Global Enable (PGE) feature bit is set. + */ *dx |= 0x00002000; - /* We also lie, and say we're family id 5. 6 or greater + /* + * We also lie, and say we're family id 5. 6 or greater * leads to a rdmsr in early_init_intel which we can't handle. - * Family ID is returned as bits 8-12 in ax. */ + * Family ID is returned as bits 8-12 in ax. + */ *ax &= 0xFFFFF0FF; *ax |= 0x00000500; break; + /* + * 0x80000000 returns the highest Extended Function, so we futureproof + * like we do above by limiting it to known fields. 
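You can watch the same leaf-1 doctoring from userspace. The sketch assumes an x86 machine and GCC/Clang's <cpuid.h>; it only prints the masked values rather than feeding them back to anything.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int ax, bx, cx, dx;

	if (!__get_cpuid(1, &ax, &bx, &cx, &dx))
		return 1;

	/* The same masks lguest_cpuid() applies for CPUID leaf 1. */
	cx &= 0x00002201;	/* SSE3, CMPXCHG16B, SSSE3 */
	dx &= 0x07808151;	/* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE */
	dx |= 0x00002000;	/* ...plus PGE, so kernel mappings are not re-flushed */

	/* Pretend to be family 5 (bits 8-12 of EAX, as the comment puts it). */
	ax &= 0xFFFFF0FF;
	ax |= 0x00000500;

	printf("eax=%#010x ecx=%#010x edx=%#010x\n", ax, cx, dx);
	return 0;
}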
+ */ case 0x80000000: - /* Futureproof this a little: if they ask how much extended - * processor information there is, limit it to known fields. */ if (*ax > 0x80000008) *ax = 0x80000008; break; + + /* + * PAE systems can mark pages as non-executable. Linux calls this the + * NX bit. Intel calls it XD (eXecute Disable), AMD EVP (Enhanced + * Virus Protection). We just switch turn if off here, since we don't + * support it. + */ case 0x80000001: - /* Here we should fix nx cap depending on host. */ - /* For this version of PAE, we just clear NX bit. */ *dx &= ~(1 << 20); break; } } -/* Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4. +/* + * Intel has four control registers, imaginatively named cr0, cr2, cr3 and cr4. * I assume there's a cr1, but it hasn't bothered us yet, so we'll not bother * it. The Host needs to know when the Guest wants to change them, so we have * a whole series of functions like read_cr0() and write_cr0(). @@ -430,7 +480,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, * name like "FPUTRAP bit" be a little less cryptic? * * We store cr0 locally because the Host never changes it. The Guest sometimes - * wants to read it and we'd prefer not to bother the Host unnecessarily. */ + * wants to read it and we'd prefer not to bother the Host unnecessarily. + */ static unsigned long current_cr0; static void lguest_write_cr0(unsigned long val) { @@ -443,18 +494,22 @@ static unsigned long lguest_read_cr0(void) return current_cr0; } -/* Intel provided a special instruction to clear the TS bit for people too cool +/* + * Intel provided a special instruction to clear the TS bit for people too cool * to use write_cr0() to do it. This "clts" instruction is faster, because all - * the vowels have been optimized out. */ + * the vowels have been optimized out. + */ static void lguest_clts(void) { lazy_hcall1(LHCALL_TS, 0); current_cr0 &= ~X86_CR0_TS; } -/* cr2 is the virtual address of the last page fault, which the Guest only ever +/* + * cr2 is the virtual address of the last page fault, which the Guest only ever * reads. The Host kindly writes this into our "struct lguest_data", so we - * just read it out of there. */ + * just read it out of there. + */ static unsigned long lguest_read_cr2(void) { return lguest_data.cr2; @@ -463,10 +518,12 @@ static unsigned long lguest_read_cr2(void) /* See lguest_set_pte() below. */ static bool cr3_changed = false; -/* cr3 is the current toplevel pagetable page: the principle is the same as +/* + * cr3 is the current toplevel pagetable page: the principle is the same as * cr0. Keep a local copy, and tell the Host when it changes. The only * difference is that our local copy is in lguest_data because the Host needs - * to set it upon our initial hypercall. */ + * to set it upon our initial hypercall. + */ static void lguest_write_cr3(unsigned long cr3) { lguest_data.pgdir = cr3; @@ -538,10 +595,12 @@ static void lguest_write_cr4(unsigned long val) * the real page tables based on the Guests'. */ -/* The Guest calls this to set a second-level entry (pte), ie. to map a page +/* + * The Guest calls this to set a second-level entry (pte), ie. to map a page * into a process' address space. We set the entry then tell the Host the * toplevel and address this corresponds to. The Guest uses one pagetable per - * process, so we need to tell the Host which one we're changing (mm->pgd). */ + * process, so we need to tell the Host which one we're changing (mm->pgd). 
+ */ static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { @@ -560,10 +619,13 @@ static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, lguest_pte_update(mm, addr, ptep); } -/* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd +/* + * The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd * to set a middle-level entry when PAE is activated. + * * Again, we set the entry then tell the Host which page we changed, - * and the index of the entry we changed. */ + * and the index of the entry we changed. + */ #ifdef CONFIG_X86_PAE static void lguest_set_pud(pud_t *pudp, pud_t pudval) { @@ -582,8 +644,7 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) } #else -/* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not - * activated. */ +/* The Guest calls lguest_set_pmd to set a top-level entry when !PAE. */ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) { native_set_pmd(pmdp, pmdval); @@ -592,7 +653,8 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) } #endif -/* There are a couple of legacy places where the kernel sets a PTE, but we +/* + * There are a couple of legacy places where the kernel sets a PTE, but we * don't know the top level any more. This is useless for us, since we don't * know which pagetable is changing or what address, so we just tell the Host * to forget all of them. Fortunately, this is very rare. @@ -600,7 +662,8 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) * ... except in early boot when the kernel sets up the initial pagetables, * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell * the Host anything changed until we've done the first page table switch, - * which brings boot back to 0.25 seconds. */ + * which brings boot back to 0.25 seconds. + */ static void lguest_set_pte(pte_t *ptep, pte_t pteval) { native_set_pte(ptep, pteval); @@ -628,7 +691,8 @@ void lguest_pmd_clear(pmd_t *pmdp) } #endif -/* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on +/* + * Unfortunately for Lguest, the pv_mmu_ops for page tables were based on * native page table operations. On native hardware you can set a new page * table entry whenever you want, but if you want to remove one you have to do * a TLB flush (a TLB is a little cache of page table entries kept by the CPU). @@ -637,24 +701,29 @@ void lguest_pmd_clear(pmd_t *pmdp) * called when a valid entry is written, not when it's removed (ie. marked not * present). Instead, this is where we come when the Guest wants to remove a * page table entry: we tell the Host to set that entry to 0 (ie. the present - * bit is zero). */ + * bit is zero). + */ static void lguest_flush_tlb_single(unsigned long addr) { /* Simply set it to zero: if it was not, it will fault back in. */ lazy_hcall3(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0); } -/* This is what happens after the Guest has removed a large number of entries. +/* + * This is what happens after the Guest has removed a large number of entries. * This tells the Host that any of the page table entries for userspace might - * have changed, ie. virtual addresses below PAGE_OFFSET. */ + * have changed, ie. virtual addresses below PAGE_OFFSET. + */ static void lguest_flush_tlb_user(void) { lazy_hcall1(LHCALL_FLUSH_TLB, 0); } -/* This is called when the kernel page tables have changed. That's not very +/* + * This is called when the kernel page tables have changed. 
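Stripped of the real hypercalls, the 1.83-to-0.25 second trick described above is just a deferred-notification flag. A toy model of the idea (not the kernel code; tell_host() stands in for the hypercalls and the flag mirrors cr3_changed):

#include <stdbool.h>
#include <stdio.h>

static bool cr3_changed;

static void tell_host(const char *what)
{
	printf("hcall: %s\n", what);
}

static void set_pte(void)
{
	/* native_set_pte() would update the real entry here. */
	if (cr3_changed)
		tell_host("some page table entry changed");
}

static void write_cr3(void)
{
	tell_host("new top-level page table");
	cr3_changed = true;
}

int main(void)
{
	set_pte();	/* early boot: silent, keeps boot fast */
	set_pte();
	write_cr3();	/* first switch */
	set_pte();	/* from now on we must tell the Host */
	return 0;
}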
That's not very * common (unless the Guest is using highmem, which makes the Guest extremely - * slow), so it's worth separating this from the user flushing above. */ + * slow), so it's worth separating this from the user flushing above. + */ static void lguest_flush_tlb_kernel(void) { lazy_hcall1(LHCALL_FLUSH_TLB, 1); @@ -691,23 +760,27 @@ static struct irq_chip lguest_irq_controller = { .unmask = enable_lguest_irq, }; -/* This sets up the Interrupt Descriptor Table (IDT) entry for each hardware +/* + * This sets up the Interrupt Descriptor Table (IDT) entry for each hardware * interrupt (except 128, which is used for system calls), and then tells the * Linux infrastructure that each interrupt is controlled by our level-based - * lguest interrupt controller. */ + * lguest interrupt controller. + */ static void __init lguest_init_IRQ(void) { unsigned int i; for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { - /* Some systems map "vectors" to interrupts weirdly. Lguest has - * a straightforward 1 to 1 mapping, so force that here. */ + /* Some systems map "vectors" to interrupts weirdly. Not us! */ __get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR; if (i != SYSCALL_VECTOR) set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); } - /* This call is required to set up for 4k stacks, where we have - * separate stacks for hard and soft interrupts. */ + + /* + * This call is required to set up for 4k stacks, where we have + * separate stacks for hard and soft interrupts. + */ irq_ctx_init(smp_processor_id()); } @@ -729,31 +802,39 @@ static unsigned long lguest_get_wallclock(void) return lguest_data.time.tv_sec; } -/* The TSC is an Intel thing called the Time Stamp Counter. The Host tells us +/* + * The TSC is an Intel thing called the Time Stamp Counter. The Host tells us * what speed it runs at, or 0 if it's unusable as a reliable clock source. * This matches what we want here: if we return 0 from this function, the x86 - * TSC clock will give up and not register itself. */ + * TSC clock will give up and not register itself. + */ static unsigned long lguest_tsc_khz(void) { return lguest_data.tsc_khz; } -/* If we can't use the TSC, the kernel falls back to our lower-priority - * "lguest_clock", where we read the time value given to us by the Host. */ +/* + * If we can't use the TSC, the kernel falls back to our lower-priority + * "lguest_clock", where we read the time value given to us by the Host. + */ static cycle_t lguest_clock_read(struct clocksource *cs) { unsigned long sec, nsec; - /* Since the time is in two parts (seconds and nanoseconds), we risk + /* + * Since the time is in two parts (seconds and nanoseconds), we risk * reading it just as it's changing from 99 & 0.999999999 to 100 and 0, * and getting 99 and 0. As Linux tends to come apart under the stress - * of time travel, we must be careful: */ + * of time travel, we must be careful: + */ do { /* First we read the seconds part. */ sec = lguest_data.time.tv_sec; - /* This read memory barrier tells the compiler and the CPU that + /* + * This read memory barrier tells the compiler and the CPU that * this can't be reordered: we have to complete the above - * before going on. */ + * before going on. + */ rmb(); /* Now we read the nanoseconds part. 
*/ nsec = lguest_data.time.tv_nsec; @@ -777,9 +858,11 @@ static struct clocksource lguest_clock = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -/* We also need a "struct clock_event_device": Linux asks us to set it to go +/* + * We also need a "struct clock_event_device": Linux asks us to set it to go * off some time in the future. Actually, James Morris figured all this out, I - * just applied the patch. */ + * just applied the patch. + */ static int lguest_clockevent_set_next_event(unsigned long delta, struct clock_event_device *evt) { @@ -829,8 +912,10 @@ static struct clock_event_device lguest_clockevent = { .max_delta_ns = LG_CLOCK_MAX_DELTA, }; -/* This is the Guest timer interrupt handler (hardware interrupt 0). We just - * call the clockevent infrastructure and it does whatever needs doing. */ +/* + * This is the Guest timer interrupt handler (hardware interrupt 0). We just + * call the clockevent infrastructure and it does whatever needs doing. + */ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) { unsigned long flags; @@ -841,10 +926,12 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) local_irq_restore(flags); } -/* At some point in the boot process, we get asked to set up our timing +/* + * At some point in the boot process, we get asked to set up our timing * infrastructure. The kernel doesn't expect timer interrupts before this, but * we cleverly initialized the "blocked_interrupts" field of "struct - * lguest_data" so that timer interrupts were blocked until now. */ + * lguest_data" so that timer interrupts were blocked until now. + */ static void lguest_time_init(void) { /* Set up the timer interrupt (0) to go to our simple timer routine */ @@ -868,14 +955,16 @@ static void lguest_time_init(void) * to work. They're pretty simple. */ -/* The Guest needs to tell the Host what stack it expects traps to use. For +/* + * The Guest needs to tell the Host what stack it expects traps to use. For * native hardware, this is part of the Task State Segment mentioned above in * lguest_load_tr_desc(), but to help hypervisors there's this special call. * * We tell the Host the segment we want to use (__KERNEL_DS is the kernel data * segment), the privilege level (we're privilege level 1, the Host is 0 and * will not tolerate us trying to use that), the stack pointer, and the number - * of pages in the stack. */ + * of pages in the stack. + */ static void lguest_load_sp0(struct tss_struct *tss, struct thread_struct *thread) { @@ -889,7 +978,8 @@ static void lguest_set_debugreg(int regno, unsigned long value) /* FIXME: Implement */ } -/* There are times when the kernel wants to make sure that no memory writes are +/* + * There are times when the kernel wants to make sure that no memory writes are * caught in the cache (that they've all reached real hardware devices). This * doesn't matter for the Guest which has virtual hardware. * @@ -903,11 +993,13 @@ static void lguest_wbinvd(void) { } -/* If the Guest expects to have an Advanced Programmable Interrupt Controller, +/* + * If the Guest expects to have an Advanced Programmable Interrupt Controller, * we play dumb by ignoring writes and returning 0 for reads. So it's no * longer Programmable nor Controlling anything, and I don't think 8 lines of * code qualifies for Advanced. It will also never interrupt anything. It - * does, however, allow us to get through the Linux boot code. */ + * does, however, allow us to get through the Linux boot code. 
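The retry loop in lguest_clock_read() can be modelled in plain C11: a second thread plays the Host and the atomics stand in for the rmb() barriers. A toy, not the kernel code; build with -pthread.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/* Toy stand-in for lguest_data.time, written by a "Host" thread. */
static struct {
	_Atomic long tv_sec;
	_Atomic long tv_nsec;
} clock_area;

static void *host(void *arg)
{
	for (long s = 99; s <= 200; s++)
		for (long ns = 0; ns < 1000000000; ns += 250000000) {
			atomic_store(&clock_area.tv_nsec, ns);
			atomic_store(&clock_area.tv_sec, s);
		}
	return arg;
}

int main(void)
{
	pthread_t t;
	long sec, nsec;

	pthread_create(&t, NULL, host, NULL);

	/* Same shape as lguest_clock_read(): retry until the seconds part
	 * did not change underneath us, so we don't pair the seconds of one
	 * update with the nanoseconds of another. */
	do {
		sec = atomic_load(&clock_area.tv_sec);
		nsec = atomic_load(&clock_area.tv_nsec);
	} while (sec != atomic_load(&clock_area.tv_sec));

	printf("read %ld.%09ld\n", sec, nsec);
	pthread_join(t, NULL);
	return 0;
}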
+ */ #ifdef CONFIG_X86_LOCAL_APIC static void lguest_apic_write(u32 reg, u32 v) { @@ -956,11 +1048,13 @@ static void lguest_safe_halt(void) kvm_hypercall0(LHCALL_HALT); } -/* The SHUTDOWN hypercall takes a string to describe what's happening, and +/* + * The SHUTDOWN hypercall takes a string to describe what's happening, and * an argument which says whether this to restart (reboot) the Guest or not. * * Note that the Host always prefers that the Guest speak in physical addresses - * rather than virtual addresses, so we use __pa() here. */ + * rather than virtual addresses, so we use __pa() here. + */ static void lguest_power_off(void) { kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"), @@ -991,8 +1085,10 @@ static __init char *lguest_memory_setup(void) * nice to move it back to lguest_init. Patch welcome... */ atomic_notifier_chain_register(&panic_notifier_list, &paniced); - /* The Linux bootloader header contains an "e820" memory map: the - * Launcher populated the first entry with our memory limit. */ + /* + *The Linux bootloader header contains an "e820" memory map: the + * Launcher populated the first entry with our memory limit. + */ e820_add_region(boot_params.e820_map[0].addr, boot_params.e820_map[0].size, boot_params.e820_map[0].type); @@ -1001,16 +1097,17 @@ static __init char *lguest_memory_setup(void) return "LGUEST"; } -/* We will eventually use the virtio console device to produce console output, +/* + * We will eventually use the virtio console device to produce console output, * but before that is set up we use LHCALL_NOTIFY on normal memory to produce - * console output. */ + * console output. + */ static __init int early_put_chars(u32 vtermno, const char *buf, int count) { char scratch[17]; unsigned int len = count; - /* We use a nul-terminated string, so we have to make a copy. Icky, - * huh? */ + /* We use a nul-terminated string, so we make a copy. Icky, huh? */ if (len > sizeof(scratch) - 1) len = sizeof(scratch) - 1; scratch[len] = '\0'; @@ -1021,8 +1118,10 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) return len; } -/* Rebooting also tells the Host we're finished, but the RESTART flag tells the - * Launcher to reboot us. */ +/* + * Rebooting also tells the Host we're finished, but the RESTART flag tells the + * Launcher to reboot us. + */ static void lguest_restart(char *reason) { kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); @@ -1049,7 +1148,8 @@ static void lguest_restart(char *reason) * fit comfortably. * * First we need assembly templates of each of the patchable Guest operations, - * and these are in i386_head.S. */ + * and these are in i386_head.S. + */ /*G:060 We construct a table from the assembler templates: */ static const struct lguest_insns @@ -1060,9 +1160,11 @@ static const struct lguest_insns [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, }; -/* Now our patch routine is fairly simple (based on the native one in +/* + * Now our patch routine is fairly simple (based on the native one in * paravirt.c). If we have a replacement, we copy it in and return how much of - * the available space we used. */ + * the available space we used. + */ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, unsigned long addr, unsigned len) { @@ -1074,8 +1176,7 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, insn_len = lguest_insns[type].end - lguest_insns[type].start; - /* Similarly if we can't fit replacement (shouldn't happen, but let's - * be thorough). 
*/ + /* Similarly if it can't fit (doesn't happen, but let's be thorough). */ if (len < insn_len) return paravirt_patch_default(type, clobber, ibuf, addr, len); @@ -1084,22 +1185,28 @@ static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf, return insn_len; } -/*G:029 Once we get to lguest_init(), we know we're a Guest. The various +/*G:029 + * Once we get to lguest_init(), we know we're a Guest. The various * pv_ops structures in the kernel provide points for (almost) every routine we - * have to override to avoid privileged instructions. */ + * have to override to avoid privileged instructions. + */ __init void lguest_init(void) { - /* We're under lguest, paravirt is enabled, and we're running at - * privilege level 1, not 0 as normal. */ + /* We're under lguest. */ pv_info.name = "lguest"; + /* Paravirt is enabled. */ pv_info.paravirt_enabled = 1; + /* We're running at privilege level 1, not 0 as normal. */ pv_info.kernel_rpl = 1; + /* Everyone except Xen runs with this set. */ pv_info.shared_kernel_pmd = 1; - /* We set up all the lguest overrides for sensitive operations. These - * are detailed with the operations themselves. */ + /* + * We set up all the lguest overrides for sensitive operations. These + * are detailed with the operations themselves. + */ - /* interrupt-related operations */ + /* Interrupt-related operations */ pv_irq_ops.init_IRQ = lguest_init_IRQ; pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); @@ -1107,11 +1214,11 @@ __init void lguest_init(void) pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); pv_irq_ops.safe_halt = lguest_safe_halt; - /* init-time operations */ + /* Setup operations */ pv_init_ops.memory_setup = lguest_memory_setup; pv_init_ops.patch = lguest_patch; - /* Intercepts of various cpu instructions */ + /* Intercepts of various CPU instructions */ pv_cpu_ops.load_gdt = lguest_load_gdt; pv_cpu_ops.cpuid = lguest_cpuid; pv_cpu_ops.load_idt = lguest_load_idt; @@ -1132,7 +1239,7 @@ __init void lguest_init(void) pv_cpu_ops.start_context_switch = paravirt_start_context_switch; pv_cpu_ops.end_context_switch = lguest_end_context_switch; - /* pagetable management */ + /* Pagetable management */ pv_mmu_ops.write_cr3 = lguest_write_cr3; pv_mmu_ops.flush_tlb_user = lguest_flush_tlb_user; pv_mmu_ops.flush_tlb_single = lguest_flush_tlb_single; @@ -1154,54 +1261,71 @@ __init void lguest_init(void) pv_mmu_ops.pte_update_defer = lguest_pte_update; #ifdef CONFIG_X86_LOCAL_APIC - /* apic read/write intercepts */ + /* APIC read/write intercepts */ set_lguest_basic_apic_ops(); #endif - /* time operations */ + /* Time operations */ pv_time_ops.get_wallclock = lguest_get_wallclock; pv_time_ops.time_init = lguest_time_init; pv_time_ops.get_tsc_khz = lguest_tsc_khz; - /* Now is a good time to look at the implementations of these functions - * before returning to the rest of lguest_init(). */ + /* + * Now is a good time to look at the implementations of these functions + * before returning to the rest of lguest_init(). + */ - /*G:070 Now we've seen all the paravirt_ops, we return to + /*G:070 + * Now we've seen all the paravirt_ops, we return to * lguest_init() where the rest of the fairly chaotic boot setup - * occurs. */ + * occurs. + */ - /* The stack protector is a weird thing where gcc places a canary + /* + * The stack protector is a weird thing where gcc places a canary * value on the stack and then checks it on return. 
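The patching scheme above amounts to "copy a byte template over the call site if one exists and fits, otherwise fall back". A stand-alone sketch with made-up templates (the real table is built from the lgstart_/lgend_ markers in i386_head.S, and the fallback is paravirt_patch_default()):

#include <stdio.h>
#include <string.h>

struct insns { const unsigned char *start, *end; };

/* Two fake "assembler templates" (just bytes here). */
static const unsigned char tmpl_cli[] = { 0x90, 0x90 };
static const unsigned char tmpl_pushf[] = { 0x90, 0x90, 0x90 };

static const struct insns table[] = {
	{ tmpl_cli, tmpl_cli + sizeof(tmpl_cli) },
	{ tmpl_pushf, tmpl_pushf + sizeof(tmpl_pushf) },
};

/* Copy the template in if we have one and it fits; otherwise report that
 * zero bytes were patched (standing in for the generic fallback). */
static unsigned int patch(unsigned int type, void *ibuf, unsigned int len)
{
	unsigned int insn_len;

	if (type >= sizeof(table) / sizeof(table[0]) || !table[type].start)
		return 0;
	insn_len = table[type].end - table[type].start;
	if (len < insn_len)
		return 0;
	memcpy(ibuf, table[type].start, insn_len);
	return insn_len;
}

int main(void)
{
	unsigned char site[4];
	printf("patched %u bytes\n", patch(1, site, sizeof(site)));
	return 0;
}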
This file is * compiled with -fno-stack-protector it, so we got this far without * problems. The value of the canary is kept at offset 20 from the * %gs register, so we need to set that up before calling C functions - * in other files. */ + * in other files. + */ setup_stack_canary_segment(0); - /* We could just call load_stack_canary_segment(), but we might as - * call switch_to_new_gdt() which loads the whole table and sets up - * the per-cpu segment descriptor register %fs as well. */ + + /* + * We could just call load_stack_canary_segment(), but we might as well + * call switch_to_new_gdt() which loads the whole table and sets up the + * per-cpu segment descriptor register %fs as well. + */ switch_to_new_gdt(0); /* As described in head_32.S, we map the first 128M of memory. */ max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; - /* The Host<->Guest Switcher lives at the top of our address space, and + /* + * The Host<->Guest Switcher lives at the top of our address space, and * the Host told us how big it is when we made LGUEST_INIT hypercall: - * it put the answer in lguest_data.reserve_mem */ + * it put the answer in lguest_data.reserve_mem + */ reserve_top_address(lguest_data.reserve_mem); - /* If we don't initialize the lock dependency checker now, it crashes - * paravirt_disable_iospace. */ + /* + * If we don't initialize the lock dependency checker now, it crashes + * paravirt_disable_iospace. + */ lockdep_init(); - /* The IDE code spends about 3 seconds probing for disks: if we reserve + /* + * The IDE code spends about 3 seconds probing for disks: if we reserve * all the I/O ports up front it can't get them and so doesn't probe. * Other device drivers are similar (but less severe). This cuts the - * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. */ + * kernel boot time on my machine from 4.1 seconds to 0.45 seconds. + */ paravirt_disable_iospace(); - /* This is messy CPU setup stuff which the native boot code does before - * start_kernel, so we have to do, too: */ + /* + * This is messy CPU setup stuff which the native boot code does before + * start_kernel, so we have to do, too: + */ cpu_detect(&new_cpu_data); /* head.S usually sets up the first capability word, so do it here. */ new_cpu_data.x86_capability[0] = cpuid_edx(1); @@ -1218,22 +1342,28 @@ __init void lguest_init(void) acpi_ht = 0; #endif - /* We set the preferred console to "hvc". This is the "hypervisor + /* + * We set the preferred console to "hvc". This is the "hypervisor * virtual console" driver written by the PowerPC people, which we also - * adapted for lguest's use. */ + * adapted for lguest's use. + */ add_preferred_console("hvc", 0, NULL); /* Register our very early console. */ virtio_cons_early_init(early_put_chars); - /* Last of all, we set the power management poweroff hook to point to + /* + * Last of all, we set the power management poweroff hook to point to * the Guest routine to power off, and the reboot hook to our restart - * routine. */ + * routine. + */ pm_power_off = lguest_power_off; machine_ops.restart = lguest_restart; - /* Now we're set up, call i386_start_kernel() in head32.c and we proceed - * to boot as normal. It never returns. */ + /* + * Now we're set up, call i386_start_kernel() in head32.c and we proceed + * to boot as normal. It never returns. 
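The pv_ops business boils down to a structure of function pointers whose defaults are native and which the Guest overwrites at init time. A miniature model of that shape (nothing here is the kernel's actual paravirt machinery):

#include <stdio.h>

/* A miniature "pv_ops"-style structure of overridable operations. */
struct cpu_ops {
	void (*write_idt_entry)(int entrynum);
	void (*load_tr_desc)(void);
};

static void native_write_idt_entry(int entrynum)
{
	printf("native: write IDT entry %d directly\n", entrynum);
}

static void native_load_tr_desc(void)
{
	printf("native: ltr instruction\n");
}

/* Defaults point at the native versions... */
static struct cpu_ops cpu_ops = {
	.write_idt_entry = native_write_idt_entry,
	.load_tr_desc = native_load_tr_desc,
};

/* ...and guest_init() swaps in hypercall-based (or do-nothing) ones. */
static void guest_write_idt_entry(int entrynum)
{
	printf("guest: hypercall to load IDT entry %d\n", entrynum);
}

static void guest_load_tr_desc(void)
{
	/* Nothing: the real instruction would just trap. */
}

static void guest_init(void)
{
	cpu_ops.write_idt_entry = guest_write_idt_entry;
	cpu_ops.load_tr_desc = guest_load_tr_desc;
}

int main(void)
{
	cpu_ops.write_idt_entry(3);	/* native path */
	guest_init();
	cpu_ops.write_idt_entry(3);	/* now the guest override */
	cpu_ops.load_tr_desc();
	return 0;
}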
+ */ i386_start_kernel(); } /* diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index a9c8cfe61cd4..db6aa95eb054 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -5,7 +5,8 @@ #include #include -/*G:020 Our story starts with the kernel booting into startup_32 in +/*G:020 + * Our story starts with the kernel booting into startup_32 in * arch/x86/kernel/head_32.S. It expects a boot header, which is created by * the bootloader (the Launcher in our case). * @@ -21,11 +22,14 @@ * data without remembering to subtract __PAGE_OFFSET! * * The .section line puts this code in .init.text so it will be discarded after - * boot. */ + * boot. + */ .section .init.text, "ax", @progbits ENTRY(lguest_entry) - /* We make the "initialization" hypercall now to tell the Host about - * us, and also find out where it put our page tables. */ + /* + * We make the "initialization" hypercall now to tell the Host about + * us, and also find out where it put our page tables. + */ movl $LHCALL_LGUEST_INIT, %eax movl $lguest_data - __PAGE_OFFSET, %ebx .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ @@ -33,13 +37,14 @@ ENTRY(lguest_entry) /* Set up the initial stack so we can run C code. */ movl $(init_thread_union+THREAD_SIZE),%esp - /* Jumps are relative, and we're running __PAGE_OFFSET too low at the - * moment. */ + /* Jumps are relative: we're running __PAGE_OFFSET too low. */ jmp lguest_init+__PAGE_OFFSET -/*G:055 We create a macro which puts the assembler code between lgstart_ and - * lgend_ markers. These templates are put in the .text section: they can't be - * discarded after boot as we may need to patch modules, too. */ +/*G:055 + * We create a macro which puts the assembler code between lgstart_ and lgend_ + * markers. These templates are put in the .text section: they can't be + * discarded after boot as we may need to patch modules, too. + */ .text #define LGUEST_PATCH(name, insns...) \ lgstart_##name: insns; lgend_##name:; \ @@ -48,58 +53,74 @@ ENTRY(lguest_entry) LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) -/*G:033 But using those wrappers is inefficient (we'll see why that doesn't - * matter for save_fl and irq_disable later). If we write our routines - * carefully in assembler, we can avoid clobbering any registers and avoid - * jumping through the wrapper functions. +/*G:033 + * But using those wrappers is inefficient (we'll see why that doesn't matter + * for save_fl and irq_disable later). If we write our routines carefully in + * assembler, we can avoid clobbering any registers and avoid jumping through + * the wrapper functions. * * I skipped over our first piece of assembler, but this one is worth studying - * in a bit more detail so I'll describe in easy stages. First, the routine - * to enable interrupts: */ + * in a bit more detail so I'll describe in easy stages. First, the routine to + * enable interrupts: + */ ENTRY(lg_irq_enable) - /* The reverse of irq_disable, this sets lguest_data.irq_enabled to - * X86_EFLAGS_IF (ie. "Interrupts enabled"). */ + /* + * The reverse of irq_disable, this sets lguest_data.irq_enabled to + * X86_EFLAGS_IF (ie. "Interrupts enabled"). 
+ */ movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled - /* But now we need to check if the Host wants to know: there might have + /* + * But now we need to check if the Host wants to know: there might have * been interrupts waiting to be delivered, in which case it will have * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we - * jump to send_interrupts, otherwise we're done. */ + * jump to send_interrupts, otherwise we're done. + */ testl $0, lguest_data+LGUEST_DATA_irq_pending jnz send_interrupts - /* One cool thing about x86 is that you can do many things without using + /* + * One cool thing about x86 is that you can do many things without using * a register. In this case, the normal path hasn't needed to save or - * restore any registers at all! */ + * restore any registers at all! + */ ret send_interrupts: - /* OK, now we need a register: eax is used for the hypercall number, + /* + * OK, now we need a register: eax is used for the hypercall number, * which is LHCALL_SEND_INTERRUPTS. * * We used not to bother with this pending detection at all, which was * much simpler. Sooner or later the Host would realize it had to * send us an interrupt. But that turns out to make performance 7 * times worse on a simple tcp benchmark. So now we do this the hard - * way. */ + * way. + */ pushl %eax movl $LHCALL_SEND_INTERRUPTS, %eax - /* This is a vmcall instruction (same thing that KVM uses). Older + /* + * This is a vmcall instruction (same thing that KVM uses). Older * assembler versions might not know the "vmcall" instruction, so we - * create one manually here. */ + * create one manually here. + */ .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ popl %eax ret -/* Finally, the "popf" or "restore flags" routine. The %eax register holds the +/* + * Finally, the "popf" or "restore flags" routine. The %eax register holds the * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're - * enabling interrupts again, if it's 0 we're leaving them off. */ + * enabling interrupts again, if it's 0 we're leaving them off. + */ ENTRY(lg_restore_fl) /* This is just "lguest_data.irq_enabled = flags;" */ movl %eax, lguest_data+LGUEST_DATA_irq_enabled - /* Now, if the %eax value has enabled interrupts and + /* + * Now, if the %eax value has enabled interrupts and * lguest_data.irq_pending is set, we want to tell the Host so it can * deliver any outstanding interrupts. Fortunately, both values will * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl" * instruction will AND them together for us. If both are set, we - * jump to send_interrupts. */ + * jump to send_interrupts. + */ testl lguest_data+LGUEST_DATA_irq_pending, %eax jnz send_interrupts /* Again, the normal path has used no extra registers. Clever, huh? */ @@ -109,22 +130,24 @@ ENTRY(lg_restore_fl) .global lguest_noirq_start .global lguest_noirq_end -/*M:004 When the Host reflects a trap or injects an interrupt into the Guest, - * it sets the eflags interrupt bit on the stack based on - * lguest_data.irq_enabled, so the Guest iret logic does the right thing when - * restoring it. However, when the Host sets the Guest up for direct traps, - * such as system calls, the processor is the one to push eflags onto the - * stack, and the interrupt bit will be 1 (in reality, interrupts are always - * enabled in the Guest). 
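In C, the intent of lg_irq_enable and lg_restore_fl reads roughly as below. This is a toy rendering only: the real versions are assembler precisely so the fast path clobbers no registers, and the slow path is the LHCALL_SEND_INTERRUPTS hypercall rather than a printf.

#include <stdio.h>

#define X86_EFLAGS_IF 512UL

/* Toy stand-ins for the two words in lguest_data. */
static unsigned long irq_enabled;
static unsigned long irq_pending;

static void send_interrupts_hcall(void)
{
	printf("LHCALL_SEND_INTERRUPTS\n");
	irq_pending = 0;
}

/* What lg_irq_enable intends, minus the register juggling. */
static void irq_enable(void)
{
	irq_enabled = X86_EFLAGS_IF;
	if (irq_pending)
		send_interrupts_hcall();
}

/* What lg_restore_fl does: flags is either X86_EFLAGS_IF or 0, and the
 * "testl ..., %eax" trick is just this AND. */
static void restore_fl(unsigned long flags)
{
	irq_enabled = flags;
	if (flags & irq_pending)
		send_interrupts_hcall();
}

int main(void)
{
	irq_pending = X86_EFLAGS_IF;	/* pretend the Host queued one */
	irq_enable();			/* slow path: tells the Host */
	restore_fl(0);			/* leaving interrupts off: silent */
	restore_fl(X86_EFLAGS_IF);	/* nothing pending now: silent */
	return 0;
}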
+/*M:004 + * When the Host reflects a trap or injects an interrupt into the Guest, it + * sets the eflags interrupt bit on the stack based on lguest_data.irq_enabled, + * so the Guest iret logic does the right thing when restoring it. However, + * when the Host sets the Guest up for direct traps, such as system calls, the + * processor is the one to push eflags onto the stack, and the interrupt bit + * will be 1 (in reality, interrupts are always enabled in the Guest). * * This turns out to be harmless: the only trap which should happen under Linux * with interrupts disabled is Page Fault (due to our lazy mapping of vmalloc * regions), which has to be reflected through the Host anyway. If another * trap *does* go off when interrupts are disabled, the Guest will panic, and - * we'll never get to this iret! :*/ + * we'll never get to this iret! +:*/ -/*G:045 There is one final paravirt_op that the Guest implements, and glancing - * at it you can see why I left it to last. It's *cool*! It's in *assembler*! +/*G:045 + * There is one final paravirt_op that the Guest implements, and glancing at it + * you can see why I left it to last. It's *cool*! It's in *assembler*! * * The "iret" instruction is used to return from an interrupt or trap. The * stack looks like this: @@ -148,15 +171,18 @@ ENTRY(lg_restore_fl) * return to userspace or wherever. Our solution to this is to surround the * code with lguest_noirq_start: and lguest_noirq_end: labels. We tell the * Host that it is *never* to interrupt us there, even if interrupts seem to be - * enabled. */ + * enabled. + */ ENTRY(lguest_iret) pushl %eax movl 12(%esp), %eax lguest_noirq_start: - /* Note the %ss: segment prefix here. Normal data accesses use the + /* + * Note the %ss: segment prefix here. Normal data accesses use the * "ds" segment, but that will have already been restored for whatever * we're returning to (such as userspace): we can't trust it. The %ss: - * prefix makes sure we use the stack segment, which is still valid. */ + * prefix makes sure we use the stack segment, which is still valid. + */ movl %eax,%ss:lguest_data+LGUEST_DATA_irq_enabled popl %eax iret diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index a6974e9b8ebf..cd058bc903ff 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -1,6 +1,8 @@ -/*P:400 This contains run_guest() which actually calls into the Host<->Guest +/*P:400 + * This contains run_guest() which actually calls into the Host<->Guest * Switcher and analyzes the return, such as determining if the Guest wants the - * Host to do something. This file also contains useful helper routines. :*/ + * Host to do something. This file also contains useful helper routines. +:*/ #include #include #include @@ -24,7 +26,8 @@ static struct page **switcher_page; /* This One Big lock protects all inter-guest data structures. */ DEFINE_MUTEX(lguest_lock); -/*H:010 We need to set up the Switcher at a high virtual address. Remember the +/*H:010 + * We need to set up the Switcher at a high virtual address. Remember the * Switcher is a few hundred bytes of assembler code which actually changes the * CPU to run the Guest, and then changes back to the Host when a trap or * interrupt happens. @@ -33,7 +36,8 @@ DEFINE_MUTEX(lguest_lock); * Host since it will be running as the switchover occurs. * * Trying to map memory at a particular address is an unusual thing to do, so - * it's not a simple one-liner. */ + * it's not a simple one-liner. 
+ */ static __init int map_switcher(void) { int i, err; @@ -47,8 +51,10 @@ static __init int map_switcher(void) * easy. */ - /* We allocate an array of struct page pointers. map_vm_area() wants - * this, rather than just an array of pages. */ + /* + * We allocate an array of struct page pointers. map_vm_area() wants + * this, rather than just an array of pages. + */ switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES, GFP_KERNEL); if (!switcher_page) { @@ -56,8 +62,10 @@ static __init int map_switcher(void) goto out; } - /* Now we actually allocate the pages. The Guest will see these pages, - * so we make sure they're zeroed. */ + /* + * Now we actually allocate the pages. The Guest will see these pages, + * so we make sure they're zeroed. + */ for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) { unsigned long addr = get_zeroed_page(GFP_KERNEL); if (!addr) { @@ -67,19 +75,23 @@ static __init int map_switcher(void) switcher_page[i] = virt_to_page(addr); } - /* First we check that the Switcher won't overlap the fixmap area at + /* + * First we check that the Switcher won't overlap the fixmap area at * the top of memory. It's currently nowhere near, but it could have - * very strange effects if it ever happened. */ + * very strange effects if it ever happened. + */ if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){ err = -ENOMEM; printk("lguest: mapping switcher would thwack fixmap\n"); goto free_pages; } - /* Now we reserve the "virtual memory area" we want: 0xFFC00000 + /* + * Now we reserve the "virtual memory area" we want: 0xFFC00000 * (SWITCHER_ADDR). We might not get it in theory, but in practice * it's worked so far. The end address needs +1 because __get_vm_area - * allocates an extra guard page, so we need space for that. */ + * allocates an extra guard page, so we need space for that. + */ switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE, VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE); @@ -89,11 +101,13 @@ static __init int map_switcher(void) goto free_pages; } - /* This code actually sets up the pages we've allocated to appear at + /* + * This code actually sets up the pages we've allocated to appear at * SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the * kind of pages we're mapping (kernel pages), and a pointer to our * array of struct pages. It increments that pointer, but we don't - * care. */ + * care. + */ pagep = switcher_page; err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep); if (err) { @@ -101,8 +115,10 @@ static __init int map_switcher(void) goto free_vma; } - /* Now the Switcher is mapped at the right address, we can't fail! - * Copy in the compiled-in Switcher code (from _switcher.S). */ + /* + * Now the Switcher is mapped at the right address, we can't fail! + * Copy in the compiled-in Switcher code (from _switcher.S). + */ memcpy(switcher_vma->addr, start_switcher_text, end_switcher_text - start_switcher_text); @@ -124,8 +140,7 @@ out: } /*:*/ -/* Cleaning up the mapping when the module is unloaded is almost... - * too easy. */ +/* Cleaning up the mapping when the module is unloaded is almost... too easy. */ static void unmap_switcher(void) { unsigned int i; @@ -151,16 +166,19 @@ static void unmap_switcher(void) * But we can't trust the Guest: it might be trying to access the Launcher * code. We have to check that the range is below the pfn_limit the Launcher * gave us. We have to make sure that addr + len doesn't give us a false - * positive by overflowing, too. 
*/ + * positive by overflowing, too. + */ bool lguest_address_ok(const struct lguest *lg, unsigned long addr, unsigned long len) { return (addr+len) / PAGE_SIZE < lg->pfn_limit && (addr+len >= addr); } -/* This routine copies memory from the Guest. Here we can see how useful the +/* + * This routine copies memory from the Guest. Here we can see how useful the * kill_lguest() routine we met in the Launcher can be: we return a random - * value (all zeroes) instead of needing to return an error. */ + * value (all zeroes) instead of needing to return an error. + */ void __lgread(struct lg_cpu *cpu, void *b, unsigned long addr, unsigned bytes) { if (!lguest_address_ok(cpu->lg, addr, bytes) @@ -181,9 +199,11 @@ void __lgwrite(struct lg_cpu *cpu, unsigned long addr, const void *b, } /*:*/ -/*H:030 Let's jump straight to the the main loop which runs the Guest. +/*H:030 + * Let's jump straight to the the main loop which runs the Guest. * Remember, this is called by the Launcher reading /dev/lguest, and we keep - * going around and around until something interesting happens. */ + * going around and around until something interesting happens. + */ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) { /* We stop running once the Guest is dead. */ @@ -195,8 +215,10 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) if (cpu->hcall) do_hypercalls(cpu); - /* It's possible the Guest did a NOTIFY hypercall to the - * Launcher, in which case we return from the read() now. */ + /* + * It's possible the Guest did a NOTIFY hypercall to the + * Launcher, in which case we return from the read() now. + */ if (cpu->pending_notify) { if (!send_notify_to_eventfd(cpu)) { if (put_user(cpu->pending_notify, user)) @@ -209,29 +231,39 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) if (signal_pending(current)) return -ERESTARTSYS; - /* Check if there are any interrupts which can be delivered now: + /* + * Check if there are any interrupts which can be delivered now: * if so, this sets up the hander to be executed when we next - * run the Guest. */ + * run the Guest. + */ irq = interrupt_pending(cpu, &more); if (irq < LGUEST_IRQS) try_deliver_interrupt(cpu, irq, more); - /* All long-lived kernel loops need to check with this horrible + /* + * All long-lived kernel loops need to check with this horrible * thing called the freezer. If the Host is trying to suspend, - * it stops us. */ + * it stops us. + */ try_to_freeze(); - /* Just make absolutely sure the Guest is still alive. One of - * those hypercalls could have been fatal, for example. */ + /* + * Just make absolutely sure the Guest is still alive. One of + * those hypercalls could have been fatal, for example. + */ if (cpu->lg->dead) break; - /* If the Guest asked to be stopped, we sleep. The Guest's - * clock timer will wake us. */ + /* + * If the Guest asked to be stopped, we sleep. The Guest's + * clock timer will wake us. + */ if (cpu->halted) { set_current_state(TASK_INTERRUPTIBLE); - /* Just before we sleep, make sure no interrupt snuck in - * which we should be doing. */ + /* + * Just before we sleep, make sure no interrupt snuck in + * which we should be doing. + */ if (interrupt_pending(cpu, &more) < LGUEST_IRQS) set_current_state(TASK_RUNNING); else @@ -239,8 +271,10 @@ int run_guest(struct lg_cpu *cpu, unsigned long __user *user) continue; } - /* OK, now we're ready to jump into the Guest. First we put up - * the "Do Not Disturb" sign: */ + /* + * OK, now we're ready to jump into the Guest. 
First we put up + * the "Do Not Disturb" sign: + */ local_irq_disable(); /* Actually run the Guest until something happens. */ @@ -327,8 +361,10 @@ static void __exit fini(void) } /*:*/ -/* The Host side of lguest can be a module. This is a nice way for people to - * play with it. */ +/* + * The Host side of lguest can be a module. This is a nice way for people to + * play with it. + */ module_init(init); module_exit(fini); MODULE_LICENSE("GPL"); diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c index c29ffa19cb74..787ab4bc09f0 100644 --- a/drivers/lguest/hypercalls.c +++ b/drivers/lguest/hypercalls.c @@ -1,8 +1,10 @@ -/*P:500 Just as userspace programs request kernel operations through a system +/*P:500 + * Just as userspace programs request kernel operations through a system * call, the Guest requests Host operations through a "hypercall". You might * notice this nomenclature doesn't really follow any logic, but the name has * been around for long enough that we're stuck with it. As you'd expect, this - * code is basically a one big switch statement. :*/ + * code is basically a one big switch statement. +:*/ /* Copyright (C) 2006 Rusty Russell IBM Corporation @@ -28,30 +30,41 @@ #include #include "lg.h" -/*H:120 This is the core hypercall routine: where the Guest gets what it wants. - * Or gets killed. Or, in the case of LHCALL_SHUTDOWN, both. */ +/*H:120 + * This is the core hypercall routine: where the Guest gets what it wants. + * Or gets killed. Or, in the case of LHCALL_SHUTDOWN, both. + */ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) { switch (args->arg0) { case LHCALL_FLUSH_ASYNC: - /* This call does nothing, except by breaking out of the Guest - * it makes us process all the asynchronous hypercalls. */ + /* + * This call does nothing, except by breaking out of the Guest + * it makes us process all the asynchronous hypercalls. + */ break; case LHCALL_SEND_INTERRUPTS: - /* This call does nothing too, but by breaking out of the Guest - * it makes us process any pending interrupts. */ + /* + * This call does nothing too, but by breaking out of the Guest + * it makes us process any pending interrupts. + */ break; case LHCALL_LGUEST_INIT: - /* You can't get here unless you're already initialized. Don't - * do that. */ + /* + * You can't get here unless you're already initialized. Don't + * do that. + */ kill_guest(cpu, "already have lguest_data"); break; case LHCALL_SHUTDOWN: { - /* Shutdown is such a trivial hypercall that we do it in four - * lines right here. */ char msg[128]; - /* If the lgread fails, it will call kill_guest() itself; the - * kill_guest() with the message will be ignored. */ + /* + * Shutdown is such a trivial hypercall that we do it in four + * lines right here. + * + * If the lgread fails, it will call kill_guest() itself; the + * kill_guest() with the message will be ignored. + */ __lgread(cpu, msg, args->arg1, sizeof(msg)); msg[sizeof(msg)-1] = '\0'; kill_guest(cpu, "CRASH: %s", msg); @@ -60,16 +73,17 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) break; } case LHCALL_FLUSH_TLB: - /* FLUSH_TLB comes in two flavors, depending on the - * argument: */ + /* FLUSH_TLB comes in two flavors, depending on the argument: */ if (args->arg1) guest_pagetable_clear_all(cpu); else guest_pagetable_flush_user(cpu); break; - /* All these calls simply pass the arguments through to the right - * routines. */ + /* + * All these calls simply pass the arguments through to the right + * routines. 
+ */ case LHCALL_NEW_PGTABLE: guest_new_pagetable(cpu, args->arg1); break; @@ -112,15 +126,16 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args) kill_guest(cpu, "Bad hypercall %li\n", args->arg0); } } -/*:*/ -/*H:124 Asynchronous hypercalls are easy: we just look in the array in the +/*H:124 + * Asynchronous hypercalls are easy: we just look in the array in the * Guest's "struct lguest_data" to see if any new ones are marked "ready". * * We are careful to do these in order: obviously we respect the order the * Guest put them in the ring, but we also promise the Guest that they will * happen before any normal hypercall (which is why we check this before - * checking for a normal hcall). */ + * checking for a normal hcall). + */ static void do_async_hcalls(struct lg_cpu *cpu) { unsigned int i; @@ -133,22 +148,28 @@ static void do_async_hcalls(struct lg_cpu *cpu) /* We process "struct lguest_data"s hcalls[] ring once. */ for (i = 0; i < ARRAY_SIZE(st); i++) { struct hcall_args args; - /* We remember where we were up to from last time. This makes + /* + * We remember where we were up to from last time. This makes * sure that the hypercalls are done in the order the Guest - * places them in the ring. */ + * places them in the ring. + */ unsigned int n = cpu->next_hcall; /* 0xFF means there's no call here (yet). */ if (st[n] == 0xFF) break; - /* OK, we have hypercall. Increment the "next_hcall" cursor, - * and wrap back to 0 if we reach the end. */ + /* + * OK, we have hypercall. Increment the "next_hcall" cursor, + * and wrap back to 0 if we reach the end. + */ if (++cpu->next_hcall == LHCALL_RING_SIZE) cpu->next_hcall = 0; - /* Copy the hypercall arguments into a local copy of - * the hcall_args struct. */ + /* + * Copy the hypercall arguments into a local copy of the + * hcall_args struct. + */ if (copy_from_user(&args, &cpu->lg->lguest_data->hcalls[n], sizeof(struct hcall_args))) { kill_guest(cpu, "Fetching async hypercalls"); @@ -164,19 +185,25 @@ static void do_async_hcalls(struct lg_cpu *cpu) break; } - /* Stop doing hypercalls if they want to notify the Launcher: - * it needs to service this first. */ + /* + * Stop doing hypercalls if they want to notify the Launcher: + * it needs to service this first. + */ if (cpu->pending_notify) break; } } -/* Last of all, we look at what happens first of all. The very first time the - * Guest makes a hypercall, we end up here to set things up: */ +/* + * Last of all, we look at what happens first of all. The very first time the + * Guest makes a hypercall, we end up here to set things up: + */ static void initialize(struct lg_cpu *cpu) { - /* You can't do anything until you're initialized. The Guest knows the - * rules, so we're unforgiving here. */ + /* + * You can't do anything until you're initialized. The Guest knows the + * rules, so we're unforgiving here. + */ if (cpu->hcall->arg0 != LHCALL_LGUEST_INIT) { kill_guest(cpu, "hypercall %li before INIT", cpu->hcall->arg0); return; @@ -185,32 +212,40 @@ static void initialize(struct lg_cpu *cpu) if (lguest_arch_init_hypercalls(cpu)) kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); - /* The Guest tells us where we're not to deliver interrupts by putting - * the range of addresses into "struct lguest_data". */ + /* + * The Guest tells us where we're not to deliver interrupts by putting + * the range of addresses into "struct lguest_data". 
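As a rough model of the hcalls[] ring that do_async_hcalls() walks above (a sketch only: the sizes, slot layout and the way a finished slot is handed back are simplified; the 0xFF "empty" marker and the wrapping cursor are the parts taken from the code):

#define RING_SIZE  64        /* stands in for LHCALL_RING_SIZE */
#define SLOT_EMPTY 0xFF      /* "there's no call here (yet)" */

struct hcall_slot { unsigned long arg0, arg1, arg2, arg3; };

struct hcall_ring {
	unsigned char status[RING_SIZE];      /* models the st[] array */
	struct hcall_slot calls[RING_SIZE];   /* models hcalls[] */
	unsigned int next;                    /* models cpu->next_hcall */
};

/* Consume every ready slot, in the order the producer queued them. */
static void consume_ring(struct hcall_ring *r,
			 void (*do_call)(struct hcall_slot *))
{
	unsigned int i;

	for (i = 0; i < RING_SIZE; i++) {
		unsigned int n = r->next;        /* resume where we left off */

		if (r->status[n] == SLOT_EMPTY)
			break;                   /* nothing more to do yet */

		/* Advance the cursor, wrapping back to 0 at the end. */
		if (++r->next == RING_SIZE)
			r->next = 0;

		do_call(&r->calls[n]);
		r->status[n] = SLOT_EMPTY;       /* hand the slot back */
	}
}

The cursor is what gives the ordering promise: asynchronous calls run in the order the Guest queued them, even across several trips through run_guest().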
+ */ if (get_user(cpu->lg->noirq_start, &cpu->lg->lguest_data->noirq_start) || get_user(cpu->lg->noirq_end, &cpu->lg->lguest_data->noirq_end)) kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); - /* We write the current time into the Guest's data page once so it can - * set its clock. */ + /* + * We write the current time into the Guest's data page once so it can + * set its clock. + */ write_timestamp(cpu); /* page_tables.c will also do some setup. */ page_table_guest_data_init(cpu); - /* This is the one case where the above accesses might have been the + /* + * This is the one case where the above accesses might have been the * first write to a Guest page. This may have caused a copy-on-write * fault, but the old page might be (read-only) in the Guest - * pagetable. */ + * pagetable. + */ guest_pagetable_clear_all(cpu); } /*:*/ -/*M:013 If a Guest reads from a page (so creates a mapping) that it has never +/*M:013 + * If a Guest reads from a page (so creates a mapping) that it has never * written to, and then the Launcher writes to it (ie. the output of a virtual * device), the Guest will still see the old page. In practice, this never * happens: why would the Guest read a page which it has never written to? But - * a similar scenario might one day bite us, so it's worth mentioning. :*/ + * a similar scenario might one day bite us, so it's worth mentioning. +:*/ /*H:100 * Hypercalls @@ -229,17 +264,22 @@ void do_hypercalls(struct lg_cpu *cpu) return; } - /* The Guest has initialized. + /* + * The Guest has initialized. * - * Look in the hypercall ring for the async hypercalls: */ + * Look in the hypercall ring for the async hypercalls: + */ do_async_hcalls(cpu); - /* If we stopped reading the hypercall ring because the Guest did a + /* + * If we stopped reading the hypercall ring because the Guest did a * NOTIFY to the Launcher, we want to return now. Otherwise we do - * the hypercall. */ + * the hypercall. + */ if (!cpu->pending_notify) { do_hcall(cpu, cpu->hcall); - /* Tricky point: we reset the hcall pointer to mark the + /* + * Tricky point: we reset the hcall pointer to mark the * hypercall as "done". We use the hcall pointer rather than * the trap number to indicate a hypercall is pending. * Normally it doesn't matter: the Guest will run again and @@ -248,13 +288,16 @@ void do_hypercalls(struct lg_cpu *cpu) * However, if we are signalled or the Guest sends I/O to the * Launcher, the run_guest() loop will exit without running the * Guest. When it comes back it would try to re-run the - * hypercall. Finding that bug sucked. */ + * hypercall. Finding that bug sucked. + */ cpu->hcall = NULL; } } -/* This routine supplies the Guest with time: it's used for wallclock time at - * initial boot and as a rough time source if the TSC isn't available. */ +/* + * This routine supplies the Guest with time: it's used for wallclock time at + * initial boot and as a rough time source if the TSC isn't available. + */ void write_timestamp(struct lg_cpu *cpu) { struct timespec now; diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c index 0e9067b0d507..18648180db02 100644 --- a/drivers/lguest/interrupts_and_traps.c +++ b/drivers/lguest/interrupts_and_traps.c @@ -1,4 +1,5 @@ -/*P:800 Interrupts (traps) are complicated enough to earn their own file. +/*P:800 + * Interrupts (traps) are complicated enough to earn their own file. 
* There are three classes of interrupts: * * 1) Real hardware interrupts which occur while we're running the Guest, @@ -10,7 +11,8 @@ * just like real hardware would deliver them. Traps from the Guest can be set * up to go directly back into the Guest, but sometimes the Host wants to see * them first, so we also have a way of "reflecting" them into the Guest as if - * they had been delivered to it directly. :*/ + * they had been delivered to it directly. +:*/ #include #include #include @@ -26,8 +28,10 @@ static unsigned long idt_address(u32 lo, u32 hi) return (lo & 0x0000FFFF) | (hi & 0xFFFF0000); } -/* The "type" of the interrupt handler is a 4 bit field: we only support a - * couple of types. */ +/* + * The "type" of the interrupt handler is a 4 bit field: we only support a + * couple of types. + */ static int idt_type(u32 lo, u32 hi) { return (hi >> 8) & 0xF; @@ -39,8 +43,10 @@ static bool idt_present(u32 lo, u32 hi) return (hi & 0x8000); } -/* We need a helper to "push" a value onto the Guest's stack, since that's a - * big part of what delivering an interrupt does. */ +/* + * We need a helper to "push" a value onto the Guest's stack, since that's a + * big part of what delivering an interrupt does. + */ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) { /* Stack grows upwards: move stack then write value. */ @@ -48,7 +54,8 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) lgwrite(cpu, *gstack, u32, val); } -/*H:210 The set_guest_interrupt() routine actually delivers the interrupt or +/*H:210 + * The set_guest_interrupt() routine actually delivers the interrupt or * trap. The mechanics of delivering traps and interrupts to the Guest are the * same, except some traps have an "error code" which gets pushed onto the * stack as well: the caller tells us if this is one. @@ -59,7 +66,8 @@ static void push_guest_stack(struct lg_cpu *cpu, unsigned long *gstack, u32 val) * * We set up the stack just like the CPU does for a real interrupt, so it's * identical for the Guest (and the standard "iret" instruction will undo - * it). */ + * it). + */ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, bool has_err) { @@ -67,20 +75,26 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, u32 eflags, ss, irq_enable; unsigned long virtstack; - /* There are two cases for interrupts: one where the Guest is already + /* + * There are two cases for interrupts: one where the Guest is already * in the kernel, and a more complex one where the Guest is in - * userspace. We check the privilege level to find out. */ + * userspace. We check the privilege level to find out. + */ if ((cpu->regs->ss&0x3) != GUEST_PL) { - /* The Guest told us their kernel stack with the SET_STACK - * hypercall: both the virtual address and the segment */ + /* + * The Guest told us their kernel stack with the SET_STACK + * hypercall: both the virtual address and the segment. + */ virtstack = cpu->esp1; ss = cpu->ss1; origstack = gstack = guest_pa(cpu, virtstack); - /* We push the old stack segment and pointer onto the new + /* + * We push the old stack segment and pointer onto the new * stack: when the Guest does an "iret" back from the interrupt * handler the CPU will notice they're dropping privilege - * levels and expect these here. */ + * levels and expect these here. 
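For reference, these are the words that end up on the Guest's stack, in the order the push_guest_stack() calls below write them; this helper is invented purely to illustrate the frame, the real code builds it word by word:

/*
 * The first two words only appear when the interrupt drops privilege
 * (the Guest was in userspace); the error code only appears for the
 * handful of traps that have one.
 */
static unsigned int build_frame(unsigned long *frame,
				unsigned long old_ss, unsigned long old_esp,
				unsigned long eflags, unsigned long cs,
				unsigned long eip,
				int from_userspace, int has_err,
				unsigned long errcode)
{
	unsigned int n = 0;

	if (from_userspace) {        /* "iret" will want these back */
		frame[n++] = old_ss;
		frame[n++] = old_esp;
	}
	frame[n++] = eflags;         /* with IF copied from irq_enabled */
	frame[n++] = cs;
	frame[n++] = eip;
	if (has_err)
		frame[n++] = errcode;

	return n;                    /* number of words "pushed" */
}

A real "iret" pops eip, cs and eflags, plus esp and ss when it returns to a lower privilege level, which is exactly why the two extra words are needed in the userspace case.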
+ */ push_guest_stack(cpu, &gstack, cpu->regs->ss); push_guest_stack(cpu, &gstack, cpu->regs->esp); } else { @@ -91,18 +105,22 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, origstack = gstack = guest_pa(cpu, virtstack); } - /* Remember that we never let the Guest actually disable interrupts, so + /* + * Remember that we never let the Guest actually disable interrupts, so * the "Interrupt Flag" bit is always set. We copy that bit from the * Guest's "irq_enabled" field into the eflags word: we saw the Guest - * copy it back in "lguest_iret". */ + * copy it back in "lguest_iret". + */ eflags = cpu->regs->eflags; if (get_user(irq_enable, &cpu->lg->lguest_data->irq_enabled) == 0 && !(irq_enable & X86_EFLAGS_IF)) eflags &= ~X86_EFLAGS_IF; - /* An interrupt is expected to push three things on the stack: the old + /* + * An interrupt is expected to push three things on the stack: the old * "eflags" word, the old code segment, and the old instruction - * pointer. */ + * pointer. + */ push_guest_stack(cpu, &gstack, eflags); push_guest_stack(cpu, &gstack, cpu->regs->cs); push_guest_stack(cpu, &gstack, cpu->regs->eip); @@ -111,15 +129,19 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, if (has_err) push_guest_stack(cpu, &gstack, cpu->regs->errcode); - /* Now we've pushed all the old state, we change the stack, the code - * segment and the address to execute. */ + /* + * Now we've pushed all the old state, we change the stack, the code + * segment and the address to execute. + */ cpu->regs->ss = ss; cpu->regs->esp = virtstack + (gstack - origstack); cpu->regs->cs = (__KERNEL_CS|GUEST_PL); cpu->regs->eip = idt_address(lo, hi); - /* There are two kinds of interrupt handlers: 0xE is an "interrupt - * gate" which expects interrupts to be disabled on entry. */ + /* + * There are two kinds of interrupt handlers: 0xE is an "interrupt + * gate" which expects interrupts to be disabled on entry. + */ if (idt_type(lo, hi) == 0xE) if (put_user(0, &cpu->lg->lguest_data->irq_enabled)) kill_guest(cpu, "Disabling interrupts"); @@ -130,7 +152,8 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, * * interrupt_pending() returns the first pending interrupt which isn't blocked * by the Guest. It is called before every entry to the Guest, and just before - * we go to sleep when the Guest has halted itself. */ + * we go to sleep when the Guest has halted itself. + */ unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) { unsigned int irq; @@ -140,8 +163,10 @@ unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) if (!cpu->lg->lguest_data) return LGUEST_IRQS; - /* Take our "irqs_pending" array and remove any interrupts the Guest - * wants blocked: the result ends up in "blk". */ + /* + * Take our "irqs_pending" array and remove any interrupts the Guest + * wants blocked: the result ends up in "blk". + */ if (copy_from_user(&blk, cpu->lg->lguest_data->blocked_interrupts, sizeof(blk))) return LGUEST_IRQS; @@ -154,16 +179,20 @@ unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more) return irq; } -/* This actually diverts the Guest to running an interrupt handler, once an - * interrupt has been identified by interrupt_pending(). */ +/* + * This actually diverts the Guest to running an interrupt handler, once an + * interrupt has been identified by interrupt_pending(). 
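A minimal sketch of the "pending minus blocked" calculation that interrupt_pending() performs, assuming a single-word bitmap for brevity (the real code copies the Guest's blocked_interrupts from lguest_data and works over LGUEST_IRQS bits):

#define NIRQS 32    /* stands in for LGUEST_IRQS */

/* Return the first pending, unblocked irq, or NIRQS if there is none. */
static unsigned int first_deliverable(unsigned long pending,
				      unsigned long blocked)
{
	unsigned long blk = pending & ~blocked;   /* the "blk" idea above */
	unsigned int irq;

	for (irq = 0; irq < NIRQS; irq++)
		if (blk & (1UL << irq))
			return irq;
	return NIRQS;                             /* like returning LGUEST_IRQS */
}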
+ */ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) { struct desc_struct *idt; BUG_ON(irq >= LGUEST_IRQS); - /* They may be in the middle of an iret, where they asked us never to - * deliver interrupts. */ + /* + * They may be in the middle of an iret, where they asked us never to + * deliver interrupts. + */ if (cpu->regs->eip >= cpu->lg->noirq_start && (cpu->regs->eip < cpu->lg->noirq_end)) return; @@ -187,29 +216,37 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) } } - /* Look at the IDT entry the Guest gave us for this interrupt. The + /* + * Look at the IDT entry the Guest gave us for this interrupt. The * first 32 (FIRST_EXTERNAL_VECTOR) entries are for traps, so we skip - * over them. */ + * over them. + */ idt = &cpu->arch.idt[FIRST_EXTERNAL_VECTOR+irq]; /* If they don't have a handler (yet?), we just ignore it */ if (idt_present(idt->a, idt->b)) { /* OK, mark it no longer pending and deliver it. */ clear_bit(irq, cpu->irqs_pending); - /* set_guest_interrupt() takes the interrupt descriptor and a + /* + * set_guest_interrupt() takes the interrupt descriptor and a * flag to say whether this interrupt pushes an error code onto - * the stack as well: virtual interrupts never do. */ + * the stack as well: virtual interrupts never do. + */ set_guest_interrupt(cpu, idt->a, idt->b, false); } - /* Every time we deliver an interrupt, we update the timestamp in the + /* + * Every time we deliver an interrupt, we update the timestamp in the * Guest's lguest_data struct. It would be better for the Guest if we * did this more often, but it can actually be quite slow: doing it * here is a compromise which means at least it gets updated every - * timer interrupt. */ + * timer interrupt. + */ write_timestamp(cpu); - /* If there are no other interrupts we want to deliver, clear - * the pending flag. */ + /* + * If there are no other interrupts we want to deliver, clear + * the pending flag. + */ if (!more) put_user(0, &cpu->lg->lguest_data->irq_pending); } @@ -217,24 +254,29 @@ void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more) /* And this is the routine when we want to set an interrupt for the Guest. */ void set_interrupt(struct lg_cpu *cpu, unsigned int irq) { - /* Next time the Guest runs, the core code will see if it can deliver - * this interrupt. */ + /* + * Next time the Guest runs, the core code will see if it can deliver + * this interrupt. + */ set_bit(irq, cpu->irqs_pending); - /* Make sure it sees it; it might be asleep (eg. halted), or - * running the Guest right now, in which case kick_process() - * will knock it out. */ + /* + * Make sure it sees it; it might be asleep (eg. halted), or running + * the Guest right now, in which case kick_process() will knock it out. + */ if (!wake_up_process(cpu->tsk)) kick_process(cpu->tsk); } /*:*/ -/* Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent +/* + * Linux uses trap 128 for system calls. Plan9 uses 64, and Ron Minnich sent * me a patch, so we support that too. It'd be a big step for lguest if half * the Plan 9 user base were to start using it. * * Actually now I think of it, it's possible that Ron *is* half the Plan 9 - * userbase. Oh well. */ + * userbase. Oh well. + */ static bool could_be_syscall(unsigned int num) { /* Normal Linux SYSCALL_VECTOR or reserved vector? 
*/ @@ -274,9 +316,11 @@ void free_interrupts(void) clear_bit(syscall_vector, used_vectors); } -/*H:220 Now we've got the routines to deliver interrupts, delivering traps like +/*H:220 + * Now we've got the routines to deliver interrupts, delivering traps like * page fault is easy. The only trick is that Intel decided that some traps - * should have error codes: */ + * should have error codes: + */ static bool has_err(unsigned int trap) { return (trap == 8 || (trap >= 10 && trap <= 14) || trap == 17); @@ -285,13 +329,17 @@ static bool has_err(unsigned int trap) /* deliver_trap() returns true if it could deliver the trap. */ bool deliver_trap(struct lg_cpu *cpu, unsigned int num) { - /* Trap numbers are always 8 bit, but we set an impossible trap number - * for traps inside the Switcher, so check that here. */ + /* + * Trap numbers are always 8 bit, but we set an impossible trap number + * for traps inside the Switcher, so check that here. + */ if (num >= ARRAY_SIZE(cpu->arch.idt)) return false; - /* Early on the Guest hasn't set the IDT entries (or maybe it put a - * bogus one in): if we fail here, the Guest will be killed. */ + /* + * Early on the Guest hasn't set the IDT entries (or maybe it put a + * bogus one in): if we fail here, the Guest will be killed. + */ if (!idt_present(cpu->arch.idt[num].a, cpu->arch.idt[num].b)) return false; set_guest_interrupt(cpu, cpu->arch.idt[num].a, @@ -299,7 +347,8 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num) return true; } -/*H:250 Here's the hard part: returning to the Host every time a trap happens +/*H:250 + * Here's the hard part: returning to the Host every time a trap happens * and then calling deliver_trap() and re-entering the Guest is slow. * Particularly because Guest userspace system calls are traps (usually trap * 128). @@ -311,69 +360,87 @@ bool deliver_trap(struct lg_cpu *cpu, unsigned int num) * the other hypervisors would beat it up at lunchtime. * * This routine indicates if a particular trap number could be delivered - * directly. */ + * directly. + */ static bool direct_trap(unsigned int num) { - /* Hardware interrupts don't go to the Guest at all (except system - * call). */ + /* + * Hardware interrupts don't go to the Guest at all (except system + * call). + */ if (num >= FIRST_EXTERNAL_VECTOR && !could_be_syscall(num)) return false; - /* The Host needs to see page faults (for shadow paging and to save the + /* + * The Host needs to see page faults (for shadow paging and to save the * fault address), general protection faults (in/out emulation) and * device not available (TS handling), invalid opcode fault (kvm hcall), - * and of course, the hypercall trap. */ + * and of course, the hypercall trap. + */ return num != 14 && num != 13 && num != 7 && num != 6 && num != LGUEST_TRAP_ENTRY; } /*:*/ -/*M:005 The Guest has the ability to turn its interrupt gates into trap gates, +/*M:005 + * The Guest has the ability to turn its interrupt gates into trap gates, * if it is careful. The Host will let trap gates can go directly to the * Guest, but the Guest needs the interrupts atomically disabled for an * interrupt gate. It can do this by pointing the trap gate at instructions - * within noirq_start and noirq_end, where it can safely disable interrupts. */ + * within noirq_start and noirq_end, where it can safely disable interrupts. 
+ */ -/*M:006 The Guests do not use the sysenter (fast system call) instruction, +/*M:006 + * The Guests do not use the sysenter (fast system call) instruction, * because it's hardcoded to enter privilege level 0 and so can't go direct. * It's about twice as fast as the older "int 0x80" system call, so it might * still be worthwhile to handle it in the Switcher and lcall down to the * Guest. The sysenter semantics are hairy tho: search for that keyword in - * entry.S :*/ + * entry.S +:*/ -/*H:260 When we make traps go directly into the Guest, we need to make sure +/*H:260 + * When we make traps go directly into the Guest, we need to make sure * the kernel stack is valid (ie. mapped in the page tables). Otherwise, the * CPU trying to deliver the trap will fault while trying to push the interrupt * words on the stack: this is called a double fault, and it forces us to kill * the Guest. * - * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. */ + * Which is deeply unfair, because (literally!) it wasn't the Guests' fault. + */ void pin_stack_pages(struct lg_cpu *cpu) { unsigned int i; - /* Depending on the CONFIG_4KSTACKS option, the Guest can have one or - * two pages of stack space. */ + /* + * Depending on the CONFIG_4KSTACKS option, the Guest can have one or + * two pages of stack space. + */ for (i = 0; i < cpu->lg->stack_pages; i++) - /* The stack grows *upwards*, so the address we're given is the + /* + * The stack grows *upwards*, so the address we're given is the * start of the page after the kernel stack. Subtract one to * get back onto the first stack page, and keep subtracting to - * get to the rest of the stack pages. */ + * get to the rest of the stack pages. + */ pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE); } -/* Direct traps also mean that we need to know whenever the Guest wants to use +/* + * Direct traps also mean that we need to know whenever the Guest wants to use * a different kernel stack, so we can change the IDT entries to use that * stack. The IDT entries expect a virtual address, so unlike most addresses * the Guest gives us, the "esp" (stack pointer) value here is virtual, not * physical. * * In Linux each process has its own kernel stack, so this happens a lot: we - * change stacks on each context switch. */ + * change stacks on each context switch. + */ void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) { - /* You are not allowed have a stack segment with privilege level 0: bad - * Guest! */ + /* + * You're not allowed a stack segment with privilege level 0: bad Guest! + */ if ((seg & 0x3) != GUEST_PL) kill_guest(cpu, "bad stack segment %i", seg); /* We only expect one or two stack pages. */ @@ -387,11 +454,15 @@ void guest_set_stack(struct lg_cpu *cpu, u32 seg, u32 esp, unsigned int pages) pin_stack_pages(cpu); } -/* All this reference to mapping stacks leads us neatly into the other complex - * part of the Host: page table handling. */ +/* + * All this reference to mapping stacks leads us neatly into the other complex + * part of the Host: page table handling. 
+ */ -/*H:235 This is the routine which actually checks the Guest's IDT entry and - * transfers it into the entry in "struct lguest": */ +/*H:235 + * This is the routine which actually checks the Guest's IDT entry and + * transfers it into the entry in "struct lguest": + */ static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, unsigned int num, u32 lo, u32 hi) { @@ -407,30 +478,38 @@ static void set_trap(struct lg_cpu *cpu, struct desc_struct *trap, if (type != 0xE && type != 0xF) kill_guest(cpu, "bad IDT type %i", type); - /* We only copy the handler address, present bit, privilege level and + /* + * We only copy the handler address, present bit, privilege level and * type. The privilege level controls where the trap can be triggered * manually with an "int" instruction. This is usually GUEST_PL, - * except for system calls which userspace can use. */ + * except for system calls which userspace can use. + */ trap->a = ((__KERNEL_CS|GUEST_PL)<<16) | (lo&0x0000FFFF); trap->b = (hi&0xFFFFEF00); } -/*H:230 While we're here, dealing with delivering traps and interrupts to the +/*H:230 + * While we're here, dealing with delivering traps and interrupts to the * Guest, we might as well complete the picture: how the Guest tells us where * it wants them to go. This would be simple, except making traps fast * requires some tricks. * * We saw the Guest setting Interrupt Descriptor Table (IDT) entries with the - * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. */ + * LHCALL_LOAD_IDT_ENTRY hypercall before: that comes here. + */ void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) { - /* Guest never handles: NMI, doublefault, spurious interrupt or - * hypercall. We ignore when it tries to set them. */ + /* + * Guest never handles: NMI, doublefault, spurious interrupt or + * hypercall. We ignore when it tries to set them. + */ if (num == 2 || num == 8 || num == 15 || num == LGUEST_TRAP_ENTRY) return; - /* Mark the IDT as changed: next time the Guest runs we'll know we have - * to copy this again. */ + /* + * Mark the IDT as changed: next time the Guest runs we'll know we have + * to copy this again. + */ cpu->changed |= CHANGED_IDT; /* Check that the Guest doesn't try to step outside the bounds. */ @@ -440,9 +519,11 @@ void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int num, u32 lo, u32 hi) set_trap(cpu, &cpu->arch.idt[num], num, lo, hi); } -/* The default entry for each interrupt points into the Switcher routines which +/* + * The default entry for each interrupt points into the Switcher routines which * simply return to the Host. The run_guest() loop will then call - * deliver_trap() to bounce it back into the Guest. */ + * deliver_trap() to bounce it back into the Guest. + */ static void default_idt_entry(struct desc_struct *idt, int trap, const unsigned long handler, @@ -451,13 +532,17 @@ static void default_idt_entry(struct desc_struct *idt, /* A present interrupt gate. */ u32 flags = 0x8e00; - /* Set the privilege level on the entry for the hypercall: this allows - * the Guest to use the "int" instruction to trigger it. */ + /* + * Set the privilege level on the entry for the hypercall: this allows + * the Guest to use the "int" instruction to trigger it. + */ if (trap == LGUEST_TRAP_ENTRY) flags |= (GUEST_PL << 13); else if (base) - /* Copy priv. level from what Guest asked for. This allows - * debug (int 3) traps from Guest userspace, for example. */ + /* + * Copy privilege level from what Guest asked for. 
This allows + * debug (int 3) traps from Guest userspace, for example. + */ flags |= (base->b & 0x6000); /* Now pack it into the IDT entry in its weird format. */ @@ -475,16 +560,20 @@ void setup_default_idt_entries(struct lguest_ro_state *state, default_idt_entry(&state->guest_idt[i], i, def[i], NULL); } -/*H:240 We don't use the IDT entries in the "struct lguest" directly, instead +/*H:240 + * We don't use the IDT entries in the "struct lguest" directly, instead * we copy them into the IDT which we've set up for Guests on this CPU, just - * before we run the Guest. This routine does that copy. */ + * before we run the Guest. This routine does that copy. + */ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, const unsigned long *def) { unsigned int i; - /* We can simply copy the direct traps, otherwise we use the default - * ones in the Switcher: they will return to the Host. */ + /* + * We can simply copy the direct traps, otherwise we use the default + * ones in the Switcher: they will return to the Host. + */ for (i = 0; i < ARRAY_SIZE(cpu->arch.idt); i++) { const struct desc_struct *gidt = &cpu->arch.idt[i]; @@ -492,14 +581,16 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, if (!direct_trap(i)) continue; - /* Only trap gates (type 15) can go direct to the Guest. + /* + * Only trap gates (type 15) can go direct to the Guest. * Interrupt gates (type 14) disable interrupts as they are * entered, which we never let the Guest do. Not present * entries (type 0x0) also can't go direct, of course. * * If it can't go direct, we still need to copy the priv. level: * they might want to give userspace access to a software - * interrupt. */ + * interrupt. + */ if (idt_type(gidt->a, gidt->b) == 0xF) idt[i] = *gidt; else @@ -518,7 +609,8 @@ void copy_traps(const struct lg_cpu *cpu, struct desc_struct *idt, * the next timer interrupt (in nanoseconds). We use the high-resolution timer * infrastructure to set a callback at that time. * - * 0 means "turn off the clock". */ + * 0 means "turn off the clock". + */ void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) { ktime_t expires; @@ -529,9 +621,11 @@ void guest_set_clockevent(struct lg_cpu *cpu, unsigned long delta) return; } - /* We use wallclock time here, so the Guest might not be running for + /* + * We use wallclock time here, so the Guest might not be running for * all the time between now and the timer interrupt it asked for. This - * is almost always the right thing to do. */ + * is almost always the right thing to do. + */ expires = ktime_add_ns(ktime_get_real(), delta); hrtimer_start(&cpu->hrt, expires, HRTIMER_MODE_ABS); } diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 01c591923793..74c0db691b53 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -54,13 +54,13 @@ struct lg_cpu { unsigned long pending_notify; /* pfn from LHCALL_NOTIFY */ - /* At end of a page shared mapped over lguest_pages in guest. */ + /* At end of a page shared mapped over lguest_pages in guest. */ unsigned long regs_page; struct lguest_regs *regs; struct lguest_pages *last_pages; - int cpu_pgd; /* which pgd this cpu is currently using */ + int cpu_pgd; /* Which pgd this cpu is currently using */ /* If a hypercall was asked for, this points to the arguments. */ struct hcall_args *hcall; @@ -96,8 +96,11 @@ struct lguest unsigned int nr_cpus; u32 pfn_limit; - /* This provides the offset to the base of guest-physical - * memory in the Launcher. 
*/ + + /* + * This provides the offset to the base of guest-physical memory in the + * Launcher. + */ void __user *mem_base; unsigned long kernel_address; @@ -122,11 +125,13 @@ bool lguest_address_ok(const struct lguest *lg, void __lgread(struct lg_cpu *, void *, unsigned long, unsigned); void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned); -/*H:035 Using memory-copy operations like that is usually inconvient, so we +/*H:035 + * Using memory-copy operations like that is usually inconvient, so we * have the following helper macros which read and write a specific type (often * an unsigned long). * - * This reads into a variable of the given type then returns that. */ + * This reads into a variable of the given type then returns that. + */ #define lgread(cpu, addr, type) \ ({ type _v; __lgread((cpu), &_v, (addr), sizeof(_v)); _v; }) @@ -140,9 +145,11 @@ void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned); int run_guest(struct lg_cpu *cpu, unsigned long __user *user); -/* Helper macros to obtain the first 12 or the last 20 bits, this is only the +/* + * Helper macros to obtain the first 12 or the last 20 bits, this is only the * first step in the migration to the kernel types. pte_pfn is already defined - * in the kernel. */ + * in the kernel. + */ #define pgd_flags(x) (pgd_val(x) & ~PAGE_MASK) #define pgd_pfn(x) (pgd_val(x) >> PAGE_SHIFT) #define pmd_flags(x) (pmd_val(x) & ~PAGE_MASK) diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index e082cdac88b4..cc000e79c3d1 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -1,10 +1,12 @@ -/*P:050 Lguest guests use a very simple method to describe devices. It's a +/*P:050 + * Lguest guests use a very simple method to describe devices. It's a * series of device descriptors contained just above the top of normal Guest * memory. * * We use the standard "virtio" device infrastructure, which provides us with a * console, a network and a block driver. Each one expects some configuration - * information and a "virtqueue" or two to send and receive data. :*/ + * information and a "virtqueue" or two to send and receive data. +:*/ #include #include #include @@ -20,8 +22,10 @@ /* The pointer to our (page) of device descriptions. */ static void *lguest_devices; -/* For Guests, device memory can be used as normal memory, so we cast away the - * __iomem to quieten sparse. */ +/* + * For Guests, device memory can be used as normal memory, so we cast away the + * __iomem to quieten sparse. + */ static inline void *lguest_map(unsigned long phys_addr, unsigned long pages) { return (__force void *)ioremap_cache(phys_addr, PAGE_SIZE*pages); @@ -32,8 +36,10 @@ static inline void lguest_unmap(void *addr) iounmap((__force void __iomem *)addr); } -/*D:100 Each lguest device is just a virtio device plus a pointer to its entry - * in the lguest_devices page. */ +/*D:100 + * Each lguest device is just a virtio device plus a pointer to its entry + * in the lguest_devices page. + */ struct lguest_device { struct virtio_device vdev; @@ -41,9 +47,11 @@ struct lguest_device { struct lguest_device_desc *desc; }; -/* Since the virtio infrastructure hands us a pointer to the virtio_device all +/* + * Since the virtio infrastructure hands us a pointer to the virtio_device all * the time, it helps to have a curt macro to get a pointer to the struct - * lguest_device it's enclosed in. */ + * lguest_device it's enclosed in. 
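The to_lgdev() macro defined next is the usual container_of() trick; in case the idiom is unfamiliar, here is a freestanding sketch of the same pattern with made-up struct names:

#include <stddef.h>   /* offsetof */

struct inner { int x; };

struct outer {
	int id;
	struct inner member;   /* embedded, like vdev inside lguest_device */
};

/* Given a pointer to "member", recover the enclosing struct outer. */
#define outer_of(p) \
	((struct outer *)((char *)(p) - offsetof(struct outer, member)))

static int id_from_inner(struct inner *p)
{
	return outer_of(p)->id;
}

The kernel's container_of() is essentially this offsetof() subtraction with some extra type checking on top.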
+ */ #define to_lgdev(vd) container_of(vd, struct lguest_device, vdev) /*D:130 @@ -55,7 +63,8 @@ struct lguest_device { * the driver will look at them during setup. * * A convenient routine to return the device's virtqueue config array: - * immediately after the descriptor. */ + * immediately after the descriptor. + */ static struct lguest_vqconfig *lg_vq(const struct lguest_device_desc *desc) { return (void *)(desc + 1); @@ -98,10 +107,12 @@ static u32 lg_get_features(struct virtio_device *vdev) return features; } -/* The virtio core takes the features the Host offers, and copies the - * ones supported by the driver into the vdev->features array. Once - * that's all sorted out, this routine is called so we can tell the - * Host which features we understand and accept. */ +/* + * The virtio core takes the features the Host offers, and copies the ones + * supported by the driver into the vdev->features array. Once that's all + * sorted out, this routine is called so we can tell the Host which features we + * understand and accept. + */ static void lg_finalize_features(struct virtio_device *vdev) { unsigned int i, bits; @@ -112,10 +123,11 @@ static void lg_finalize_features(struct virtio_device *vdev) /* Give virtio_ring a chance to accept features. */ vring_transport_features(vdev); - /* The vdev->feature array is a Linux bitmask: this isn't the - * same as a the simple array of bits used by lguest devices - * for features. So we do this slow, manual conversion which is - * completely general. */ + /* + * The vdev->feature array is a Linux bitmask: this isn't the same as a + * the simple array of bits used by lguest devices for features. So we + * do this slow, manual conversion which is completely general. + */ memset(out_features, 0, desc->feature_len); bits = min_t(unsigned, desc->feature_len, sizeof(vdev->features)) * 8; for (i = 0; i < bits; i++) { @@ -146,15 +158,19 @@ static void lg_set(struct virtio_device *vdev, unsigned int offset, memcpy(lg_config(desc) + offset, buf, len); } -/* The operations to get and set the status word just access the status field - * of the device descriptor. */ +/* + * The operations to get and set the status word just access the status field + * of the device descriptor. + */ static u8 lg_get_status(struct virtio_device *vdev) { return to_lgdev(vdev)->desc->status; } -/* To notify on status updates, we (ab)use the NOTIFY hypercall, with the - * descriptor address of the device. A zero status means "reset". */ +/* + * To notify on status updates, we (ab)use the NOTIFY hypercall, with the + * descriptor address of the device. A zero status means "reset". + */ static void set_status(struct virtio_device *vdev, u8 status) { unsigned long offset = (void *)to_lgdev(vdev)->desc - lguest_devices; @@ -200,13 +216,17 @@ struct lguest_vq_info void *pages; }; -/* When the virtio_ring code wants to prod the Host, it calls us here and we +/* + * When the virtio_ring code wants to prod the Host, it calls us here and we * make a hypercall. We hand the physical address of the virtqueue so the Host - * knows which virtqueue we're talking about. */ + * knows which virtqueue we're talking about. + */ static void lg_notify(struct virtqueue *vq) { - /* We store our virtqueue information in the "priv" pointer of the - * virtqueue structure. */ + /* + * We store our virtqueue information in the "priv" pointer of the + * virtqueue structure. 
+ */ struct lguest_vq_info *lvq = vq->priv; kvm_hypercall1(LHCALL_NOTIFY, lvq->config.pfn << PAGE_SHIFT); @@ -215,7 +235,8 @@ static void lg_notify(struct virtqueue *vq) /* An extern declaration inside a C file is bad form. Don't do it. */ extern void lguest_setup_irq(unsigned int irq); -/* This routine finds the first virtqueue described in the configuration of +/* + * This routine finds the first virtqueue described in the configuration of * this device and sets it up. * * This is kind of an ugly duckling. It'd be nicer to have a standard @@ -225,7 +246,8 @@ extern void lguest_setup_irq(unsigned int irq); * simpler for the Host to simply tell us where the pages are. * * So we provide drivers with a "find the Nth virtqueue and set it up" - * function. */ + * function. + */ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, unsigned index, void (*callback)(struct virtqueue *vq), @@ -244,9 +266,11 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, if (!lvq) return ERR_PTR(-ENOMEM); - /* Make a copy of the "struct lguest_vqconfig" entry, which sits after + /* + * Make a copy of the "struct lguest_vqconfig" entry, which sits after * the descriptor. We need a copy because the config space might not - * be aligned correctly. */ + * be aligned correctly. + */ memcpy(&lvq->config, lg_vq(ldev->desc)+index, sizeof(lvq->config)); printk("Mapping virtqueue %i addr %lx\n", index, @@ -261,8 +285,10 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, goto free_lvq; } - /* OK, tell virtio_ring.c to set up a virtqueue now we know its size - * and we've got a pointer to its pages. */ + /* + * OK, tell virtio_ring.c to set up a virtqueue now we know its size + * and we've got a pointer to its pages. + */ vq = vring_new_virtqueue(lvq->config.num, LGUEST_VRING_ALIGN, vdev, lvq->pages, lg_notify, callback, name); if (!vq) { @@ -273,18 +299,23 @@ static struct virtqueue *lg_find_vq(struct virtio_device *vdev, /* Make sure the interrupt is allocated. */ lguest_setup_irq(lvq->config.irq); - /* Tell the interrupt for this virtqueue to go to the virtio_ring - * interrupt handler. */ - /* FIXME: We used to have a flag for the Host to tell us we could use + /* + * Tell the interrupt for this virtqueue to go to the virtio_ring + * interrupt handler. + * + * FIXME: We used to have a flag for the Host to tell us we could use * the interrupt as a source of randomness: it'd be nice to have that - * back.. */ + * back. + */ err = request_irq(lvq->config.irq, vring_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vq); if (err) goto destroy_vring; - /* Last of all we hook up our 'struct lguest_vq_info" to the - * virtqueue's priv pointer. */ + /* + * Last of all we hook up our 'struct lguest_vq_info" to the + * virtqueue's priv pointer. + */ vq->priv = lvq; return vq; @@ -358,11 +389,14 @@ static struct virtio_config_ops lguest_config_ops = { .del_vqs = lg_del_vqs, }; -/* The root device for the lguest virtio devices. This makes them appear as - * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2. */ +/* + * The root device for the lguest virtio devices. This makes them appear as + * /sys/devices/lguest/0,1,2 not /sys/devices/0,1,2. + */ static struct device *lguest_root; -/*D:120 This is the core of the lguest bus: actually adding a new device. +/*D:120 + * This is the core of the lguest bus: actually adding a new device. * It's a separate function because it's neater that way, and because an * earlier version of the code supported hotplug and unplug. 
They were removed * early on because they were never used. @@ -371,14 +405,14 @@ static struct device *lguest_root; * * It's worth reading this carefully: we start with a pointer to the new device * descriptor in the "lguest_devices" page, and the offset into the device - * descriptor page so we can uniquely identify it if things go badly wrong. */ + * descriptor page so we can uniquely identify it if things go badly wrong. + */ static void add_lguest_device(struct lguest_device_desc *d, unsigned int offset) { struct lguest_device *ldev; - /* Start with zeroed memory; Linux's device layer seems to count on - * it. */ + /* Start with zeroed memory; Linux's device layer counts on it. */ ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); if (!ldev) { printk(KERN_EMERG "Cannot allocate lguest dev %u type %u\n", @@ -390,15 +424,19 @@ static void add_lguest_device(struct lguest_device_desc *d, ldev->vdev.dev.parent = lguest_root; /* We have a unique device index thanks to the dev_index counter. */ ldev->vdev.id.device = d->type; - /* We have a simple set of routines for querying the device's - * configuration information and setting its status. */ + /* + * We have a simple set of routines for querying the device's + * configuration information and setting its status. + */ ldev->vdev.config = &lguest_config_ops; /* And we remember the device's descriptor for lguest_config_ops. */ ldev->desc = d; - /* register_virtio_device() sets up the generic fields for the struct + /* + * register_virtio_device() sets up the generic fields for the struct * virtio_device and calls device_register(). This makes the bus - * infrastructure look for a matching driver. */ + * infrastructure look for a matching driver. + */ if (register_virtio_device(&ldev->vdev) != 0) { printk(KERN_ERR "Failed to register lguest dev %u type %u\n", offset, d->type); @@ -406,8 +444,10 @@ static void add_lguest_device(struct lguest_device_desc *d, } } -/*D:110 scan_devices() simply iterates through the device page. The type 0 is - * reserved to mean "end of devices". */ +/*D:110 + * scan_devices() simply iterates through the device page. The type 0 is + * reserved to mean "end of devices". + */ static void scan_devices(void) { unsigned int i; @@ -426,7 +466,8 @@ static void scan_devices(void) } } -/*D:105 Fairly early in boot, lguest_devices_init() is called to set up the +/*D:105 + * Fairly early in boot, lguest_devices_init() is called to set up the * lguest device infrastructure. We check that we are a Guest by checking * pv_info.name: there are other ways of checking, but this seems most * obvious to me. @@ -437,7 +478,8 @@ static void scan_devices(void) * correct sysfs incantation). * * Finally we call scan_devices() which adds all the devices found in the - * lguest_devices page. */ + * lguest_devices page. + */ static int __init lguest_devices_init(void) { if (strcmp(pv_info.name, "lguest") != 0) @@ -456,11 +498,13 @@ static int __init lguest_devices_init(void) /* We do this after core stuff, but before the drivers. */ postcore_initcall(lguest_devices_init); -/*D:150 At this point in the journey we used to now wade through the lguest +/*D:150 + * At this point in the journey we used to now wade through the lguest * devices themselves: net, block and console. Since they're all now virtio * devices rather than lguest-specific, I've decided to ignore them. Mostly, * they're kind of boring. But this does mean you'll never experience the * thrill of reading the forbidden love scene buried deep in the block driver. 
* * "make Launcher" beckons, where we answer questions like "Where do Guests - * come from?", and "What do you do when someone asks for optimization?". */ + * come from?", and "What do you do when someone asks for optimization?". + */ diff --git a/drivers/lguest/lguest_user.c b/drivers/lguest/lguest_user.c index 407722a8e0c4..7e92017103dc 100644 --- a/drivers/lguest/lguest_user.c +++ b/drivers/lguest/lguest_user.c @@ -1,8 +1,10 @@ -/*P:200 This contains all the /dev/lguest code, whereby the userspace launcher +/*P:200 + * This contains all the /dev/lguest code, whereby the userspace launcher * controls and communicates with the Guest. For example, the first write will * tell us the Guest's memory layout, pagetable, entry point and kernel address * offset. A read will run the Guest until something happens, such as a signal - * or the Guest doing a NOTIFY out to the Launcher. :*/ + * or the Guest doing a NOTIFY out to the Launcher. +:*/ #include #include #include @@ -37,8 +39,10 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) if (!addr) return -EINVAL; - /* Replace the old array with the new one, carefully: others can - * be accessing it at the same time */ + /* + * Replace the old array with the new one, carefully: others can + * be accessing it at the same time. + */ new = kmalloc(sizeof(*new) + sizeof(new->map[0]) * (old->num + 1), GFP_KERNEL); if (!new) @@ -61,8 +65,10 @@ static int add_eventfd(struct lguest *lg, unsigned long addr, int fd) /* Now put new one in place. */ rcu_assign_pointer(lg->eventfds, new); - /* We're not in a big hurry. Wait until noone's looking at old - * version, then delete it. */ + /* + * We're not in a big hurry. Wait until noone's looking at old + * version, then delete it. + */ synchronize_rcu(); kfree(old); @@ -87,8 +93,10 @@ static int attach_eventfd(struct lguest *lg, const unsigned long __user *input) return err; } -/*L:050 Sending an interrupt is done by writing LHREQ_IRQ and an interrupt - * number to /dev/lguest. */ +/*L:050 + * Sending an interrupt is done by writing LHREQ_IRQ and an interrupt + * number to /dev/lguest. + */ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) { unsigned long irq; @@ -102,8 +110,10 @@ static int user_send_irq(struct lg_cpu *cpu, const unsigned long __user *input) return 0; } -/*L:040 Once our Guest is initialized, the Launcher makes it run by reading - * from /dev/lguest. */ +/*L:040 + * Once our Guest is initialized, the Launcher makes it run by reading + * from /dev/lguest. + */ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) { struct lguest *lg = file->private_data; @@ -139,8 +149,10 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) return len; } - /* If we returned from read() last time because the Guest sent I/O, - * clear the flag. */ + /* + * If we returned from read() last time because the Guest sent I/O, + * clear the flag. + */ if (cpu->pending_notify) cpu->pending_notify = 0; @@ -148,8 +160,10 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o) return run_guest(cpu, (unsigned long __user *)user); } -/*L:025 This actually initializes a CPU. For the moment, a Guest is only - * uniprocessor, so "id" is always 0. */ +/*L:025 + * This actually initializes a CPU. For the moment, a Guest is only + * uniprocessor, so "id" is always 0. 
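From the Launcher's side, the two operations just described look roughly like this; a hypothetical userspace snippet, assuming LHREQ_IRQ comes from the lguest launcher header and that the Guest has already been set up with LHREQ_INITIALIZE:

#include <unistd.h>
#include <linux/lguest_launcher.h>   /* assumed location of LHREQ_IRQ */

/* Ask the Host to set an interrupt for the Guest: two unsigned longs. */
static int inject_irq(int lguest_fd, unsigned long irq)
{
	unsigned long cmd[2] = { LHREQ_IRQ, irq };
	ssize_t r = write(lguest_fd, cmd, sizeof(cmd));

	return r == (ssize_t)sizeof(cmd) ? 0 : -1;
}

/* Run the Guest: read() returns when something interesting happens. */
static long run_guest_once(int lguest_fd)
{
	unsigned long notify;

	return read(lguest_fd, &notify, sizeof(notify));
}

The first unsigned long of every write selects the request, which is how one file descriptor carries both initialization and interrupt injection.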
+ */ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) { /* We have a limited number the number of CPUs in the lguest struct. */ @@ -164,8 +178,10 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) /* Each CPU has a timer it can set. */ init_clockdev(cpu); - /* We need a complete page for the Guest registers: they are accessible - * to the Guest and we can only grant it access to whole pages. */ + /* + * We need a complete page for the Guest registers: they are accessible + * to the Guest and we can only grant it access to whole pages. + */ cpu->regs_page = get_zeroed_page(GFP_KERNEL); if (!cpu->regs_page) return -ENOMEM; @@ -173,29 +189,38 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) /* We actually put the registers at the bottom of the page. */ cpu->regs = (void *)cpu->regs_page + PAGE_SIZE - sizeof(*cpu->regs); - /* Now we initialize the Guest's registers, handing it the start - * address. */ + /* + * Now we initialize the Guest's registers, handing it the start + * address. + */ lguest_arch_setup_regs(cpu, start_ip); - /* We keep a pointer to the Launcher task (ie. current task) for when - * other Guests want to wake this one (eg. console input). */ + /* + * We keep a pointer to the Launcher task (ie. current task) for when + * other Guests want to wake this one (eg. console input). + */ cpu->tsk = current; - /* We need to keep a pointer to the Launcher's memory map, because if + /* + * We need to keep a pointer to the Launcher's memory map, because if * the Launcher dies we need to clean it up. If we don't keep a - * reference, it is destroyed before close() is called. */ + * reference, it is destroyed before close() is called. + */ cpu->mm = get_task_mm(cpu->tsk); - /* We remember which CPU's pages this Guest used last, for optimization - * when the same Guest runs on the same CPU twice. */ + /* + * We remember which CPU's pages this Guest used last, for optimization + * when the same Guest runs on the same CPU twice. + */ cpu->last_pages = NULL; /* No error == success. */ return 0; } -/*L:020 The initialization write supplies 3 pointer sized (32 or 64 bit) - * values (in addition to the LHREQ_INITIALIZE value). These are: +/*L:020 + * The initialization write supplies 3 pointer sized (32 or 64 bit) values (in + * addition to the LHREQ_INITIALIZE value). These are: * * base: The start of the Guest-physical memory inside the Launcher memory. * @@ -207,14 +232,15 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip) */ static int initialize(struct file *file, const unsigned long __user *input) { - /* "struct lguest" contains everything we (the Host) know about a - * Guest. */ + /* "struct lguest" contains all we (the Host) know about a Guest. */ struct lguest *lg; int err; unsigned long args[3]; - /* We grab the Big Lguest lock, which protects against multiple - * simultaneous initializations. */ + /* + * We grab the Big Lguest lock, which protects against multiple + * simultaneous initializations. + */ mutex_lock(&lguest_lock); /* You can't initialize twice! Close the device and start again... */ if (file->private_data) { @@ -249,8 +275,10 @@ static int initialize(struct file *file, const unsigned long __user *input) if (err) goto free_eventfds; - /* Initialize the Guest's shadow page tables, using the toplevel - * address the Launcher gave us. This allocates memory, so can fail. 
*/ + /* + * Initialize the Guest's shadow page tables, using the toplevel + * address the Launcher gave us. This allocates memory, so can fail. + */ err = init_guest_pagetable(lg); if (err) goto free_regs; @@ -275,7 +303,8 @@ unlock: return err; } -/*L:010 The first operation the Launcher does must be a write. All writes +/*L:010 + * The first operation the Launcher does must be a write. All writes * start with an unsigned long number: for the first write this must be * LHREQ_INITIALIZE to set up the Guest. After that the Launcher can use * writes of other values to send interrupts. @@ -283,12 +312,15 @@ unlock: * Note that we overload the "offset" in the /dev/lguest file to indicate what * CPU number we're dealing with. Currently this is always 0, since we only * support uniprocessor Guests, but you can see the beginnings of SMP support - * here. */ + * here. + */ static ssize_t write(struct file *file, const char __user *in, size_t size, loff_t *off) { - /* Once the Guest is initialized, we hold the "struct lguest" in the - * file private data. */ + /* + * Once the Guest is initialized, we hold the "struct lguest" in the + * file private data. + */ struct lguest *lg = file->private_data; const unsigned long __user *input = (const unsigned long __user *)in; unsigned long req; @@ -323,13 +355,15 @@ static ssize_t write(struct file *file, const char __user *in, } } -/*L:060 The final piece of interface code is the close() routine. It reverses +/*L:060 + * The final piece of interface code is the close() routine. It reverses * everything done in initialize(). This is usually called because the * Launcher exited. * * Note that the close routine returns 0 or a negative error number: it can't * really fail, but it can whine. I blame Sun for this wart, and K&R C for - * letting them do it. :*/ + * letting them do it. +:*/ static int close(struct inode *inode, struct file *file) { struct lguest *lg = file->private_data; @@ -339,8 +373,10 @@ static int close(struct inode *inode, struct file *file) if (!lg) return 0; - /* We need the big lock, to protect from inter-guest I/O and other - * Launchers initializing guests. */ + /* + * We need the big lock, to protect from inter-guest I/O and other + * Launchers initializing guests. + */ mutex_lock(&lguest_lock); /* Free up the shadow page tables for the Guest. */ @@ -351,8 +387,10 @@ static int close(struct inode *inode, struct file *file) hrtimer_cancel(&lg->cpus[i].hrt); /* We can free up the register page we allocated. */ free_page(lg->cpus[i].regs_page); - /* Now all the memory cleanups are done, it's safe to release - * the Launcher's memory management structure. */ + /* + * Now all the memory cleanups are done, it's safe to release + * the Launcher's memory management structure. + */ mmput(lg->cpus[i].mm); } @@ -361,8 +399,10 @@ static int close(struct inode *inode, struct file *file) eventfd_ctx_put(lg->eventfds->map[i].event); kfree(lg->eventfds); - /* If lg->dead doesn't contain an error code it will be NULL or a - * kmalloc()ed string, either of which is ok to hand to kfree(). */ + /* + * If lg->dead doesn't contain an error code it will be NULL or a + * kmalloc()ed string, either of which is ok to hand to kfree(). + */ if (!IS_ERR(lg->dead)) kfree(lg->dead); /* Free the memory allocated to the lguest_struct */ @@ -386,7 +426,8 @@ static int close(struct inode *inode, struct file *file) * * We begin our understanding with the Host kernel interface which the Launcher * uses: reading and writing a character device called /dev/lguest. 
All the - * work happens in the read(), write() and close() routines: */ + * work happens in the read(), write() and close() routines: + */ static struct file_operations lguest_fops = { .owner = THIS_MODULE, .release = close, @@ -394,8 +435,10 @@ static struct file_operations lguest_fops = { .read = read, }; -/* This is a textbook example of a "misc" character device. Populate a "struct - * miscdevice" and register it with misc_register(). */ +/* + * This is a textbook example of a "misc" character device. Populate a "struct + * miscdevice" and register it with misc_register(). + */ static struct miscdevice lguest_dev = { .minor = MISC_DYNAMIC_MINOR, .name = "lguest", diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c index a6fe1abda240..3da902e4b4cb 100644 --- a/drivers/lguest/page_tables.c +++ b/drivers/lguest/page_tables.c @@ -1,9 +1,11 @@ -/*P:700 The pagetable code, on the other hand, still shows the scars of +/*P:700 + * The pagetable code, on the other hand, still shows the scars of * previous encounters. It's functional, and as neat as it can be in the * circumstances, but be wary, for these things are subtle and break easily. * The Guest provides a virtual to physical mapping, but we can neither trust * it nor use it: we verify and convert it here then point the CPU to the - * converted Guest pages when running the Guest. :*/ + * converted Guest pages when running the Guest. +:*/ /* Copyright (C) Rusty Russell IBM Corporation 2006. * GPL v2 and any later version */ @@ -17,10 +19,12 @@ #include #include "lg.h" -/*M:008 We hold reference to pages, which prevents them from being swapped. +/*M:008 + * We hold reference to pages, which prevents them from being swapped. * It'd be nice to have a callback in the "struct mm_struct" when Linux wants * to swap out. If we had this, and a shrinker callback to trim PTE pages, we - * could probably consider launching Guests as non-root. :*/ + * could probably consider launching Guests as non-root. +:*/ /*H:300 * The Page Table Code @@ -45,16 +49,19 @@ * (v) Flushing (throwing away) page tables, * (vi) Mapping the Switcher when the Guest is about to run, * (vii) Setting up the page tables initially. - :*/ +:*/ - -/* 1024 entries in a page table page maps 1024 pages: 4MB. The Switcher is +/* + * 1024 entries in a page table page maps 1024 pages: 4MB. The Switcher is * conveniently placed at the top 4MB, so it uses a separate, complete PTE - * page. */ + * page. + */ #define SWITCHER_PGD_INDEX (PTRS_PER_PGD - 1) -/* For PAE we need the PMD index as well. We use the last 2MB, so we - * will need the last pmd entry of the last pmd page. */ +/* + * For PAE we need the PMD index as well. We use the last 2MB, so we + * will need the last pmd entry of the last pmd page. + */ #ifdef CONFIG_X86_PAE #define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1) #define RESERVE_MEM 2U @@ -64,13 +71,16 @@ #define CHECK_GPGD_MASK _PAGE_TABLE #endif -/* We actually need a separate PTE page for each CPU. Remember that after the +/* + * We actually need a separate PTE page for each CPU. Remember that after the * Switcher code itself comes two pages for each CPU, and we don't want this - * CPU's guest to see the pages of any other CPU. */ + * CPU's guest to see the pages of any other CPU. 
+ */ static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); #define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu) -/*H:320 The page table code is curly enough to need helper functions to keep it +/*H:320 + * The page table code is curly enough to need helper functions to keep it * clear and clean. * * There are two functions which return pointers to the shadow (aka "real") @@ -79,7 +89,8 @@ static DEFINE_PER_CPU(pte_t *, switcher_pte_pages); * spgd_addr() takes the virtual address and returns a pointer to the top-level * page directory entry (PGD) for that address. Since we keep track of several * page tables, the "i" argument tells us which one we're interested in (it's - * usually the current one). */ + * usually the current one). + */ static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) { unsigned int index = pgd_index(vaddr); @@ -96,9 +107,11 @@ static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr) } #ifdef CONFIG_X86_PAE -/* This routine then takes the PGD entry given above, which contains the +/* + * This routine then takes the PGD entry given above, which contains the * address of the PMD page. It then returns a pointer to the PMD entry for the - * given address. */ + * given address. + */ static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) { unsigned int index = pmd_index(vaddr); @@ -119,9 +132,11 @@ static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) } #endif -/* This routine then takes the page directory entry returned above, which +/* + * This routine then takes the page directory entry returned above, which * contains the address of the page table entry (PTE) page. It then returns a - * pointer to the PTE entry for the given address. */ + * pointer to the PTE entry for the given address. + */ static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) { #ifdef CONFIG_X86_PAE @@ -139,8 +154,10 @@ static pte_t *spte_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr) return &page[pte_index(vaddr)]; } -/* These two functions just like the above two, except they access the Guest - * page tables. Hence they return a Guest address. */ +/* + * These two functions just like the above two, except they access the Guest + * page tables. Hence they return a Guest address. + */ static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr) { unsigned int index = vaddr >> (PGDIR_SHIFT); @@ -175,17 +192,21 @@ static unsigned long gpte_addr(struct lg_cpu *cpu, #endif /*:*/ -/*M:014 get_pfn is slow: we could probably try to grab batches of pages here as - * an optimization (ie. pre-faulting). :*/ +/*M:014 + * get_pfn is slow: we could probably try to grab batches of pages here as + * an optimization (ie. pre-faulting). +:*/ -/*H:350 This routine takes a page number given by the Guest and converts it to +/*H:350 + * This routine takes a page number given by the Guest and converts it to * an actual, physical page number. It can fail for several reasons: the * virtual address might not be mapped by the Launcher, the write flag is set * and the page is read-only, or the write flag was set and the page was * shared so had to be copied, but we ran out of memory. * * This holds a reference to the page, so release_pte() is careful to put that - * back. */ + * back. 
+ */ static unsigned long get_pfn(unsigned long virtpfn, int write) { struct page *page; @@ -198,33 +219,41 @@ static unsigned long get_pfn(unsigned long virtpfn, int write) return -1UL; } -/*H:340 Converting a Guest page table entry to a shadow (ie. real) page table +/*H:340 + * Converting a Guest page table entry to a shadow (ie. real) page table * entry can be a little tricky. The flags are (almost) the same, but the * Guest PTE contains a virtual page number: the CPU needs the real page - * number. */ + * number. + */ static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) { unsigned long pfn, base, flags; - /* The Guest sets the global flag, because it thinks that it is using + /* + * The Guest sets the global flag, because it thinks that it is using * PGE. We only told it to use PGE so it would tell us whether it was * flushing a kernel mapping or a userspace mapping. We don't actually - * use the global bit, so throw it away. */ + * use the global bit, so throw it away. + */ flags = (pte_flags(gpte) & ~_PAGE_GLOBAL); /* The Guest's pages are offset inside the Launcher. */ base = (unsigned long)cpu->lg->mem_base / PAGE_SIZE; - /* We need a temporary "unsigned long" variable to hold the answer from + /* + * We need a temporary "unsigned long" variable to hold the answer from * get_pfn(), because it returns 0xFFFFFFFF on failure, which wouldn't * fit in spte.pfn. get_pfn() finds the real physical number of the - * page, given the virtual number. */ + * page, given the virtual number. + */ pfn = get_pfn(base + pte_pfn(gpte), write); if (pfn == -1UL) { kill_guest(cpu, "failed to get page %lu", pte_pfn(gpte)); - /* When we destroy the Guest, we'll go through the shadow page + /* + * When we destroy the Guest, we'll go through the shadow page * tables and release_pte() them. Make sure we don't think - * this one is valid! */ + * this one is valid! + */ flags = 0; } /* Now we assemble our shadow PTE from the page number and flags. */ @@ -234,8 +263,10 @@ static pte_t gpte_to_spte(struct lg_cpu *cpu, pte_t gpte, int write) /*H:460 And to complete the chain, release_pte() looks like this: */ static void release_pte(pte_t pte) { - /* Remember that get_user_pages_fast() took a reference to the page, in - * get_pfn()? We have to put it back now. */ + /* + * Remember that get_user_pages_fast() took a reference to the page, in + * get_pfn()? We have to put it back now. + */ if (pte_flags(pte) & _PAGE_PRESENT) put_page(pte_page(pte)); } @@ -273,7 +304,8 @@ static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd) * and return to the Guest without it knowing. * * If we fixed up the fault (ie. we mapped the address), this routine returns - * true. Otherwise, it was a real fault and we need to tell the Guest. */ + * true. Otherwise, it was a real fault and we need to tell the Guest. + */ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) { pgd_t gpgd; @@ -298,22 +330,26 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { /* No shadow entry: allocate a new shadow PTE page. */ unsigned long ptepage = get_zeroed_page(GFP_KERNEL); - /* This is not really the Guest's fault, but killing it is - * simple for this corner case. */ + /* + * This is not really the Guest's fault, but killing it is + * simple for this corner case. + */ if (!ptepage) { kill_guest(cpu, "out of memory allocating pte page"); return false; } /* We check that the Guest pgd is OK. 
*/ check_gpgd(cpu, gpgd); - /* And we copy the flags to the shadow PGD entry. The page - * number in the shadow PGD is the page we just allocated. */ + /* + * And we copy the flags to the shadow PGD entry. The page + * number in the shadow PGD is the page we just allocated. + */ set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd))); } #ifdef CONFIG_X86_PAE gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t); - /* middle level not present? We can't map it in. */ + /* Middle level not present? We can't map it in. */ if (!(pmd_flags(gpmd) & _PAGE_PRESENT)) return false; @@ -324,8 +360,10 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) /* No shadow entry: allocate a new shadow PTE page. */ unsigned long ptepage = get_zeroed_page(GFP_KERNEL); - /* This is not really the Guest's fault, but killing it is - * simple for this corner case. */ + /* + * This is not really the Guest's fault, but killing it is + * simple for this corner case. + */ if (!ptepage) { kill_guest(cpu, "out of memory allocating pte page"); return false; @@ -334,17 +372,23 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) /* We check that the Guest pmd is OK. */ check_gpmd(cpu, gpmd); - /* And we copy the flags to the shadow PMD entry. The page - * number in the shadow PMD is the page we just allocated. */ + /* + * And we copy the flags to the shadow PMD entry. The page + * number in the shadow PMD is the page we just allocated. + */ native_set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd))); } - /* OK, now we look at the lower level in the Guest page table: keep its - * address, because we might update it later. */ + /* + * OK, now we look at the lower level in the Guest page table: keep its + * address, because we might update it later. + */ gpte_ptr = gpte_addr(cpu, gpmd, vaddr); #else - /* OK, now we look at the lower level in the Guest page table: keep its - * address, because we might update it later. */ + /* + * OK, now we look at the lower level in the Guest page table: keep its + * address, because we might update it later. + */ gpte_ptr = gpte_addr(cpu, gpgd, vaddr); #endif gpte = lgread(cpu, gpte_ptr, pte_t); @@ -353,8 +397,10 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) if (!(pte_flags(gpte) & _PAGE_PRESENT)) return false; - /* Check they're not trying to write to a page the Guest wants - * read-only (bit 2 of errcode == write). */ + /* + * Check they're not trying to write to a page the Guest wants + * read-only (bit 2 of errcode == write). + */ if ((errcode & 2) && !(pte_flags(gpte) & _PAGE_RW)) return false; @@ -362,8 +408,10 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) if ((errcode & 4) && !(pte_flags(gpte) & _PAGE_USER)) return false; - /* Check that the Guest PTE flags are OK, and the page number is below - * the pfn_limit (ie. not mapping the Launcher binary). */ + /* + * Check that the Guest PTE flags are OK, and the page number is below + * the pfn_limit (ie. not mapping the Launcher binary). + */ check_gpte(cpu, gpte); /* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */ @@ -373,29 +421,40 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) /* Get the pointer to the shadow PTE entry we're going to set. */ spte = spte_addr(cpu, *spgd, vaddr); - /* If there was a valid shadow PTE entry here before, we release it. - * This can happen with a write to a previously read-only entry. */ + + /* + * If there was a valid shadow PTE entry here before, we release it. 
+ * This can happen with a write to a previously read-only entry. + */ release_pte(*spte); - /* If this is a write, we insist that the Guest page is writable (the - * final arg to gpte_to_spte()). */ + /* + * If this is a write, we insist that the Guest page is writable (the + * final arg to gpte_to_spte()). + */ if (pte_dirty(gpte)) *spte = gpte_to_spte(cpu, gpte, 1); else - /* If this is a read, don't set the "writable" bit in the page + /* + * If this is a read, don't set the "writable" bit in the page * table entry, even if the Guest says it's writable. That way * we will come back here when a write does actually occur, so - * we can update the Guest's _PAGE_DIRTY flag. */ + * we can update the Guest's _PAGE_DIRTY flag. + */ native_set_pte(spte, gpte_to_spte(cpu, pte_wrprotect(gpte), 0)); - /* Finally, we write the Guest PTE entry back: we've set the - * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. */ + /* + * Finally, we write the Guest PTE entry back: we've set the + * _PAGE_ACCESSED and maybe the _PAGE_DIRTY flags. + */ lgwrite(cpu, gpte_ptr, pte_t, gpte); - /* The fault is fixed, the page table is populated, the mapping + /* + * The fault is fixed, the page table is populated, the mapping * manipulated, the result returned and the code complete. A small * delay and a trace of alliteration are the only indications the Guest - * has that a page fault occurred at all. */ + * has that a page fault occurred at all. + */ return true; } @@ -408,7 +467,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode) * mapped, so it's overkill. * * This is a quick version which answers the question: is this virtual address - * mapped by the shadow page tables, and is it writable? */ + * mapped by the shadow page tables, and is it writable? + */ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) { pgd_t *spgd; @@ -428,16 +488,20 @@ static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr) return false; #endif - /* Check the flags on the pte entry itself: it must be present and - * writable. */ + /* + * Check the flags on the pte entry itself: it must be present and + * writable. + */ flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr))); return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); } -/* So, when pin_stack_pages() asks us to pin a page, we check if it's already +/* + * So, when pin_stack_pages() asks us to pin a page, we check if it's already * in the page tables, and if not, we call demand_page() with error code 2 - * (meaning "write"). */ + * (meaning "write"). + */ void pin_page(struct lg_cpu *cpu, unsigned long vaddr) { if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2)) @@ -485,9 +549,11 @@ static void release_pgd(pgd_t *spgd) /* If the entry's not present, there's nothing to release. */ if (pgd_flags(*spgd) & _PAGE_PRESENT) { unsigned int i; - /* Converting the pfn to find the actual PTE page is easy: turn + /* + * Converting the pfn to find the actual PTE page is easy: turn * the page number into a physical address, then convert to a - * virtual address (easy for kernel pages like this one). */ + * virtual address (easy for kernel pages like this one). + */ pte_t *ptepage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); /* For each entry in the page, we might need to release it. 
*/ for (i = 0; i < PTRS_PER_PTE; i++) @@ -499,9 +565,12 @@ static void release_pgd(pgd_t *spgd) } } #endif -/*H:445 We saw flush_user_mappings() twice: once from the flush_user_mappings() + +/*H:445 + * We saw flush_user_mappings() twice: once from the flush_user_mappings() * hypercall and once in new_pgdir() when we re-used a top-level pgdir page. - * It simply releases every PTE page from 0 up to the Guest's kernel address. */ + * It simply releases every PTE page from 0 up to the Guest's kernel address. + */ static void flush_user_mappings(struct lguest *lg, int idx) { unsigned int i; @@ -510,10 +579,12 @@ static void flush_user_mappings(struct lguest *lg, int idx) release_pgd(lg->pgdirs[idx].pgdir + i); } -/*H:440 (v) Flushing (throwing away) page tables, +/*H:440 + * (v) Flushing (throwing away) page tables, * * The Guest has a hypercall to throw away the page tables: it's used when a - * large number of mappings have been changed. */ + * large number of mappings have been changed. + */ void guest_pagetable_flush_user(struct lg_cpu *cpu) { /* Drop the userspace part of the current page table. */ @@ -551,9 +622,11 @@ unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr) return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); } -/* We keep several page tables. This is a simple routine to find the page +/* + * We keep several page tables. This is a simple routine to find the page * table (if any) corresponding to this top-level address the Guest has given - * us. */ + * us. + */ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) { unsigned int i; @@ -563,9 +636,11 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable) return i; } -/*H:435 And this is us, creating the new page directory. If we really do +/*H:435 + * And this is us, creating the new page directory. If we really do * allocate a new one (and so the kernel parts are not there), we set - * blank_pgdir. */ + * blank_pgdir. + */ static unsigned int new_pgdir(struct lg_cpu *cpu, unsigned long gpgdir, int *blank_pgdir) @@ -575,8 +650,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, pmd_t *pmd_table; #endif - /* We pick one entry at random to throw out. Choosing the Least - * Recently Used might be better, but this is easy. */ + /* + * We pick one entry at random to throw out. Choosing the Least + * Recently Used might be better, but this is easy. + */ next = random32() % ARRAY_SIZE(cpu->lg->pgdirs); /* If it's never been allocated at all before, try now. */ if (!cpu->lg->pgdirs[next].pgdir) { @@ -587,8 +664,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, next = cpu->cpu_pgd; else { #ifdef CONFIG_X86_PAE - /* In PAE mode, allocate a pmd page and populate the - * last pgd entry. */ + /* + * In PAE mode, allocate a pmd page and populate the + * last pgd entry. + */ pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL); if (!pmd_table) { free_page((long)cpu->lg->pgdirs[next].pgdir); @@ -598,8 +677,10 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, set_pgd(cpu->lg->pgdirs[next].pgdir + SWITCHER_PGD_INDEX, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - /* This is a blank page, so there are no kernel - * mappings: caller must map the stack! */ + /* + * This is a blank page, so there are no kernel + * mappings: caller must map the stack! 
+ */ *blank_pgdir = 1; } #else @@ -615,19 +696,23 @@ static unsigned int new_pgdir(struct lg_cpu *cpu, return next; } -/*H:430 (iv) Switching page tables +/*H:430 + * (iv) Switching page tables * * Now we've seen all the page table setting and manipulation, let's see * what happens when the Guest changes page tables (ie. changes the top-level - * pgdir). This occurs on almost every context switch. */ + * pgdir). This occurs on almost every context switch. + */ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) { int newpgdir, repin = 0; /* Look to see if we have this one already. */ newpgdir = find_pgdir(cpu->lg, pgtable); - /* If not, we allocate or mug an existing one: if it's a fresh one, - * repin gets set to 1. */ + /* + * If not, we allocate or mug an existing one: if it's a fresh one, + * repin gets set to 1. + */ if (newpgdir == ARRAY_SIZE(cpu->lg->pgdirs)) newpgdir = new_pgdir(cpu, pgtable, &repin); /* Change the current pgd index to the new one. */ @@ -637,9 +722,11 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable) pin_stack_pages(cpu); } -/*H:470 Finally, a routine which throws away everything: all PGD entries in all +/*H:470 + * Finally, a routine which throws away everything: all PGD entries in all * the shadow page tables, including the Guest's kernel mappings. This is used - * when we destroy the Guest. */ + * when we destroy the Guest. + */ static void release_all_pagetables(struct lguest *lg) { unsigned int i, j; @@ -656,8 +743,10 @@ static void release_all_pagetables(struct lguest *lg) spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX; pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT); - /* And release the pmd entries of that pmd page, - * except for the switcher pmd. */ + /* + * And release the pmd entries of that pmd page, + * except for the switcher pmd. + */ for (k = 0; k < SWITCHER_PMD_INDEX; k++) release_pmd(&pmdpage[k]); #endif @@ -667,10 +756,12 @@ static void release_all_pagetables(struct lguest *lg) } } -/* We also throw away everything when a Guest tells us it's changed a kernel +/* + * We also throw away everything when a Guest tells us it's changed a kernel * mapping. Since kernel mappings are in every page table, it's easiest to * throw them all away. This traps the Guest in amber for a while as - * everything faults back in, but it's rare. */ + * everything faults back in, but it's rare. + */ void guest_pagetable_clear_all(struct lg_cpu *cpu) { release_all_pagetables(cpu->lg); @@ -678,15 +769,19 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu) pin_stack_pages(cpu); } /*:*/ -/*M:009 Since we throw away all mappings when a kernel mapping changes, our + +/*M:009 + * Since we throw away all mappings when a kernel mapping changes, our * performance sucks for guests using highmem. In fact, a guest with * PAGE_OFFSET 0xc0000000 (the default) and more than about 700MB of RAM is * usually slower than a Guest with less memory. * * This, of course, cannot be fixed. It would take some kind of... well, I - * don't know, but the term "puissant code-fu" comes to mind. :*/ + * don't know, but the term "puissant code-fu" comes to mind. +:*/ -/*H:420 This is the routine which actually sets the page table entry for then +/*H:420 + * This is the routine which actually sets the page table entry for then * "idx"'th shadow page table. 
* * Normally, we can just throw out the old entry and replace it with 0: if they @@ -715,31 +810,36 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, spmd = spmd_addr(cpu, *spgd, vaddr); if (pmd_flags(*spmd) & _PAGE_PRESENT) { #endif - /* Otherwise, we start by releasing - * the existing entry. */ + /* Otherwise, start by releasing the existing entry. */ pte_t *spte = spte_addr(cpu, *spgd, vaddr); release_pte(*spte); - /* If they're setting this entry as dirty or accessed, - * we might as well put that entry they've given us - * in now. This shaves 10% off a - * copy-on-write micro-benchmark. */ + /* + * If they're setting this entry as dirty or accessed, + * we might as well put that entry they've given us in + * now. This shaves 10% off a copy-on-write + * micro-benchmark. + */ if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) { check_gpte(cpu, gpte); native_set_pte(spte, gpte_to_spte(cpu, gpte, pte_flags(gpte) & _PAGE_DIRTY)); - } else - /* Otherwise kill it and we can demand_page() - * it in later. */ + } else { + /* + * Otherwise kill it and we can demand_page() + * it in later. + */ native_set_pte(spte, __pte(0)); + } #ifdef CONFIG_X86_PAE } #endif } } -/*H:410 Updating a PTE entry is a little trickier. +/*H:410 + * Updating a PTE entry is a little trickier. * * We keep track of several different page tables (the Guest uses one for each * process, so it makes sense to cache at least a few). Each of these have @@ -748,12 +848,15 @@ static void do_set_pte(struct lg_cpu *cpu, int idx, * all the page tables, not just the current one. This is rare. * * The benefit is that when we have to track a new page table, we can keep all - * the kernel mappings. This speeds up context switch immensely. */ + * the kernel mappings. This speeds up context switch immensely. + */ void guest_set_pte(struct lg_cpu *cpu, unsigned long gpgdir, unsigned long vaddr, pte_t gpte) { - /* Kernel mappings must be changed on all top levels. Slow, but doesn't - * happen often. */ + /* + * Kernel mappings must be changed on all top levels. Slow, but doesn't + * happen often. + */ if (vaddr >= cpu->lg->kernel_address) { unsigned int i; for (i = 0; i < ARRAY_SIZE(cpu->lg->pgdirs); i++) @@ -802,12 +905,14 @@ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx) } #endif -/* Once we know how much memory we have we can construct simple identity - * (which set virtual == physical) and linear mappings - * which will get the Guest far enough into the boot to create its own. +/* + * Once we know how much memory we have we can construct simple identity (which + * set virtual == physical) and linear mappings which will get the Guest far + * enough into the boot to create its own. * * We lay them out of the way, just below the initrd (which is why we need to - * know its size here). */ + * know its size here). + */ static unsigned long setup_pagetables(struct lguest *lg, unsigned long mem, unsigned long initrd_size) @@ -825,8 +930,10 @@ static unsigned long setup_pagetables(struct lguest *lg, unsigned int phys_linear; #endif - /* We have mapped_pages frames to map, so we need - * linear_pages page tables to map them. */ + /* + * We have mapped_pages frames to map, so we need linear_pages page + * tables to map them. 
+ */ mapped_pages = mem / PAGE_SIZE; linear_pages = (mapped_pages + PTRS_PER_PTE - 1) / PTRS_PER_PTE; @@ -839,8 +946,10 @@ static unsigned long setup_pagetables(struct lguest *lg, #ifdef CONFIG_X86_PAE pmds = (void *)linear - PAGE_SIZE; #endif - /* Linear mapping is easy: put every page's address into the - * mapping in order. */ + /* + * Linear mapping is easy: put every page's address into the + * mapping in order. + */ for (i = 0; i < mapped_pages; i++) { pte_t pte; pte = pfn_pte(i, __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER)); @@ -848,8 +957,10 @@ static unsigned long setup_pagetables(struct lguest *lg, return -EFAULT; } - /* The top level points to the linear page table pages above. - * We setup the identity and linear mappings here. */ + /* + * The top level points to the linear page table pages above. + * We setup the identity and linear mappings here. + */ #ifdef CONFIG_X86_PAE for (i = j = 0; i < mapped_pages && j < PTRS_PER_PMD; i += PTRS_PER_PTE, j++) { @@ -880,15 +991,19 @@ static unsigned long setup_pagetables(struct lguest *lg, } #endif - /* We return the top level (guest-physical) address: remember where - * this is. */ + /* + * We return the top level (guest-physical) address: remember where + * this is. + */ return (unsigned long)pgdir - mem_base; } -/*H:500 (vii) Setting up the page tables initially. +/*H:500 + * (vii) Setting up the page tables initially. * * When a Guest is first created, the Launcher tells us where the toplevel of - * its first page table is. We set some things up here: */ + * its first page table is. We set some things up here: + */ int init_guest_pagetable(struct lguest *lg) { u64 mem; @@ -898,14 +1013,18 @@ int init_guest_pagetable(struct lguest *lg) pgd_t *pgd; pmd_t *pmd_table; #endif - /* Get the Guest memory size and the ramdisk size from the boot header - * located at lg->mem_base (Guest address 0). */ + /* + * Get the Guest memory size and the ramdisk size from the boot header + * located at lg->mem_base (Guest address 0). + */ if (copy_from_user(&mem, &boot->e820_map[0].size, sizeof(mem)) || get_user(initrd_size, &boot->hdr.ramdisk_size)) return -EFAULT; - /* We start on the first shadow page table, and give it a blank PGD - * page. */ + /* + * We start on the first shadow page table, and give it a blank PGD + * page. + */ lg->pgdirs[0].gpgdir = setup_pagetables(lg, mem, initrd_size); if (IS_ERR_VALUE(lg->pgdirs[0].gpgdir)) return lg->pgdirs[0].gpgdir; @@ -931,17 +1050,21 @@ void page_table_guest_data_init(struct lg_cpu *cpu) /* We get the kernel address: above this is all kernel memory. */ if (get_user(cpu->lg->kernel_address, &cpu->lg->lguest_data->kernel_address) - /* We tell the Guest that it can't use the top 2 or 4 MB - * of virtual addresses used by the Switcher. */ + /* + * We tell the Guest that it can't use the top 2 or 4 MB + * of virtual addresses used by the Switcher. + */ || put_user(RESERVE_MEM * 1024 * 1024, &cpu->lg->lguest_data->reserve_mem) || put_user(cpu->lg->pgdirs[0].gpgdir, &cpu->lg->lguest_data->pgdir)) kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data); - /* In flush_user_mappings() we loop from 0 to + /* + * In flush_user_mappings() we loop from 0 to * "pgd_index(lg->kernel_address)". This assumes it won't hit the - * Switcher mappings, so check that now. */ + * Switcher mappings, so check that now. 
+ */ #ifdef CONFIG_X86_PAE if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX && pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX) @@ -964,12 +1087,14 @@ void free_guest_pagetable(struct lguest *lg) free_page((long)lg->pgdirs[i].pgdir); } -/*H:480 (vi) Mapping the Switcher when the Guest is about to run. +/*H:480 + * (vi) Mapping the Switcher when the Guest is about to run. * * The Switcher and the two pages for this CPU need to be visible in the * Guest (and not the pages for other CPUs). We have the appropriate PTE pages * for each CPU already set up, we just need to hook them in now we know which - * Guest is about to run on this CPU. */ + * Guest is about to run on this CPU. + */ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) { pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); @@ -990,20 +1115,24 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) #else pgd_t switcher_pgd; - /* Make the last PGD entry for this Guest point to the Switcher's PTE - * page for this CPU (with appropriate flags). */ + /* + * Make the last PGD entry for this Guest point to the Switcher's PTE + * page for this CPU (with appropriate flags). + */ switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC); cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; #endif - /* We also change the Switcher PTE page. When we're running the Guest, + /* + * We also change the Switcher PTE page. When we're running the Guest, * we want the Guest's "regs" page to appear where the first Switcher * page for this CPU is. This is an optimization: when the Switcher * saves the Guest registers, it saves them into the first page of this * CPU's "struct lguest_pages": if we make sure the Guest's register * page is already mapped there, we don't have to copy them out - * again. */ + * again. + */ pfn = __pa(cpu->regs_page) >> PAGE_SHIFT; native_set_pte(®s_pte, pfn_pte(pfn, PAGE_KERNEL)); native_set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], @@ -1019,10 +1148,12 @@ static void free_switcher_pte_pages(void) free_page((long)switcher_pte_page(i)); } -/*H:520 Setting up the Switcher PTE page for given CPU is fairly easy, given +/*H:520 + * Setting up the Switcher PTE page for given CPU is fairly easy, given * the CPU number and the "struct page"s for the Switcher code itself. * - * Currently the Switcher is less than a page long, so "pages" is always 1. */ + * Currently the Switcher is less than a page long, so "pages" is always 1. + */ static __init void populate_switcher_pte_page(unsigned int cpu, struct page *switcher_page[], unsigned int pages) @@ -1043,13 +1174,16 @@ static __init void populate_switcher_pte_page(unsigned int cpu, native_set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]), __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW))); - /* The second page contains the "struct lguest_ro_state", and is - * read-only. */ + /* + * The second page contains the "struct lguest_ro_state", and is + * read-only. + */ native_set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]), __pgprot(_PAGE_PRESENT|_PAGE_ACCESSED))); } -/* We've made it through the page table code. Perhaps our tired brains are +/* + * We've made it through the page table code. Perhaps our tired brains are * still processing the details, or perhaps we're simply glad it's over. 
* * If nothing else, note that all this complexity in juggling shadow page tables @@ -1058,10 +1192,13 @@ static __init void populate_switcher_pte_page(unsigned int cpu, * uses exotic direct Guest pagetable manipulation, and why both Intel and AMD * have implemented shadow page table support directly into hardware. * - * There is just one file remaining in the Host. */ + * There is just one file remaining in the Host. + */ -/*H:510 At boot or module load time, init_pagetables() allocates and populates - * the Switcher PTE page for each CPU. */ +/*H:510 + * At boot or module load time, init_pagetables() allocates and populates + * the Switcher PTE page for each CPU. + */ __init int init_pagetables(struct page **switcher_page, unsigned int pages) { unsigned int i; diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c index 482ed5a18750..951c57b0a7e0 100644 --- a/drivers/lguest/segments.c +++ b/drivers/lguest/segments.c @@ -1,4 +1,5 @@ -/*P:600 The x86 architecture has segments, which involve a table of descriptors +/*P:600 + * The x86 architecture has segments, which involve a table of descriptors * which can be used to do funky things with virtual address interpretation. * We originally used to use segments so the Guest couldn't alter the * Guest<->Host Switcher, and then we had to trim Guest segments, and restore @@ -8,7 +9,8 @@ * * In these modern times, the segment handling code consists of simple sanity * checks, and the worst you'll experience reading this code is butterfly-rash - * from frolicking through its parklike serenity. :*/ + * from frolicking through its parklike serenity. +:*/ #include "lg.h" /*H:600 @@ -41,10 +43,12 @@ * begin. */ -/* There are several entries we don't let the Guest set. The TSS entry is the +/* + * There are several entries we don't let the Guest set. The TSS entry is the * "Task State Segment" which controls all kinds of delicate things. The * LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and the - * the Guest can't be trusted to deal with double faults. */ + * the Guest can't be trusted to deal with double faults. + */ static bool ignored_gdt(unsigned int num) { return (num == GDT_ENTRY_TSS @@ -53,42 +57,52 @@ static bool ignored_gdt(unsigned int num) || num == GDT_ENTRY_DOUBLEFAULT_TSS); } -/*H:630 Once the Guest gave us new GDT entries, we fix them up a little. We +/*H:630 + * Once the Guest gave us new GDT entries, we fix them up a little. We * don't care if they're invalid: the worst that can happen is a General * Protection Fault in the Switcher when it restores a Guest segment register * which tries to use that entry. Then we kill the Guest for causing such a - * mess: the message will be "unhandled trap 256". */ + * mess: the message will be "unhandled trap 256". + */ static void fixup_gdt_table(struct lg_cpu *cpu, unsigned start, unsigned end) { unsigned int i; for (i = start; i < end; i++) { - /* We never copy these ones to real GDT, so we don't care what - * they say */ + /* + * We never copy these ones to real GDT, so we don't care what + * they say + */ if (ignored_gdt(i)) continue; - /* Segment descriptors contain a privilege level: the Guest is + /* + * Segment descriptors contain a privilege level: the Guest is * sometimes careless and leaves this as 0, even though it's - * running at privilege level 1. If so, we fix it here. */ + * running at privilege level 1. If so, we fix it here. 
+ */ if ((cpu->arch.gdt[i].b & 0x00006000) == 0) cpu->arch.gdt[i].b |= (GUEST_PL << 13); - /* Each descriptor has an "accessed" bit. If we don't set it + /* + * Each descriptor has an "accessed" bit. If we don't set it * now, the CPU will try to set it when the Guest first loads * that entry into a segment register. But the GDT isn't - * writable by the Guest, so bad things can happen. */ + * writable by the Guest, so bad things can happen. + */ cpu->arch.gdt[i].b |= 0x00000100; } } -/*H:610 Like the IDT, we never simply use the GDT the Guest gives us. We keep +/*H:610 + * Like the IDT, we never simply use the GDT the Guest gives us. We keep * a GDT for each CPU, and copy across the Guest's entries each time we want to * run the Guest on that CPU. * * This routine is called at boot or modprobe time for each CPU to set up the * constant GDT entries: the ones which are the same no matter what Guest we're - * running. */ + * running. + */ void setup_default_gdt_entries(struct lguest_ro_state *state) { struct desc_struct *gdt = state->guest_gdt; @@ -98,30 +112,37 @@ void setup_default_gdt_entries(struct lguest_ro_state *state) gdt[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; gdt[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; - /* The TSS segment refers to the TSS entry for this particular CPU. + /* + * The TSS segment refers to the TSS entry for this particular CPU. * Forgive the magic flags: the 0x8900 means the entry is Present, it's * privilege level 0 Available 386 TSS system segment, and the 0x67 - * means Saturn is eclipsed by Mercury in the twelfth house. */ + * means Saturn is eclipsed by Mercury in the twelfth house. + */ gdt[GDT_ENTRY_TSS].a = 0x00000067 | (tss << 16); gdt[GDT_ENTRY_TSS].b = 0x00008900 | (tss & 0xFF000000) | ((tss >> 16) & 0x000000FF); } -/* This routine sets up the initial Guest GDT for booting. All entries start - * as 0 (unusable). */ +/* + * This routine sets up the initial Guest GDT for booting. All entries start + * as 0 (unusable). + */ void setup_guest_gdt(struct lg_cpu *cpu) { - /* Start with full 0-4G segments... */ + /* + * Start with full 0-4G segments...except the Guest is allowed to use + * them, so set the privilege level appropriately in the flags. + */ cpu->arch.gdt[GDT_ENTRY_KERNEL_CS] = FULL_EXEC_SEGMENT; cpu->arch.gdt[GDT_ENTRY_KERNEL_DS] = FULL_SEGMENT; - /* ...except the Guest is allowed to use them, so set the privilege - * level appropriately in the flags. */ cpu->arch.gdt[GDT_ENTRY_KERNEL_CS].b |= (GUEST_PL << 13); cpu->arch.gdt[GDT_ENTRY_KERNEL_DS].b |= (GUEST_PL << 13); } -/*H:650 An optimization of copy_gdt(), for just the three "thead-local storage" - * entries. */ +/*H:650 + * An optimization of copy_gdt(), for just the three "thead-local storage" + * entries. + */ void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt) { unsigned int i; @@ -130,26 +151,34 @@ void copy_gdt_tls(const struct lg_cpu *cpu, struct desc_struct *gdt) gdt[i] = cpu->arch.gdt[i]; } -/*H:640 When the Guest is run on a different CPU, or the GDT entries have - * changed, copy_gdt() is called to copy the Guest's GDT entries across to this - * CPU's GDT. */ +/*H:640 + * When the Guest is run on a different CPU, or the GDT entries have changed, + * copy_gdt() is called to copy the Guest's GDT entries across to this CPU's + * GDT. + */ void copy_gdt(const struct lg_cpu *cpu, struct desc_struct *gdt) { unsigned int i; - /* The default entries from setup_default_gdt_entries() are not - * replaced. See ignored_gdt() above. 
*/ + /* + * The default entries from setup_default_gdt_entries() are not + * replaced. See ignored_gdt() above. + */ for (i = 0; i < GDT_ENTRIES; i++) if (!ignored_gdt(i)) gdt[i] = cpu->arch.gdt[i]; } -/*H:620 This is where the Guest asks us to load a new GDT entry - * (LHCALL_LOAD_GDT_ENTRY). We tweak the entry and copy it in. */ +/*H:620 + * This is where the Guest asks us to load a new GDT entry + * (LHCALL_LOAD_GDT_ENTRY). We tweak the entry and copy it in. + */ void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi) { - /* We assume the Guest has the same number of GDT entries as the - * Host, otherwise we'd have to dynamically allocate the Guest GDT. */ + /* + * We assume the Guest has the same number of GDT entries as the + * Host, otherwise we'd have to dynamically allocate the Guest GDT. + */ if (num >= ARRAY_SIZE(cpu->arch.gdt)) kill_guest(cpu, "too many gdt entries %i", num); @@ -157,15 +186,19 @@ void load_guest_gdt_entry(struct lg_cpu *cpu, u32 num, u32 lo, u32 hi) cpu->arch.gdt[num].a = lo; cpu->arch.gdt[num].b = hi; fixup_gdt_table(cpu, num, num+1); - /* Mark that the GDT changed so the core knows it has to copy it again, - * even if the Guest is run on the same CPU. */ + /* + * Mark that the GDT changed so the core knows it has to copy it again, + * even if the Guest is run on the same CPU. + */ cpu->changed |= CHANGED_GDT; } -/* This is the fast-track version for just changing the three TLS entries. +/* + * This is the fast-track version for just changing the three TLS entries. * Remember that this happens on every context switch, so it's worth * optimizing. But wouldn't it be neater to have a single hypercall to cover - * both cases? */ + * both cases? + */ void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) { struct desc_struct *tls = &cpu->arch.gdt[GDT_ENTRY_TLS_MIN]; @@ -175,7 +208,6 @@ void guest_load_tls(struct lg_cpu *cpu, unsigned long gtls) /* Note that just the TLS entries have changed. */ cpu->changed |= CHANGED_GDT_TLS; } -/*:*/ /*H:660 * With this, we have finished the Host. diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index eaf722fe309a..96f7d88ec7f8 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -17,13 +17,15 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/*P:450 This file contains the x86-specific lguest code. It used to be all +/*P:450 + * This file contains the x86-specific lguest code. It used to be all * mixed in with drivers/lguest/core.c but several foolhardy code slashers * wrestled most of the dependencies out to here in preparation for porting * lguest to other architectures (see what I mean by foolhardy?). * * This also contains a couple of non-obvious setup and teardown pieces which - * were implemented after days of debugging pain. :*/ + * were implemented after days of debugging pain. +:*/ #include #include #include @@ -82,25 +84,33 @@ static DEFINE_PER_CPU(struct lg_cpu *, last_cpu); */ static void copy_in_guest_info(struct lg_cpu *cpu, struct lguest_pages *pages) { - /* Copying all this data can be quite expensive. We usually run the + /* + * Copying all this data can be quite expensive. We usually run the * same Guest we ran last time (and that Guest hasn't run anywhere else * meanwhile). If that's not the case, we pretend everything in the - * Guest has changed. */ + * Guest has changed. 
+ */ if (__get_cpu_var(last_cpu) != cpu || cpu->last_pages != pages) { __get_cpu_var(last_cpu) = cpu; cpu->last_pages = pages; cpu->changed = CHANGED_ALL; } - /* These copies are pretty cheap, so we do them unconditionally: */ - /* Save the current Host top-level page directory. */ + /* + * These copies are pretty cheap, so we do them unconditionally: */ + /* Save the current Host top-level page directory. + */ pages->state.host_cr3 = __pa(current->mm->pgd); - /* Set up the Guest's page tables to see this CPU's pages (and no - * other CPU's pages). */ + /* + * Set up the Guest's page tables to see this CPU's pages (and no + * other CPU's pages). + */ map_switcher_in_guest(cpu, pages); - /* Set up the two "TSS" members which tell the CPU what stack to use + /* + * Set up the two "TSS" members which tell the CPU what stack to use * for traps which do directly into the Guest (ie. traps at privilege - * level 1). */ + * level 1). + */ pages->state.guest_tss.sp1 = cpu->esp1; pages->state.guest_tss.ss1 = cpu->ss1; @@ -125,40 +135,53 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) /* This is a dummy value we need for GCC's sake. */ unsigned int clobber; - /* Copy the guest-specific information into this CPU's "struct - * lguest_pages". */ + /* + * Copy the guest-specific information into this CPU's "struct + * lguest_pages". + */ copy_in_guest_info(cpu, pages); - /* Set the trap number to 256 (impossible value). If we fault while + /* + * Set the trap number to 256 (impossible value). If we fault while * switching to the Guest (bad segment registers or bug), this will - * cause us to abort the Guest. */ + * cause us to abort the Guest. + */ cpu->regs->trapnum = 256; - /* Now: we push the "eflags" register on the stack, then do an "lcall". + /* + * Now: we push the "eflags" register on the stack, then do an "lcall". * This is how we change from using the kernel code segment to using * the dedicated lguest code segment, as well as jumping into the * Switcher. * * The lcall also pushes the old code segment (KERNEL_CS) onto the * stack, then the address of this call. This stack layout happens to - * exactly match the stack layout created by an interrupt... */ + * exactly match the stack layout created by an interrupt... + */ asm volatile("pushf; lcall *lguest_entry" - /* This is how we tell GCC that %eax ("a") and %ebx ("b") - * are changed by this routine. The "=" means output. */ + /* + * This is how we tell GCC that %eax ("a") and %ebx ("b") + * are changed by this routine. The "=" means output. + */ : "=a"(clobber), "=b"(clobber) - /* %eax contains the pages pointer. ("0" refers to the + /* + * %eax contains the pages pointer. ("0" refers to the * 0-th argument above, ie "a"). %ebx contains the * physical address of the Guest's top-level page - * directory. */ + * directory. + */ : "0"(pages), "1"(__pa(cpu->lg->pgdirs[cpu->cpu_pgd].pgdir)) - /* We tell gcc that all these registers could change, + /* + * We tell gcc that all these registers could change, * which means we don't have to save and restore them in - * the Switcher. */ + * the Switcher. + */ : "memory", "%edx", "%ecx", "%edi", "%esi"); } /*:*/ -/*M:002 There are hooks in the scheduler which we can register to tell when we +/*M:002 + * There are hooks in the scheduler which we can register to tell when we * get kicked off the CPU (preempt_notifier_register()). This would allow us * to lazily disable SYSENTER which would regain some performance, and should * also simplify copy_in_guest_info(). 
Note that we'd still need to restore @@ -166,56 +189,72 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) * * We could also try using this hooks for PGE, but that might be too expensive. * - * The hooks were designed for KVM, but we can also put them to good use. :*/ + * The hooks were designed for KVM, but we can also put them to good use. +:*/ -/*H:040 This is the i386-specific code to setup and run the Guest. Interrupts - * are disabled: we own the CPU. */ +/*H:040 + * This is the i386-specific code to setup and run the Guest. Interrupts + * are disabled: we own the CPU. + */ void lguest_arch_run_guest(struct lg_cpu *cpu) { - /* Remember the awfully-named TS bit? If the Guest has asked to set it + /* + * Remember the awfully-named TS bit? If the Guest has asked to set it * we set it now, so we can trap and pass that trap to the Guest if it - * uses the FPU. */ + * uses the FPU. + */ if (cpu->ts) unlazy_fpu(current); - /* SYSENTER is an optimized way of doing system calls. We can't allow + /* + * SYSENTER is an optimized way of doing system calls. We can't allow * it because it always jumps to privilege level 0. A normal Guest * won't try it because we don't advertise it in CPUID, but a malicious * Guest (or malicious Guest userspace program) could, so we tell the - * CPU to disable it before running the Guest. */ + * CPU to disable it before running the Guest. + */ if (boot_cpu_has(X86_FEATURE_SEP)) wrmsr(MSR_IA32_SYSENTER_CS, 0, 0); - /* Now we actually run the Guest. It will return when something + /* + * Now we actually run the Guest. It will return when something * interesting happens, and we can examine its registers to see what it - * was doing. */ + * was doing. + */ run_guest_once(cpu, lguest_pages(raw_smp_processor_id())); - /* Note that the "regs" structure contains two extra entries which are + /* + * Note that the "regs" structure contains two extra entries which are * not really registers: a trap number which says what interrupt or * trap made the switcher code come back, and an error code which some - * traps set. */ + * traps set. + */ /* Restore SYSENTER if it's supposed to be on. */ if (boot_cpu_has(X86_FEATURE_SEP)) wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); - /* If the Guest page faulted, then the cr2 register will tell us the + /* + * If the Guest page faulted, then the cr2 register will tell us the * bad virtual address. We have to grab this now, because once we * re-enable interrupts an interrupt could fault and thus overwrite - * cr2, or we could even move off to a different CPU. */ + * cr2, or we could even move off to a different CPU. + */ if (cpu->regs->trapnum == 14) cpu->arch.last_pagefault = read_cr2(); - /* Similarly, if we took a trap because the Guest used the FPU, + /* + * Similarly, if we took a trap because the Guest used the FPU, * we have to restore the FPU it expects to see. * math_state_restore() may sleep and we may even move off to * a different CPU. So all the critical stuff should be done - * before this. */ + * before this. + */ else if (cpu->regs->trapnum == 7) math_state_restore(); } -/*H:130 Now we've examined the hypercall code; our Guest can make requests. +/*H:130 + * Now we've examined the hypercall code; our Guest can make requests. * Our Guest is usually so well behaved; it never tries to do things it isn't * allowed to, and uses hypercalls instead. 
Unfortunately, Linux's paravirtual * infrastructure isn't quite complete, because it doesn't contain replacements @@ -225,26 +264,33 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * * When the Guest uses one of these instructions, we get a trap (General * Protection Fault) and come here. We see if it's one of those troublesome - * instructions and skip over it. We return true if we did. */ + * instructions and skip over it. We return true if we did. + */ static int emulate_insn(struct lg_cpu *cpu) { u8 insn; unsigned int insnlen = 0, in = 0, shift = 0; - /* The eip contains the *virtual* address of the Guest's instruction: - * guest_pa just subtracts the Guest's page_offset. */ + /* + * The eip contains the *virtual* address of the Guest's instruction: + * guest_pa just subtracts the Guest's page_offset. + */ unsigned long physaddr = guest_pa(cpu, cpu->regs->eip); - /* This must be the Guest kernel trying to do something, not userspace! + /* + * This must be the Guest kernel trying to do something, not userspace! * The bottom two bits of the CS segment register are the privilege - * level. */ + * level. + */ if ((cpu->regs->cs & 3) != GUEST_PL) return 0; /* Decoding x86 instructions is icky. */ insn = lgread(cpu, physaddr, u8); - /* 0x66 is an "operand prefix". It means it's using the upper 16 bits - of the eax register. */ + /* + * 0x66 is an "operand prefix". It means it's using the upper 16 bits + * of the eax register. + */ if (insn == 0x66) { shift = 16; /* The instruction is 1 byte so far, read the next byte. */ @@ -252,8 +298,10 @@ static int emulate_insn(struct lg_cpu *cpu) insn = lgread(cpu, physaddr + insnlen, u8); } - /* We can ignore the lower bit for the moment and decode the 4 opcodes - * we need to emulate. */ + /* + * We can ignore the lower bit for the moment and decode the 4 opcodes + * we need to emulate. + */ switch (insn & 0xFE) { case 0xE4: /* in ,%al */ insnlen += 2; @@ -274,9 +322,11 @@ static int emulate_insn(struct lg_cpu *cpu) return 0; } - /* If it was an "IN" instruction, they expect the result to be read + /* + * If it was an "IN" instruction, they expect the result to be read * into %eax, so we change %eax. We always return all-ones, which - * traditionally means "there's nothing there". */ + * traditionally means "there's nothing there". + */ if (in) { /* Lower bit tells is whether it's a 16 or 32 bit access */ if (insn & 0x1) @@ -290,7 +340,8 @@ static int emulate_insn(struct lg_cpu *cpu) return 1; } -/* Our hypercalls mechanism used to be based on direct software interrupts. +/* + * Our hypercalls mechanism used to be based on direct software interrupts. * After Anthony's "Refactor hypercall infrastructure" kvm patch, we decided to * change over to using kvm hypercalls. * @@ -318,16 +369,20 @@ static int emulate_insn(struct lg_cpu *cpu) */ static void rewrite_hypercall(struct lg_cpu *cpu) { - /* This are the opcodes we use to patch the Guest. The opcode for "int + /* + * This are the opcodes we use to patch the Guest. The opcode for "int * $0x1f" is "0xcd 0x1f" but vmcall instruction is 3 bytes long, so we - * complete the sequence with a NOP (0x90). */ + * complete the sequence with a NOP (0x90). + */ u8 insn[3] = {0xcd, 0x1f, 0x90}; __lgwrite(cpu, guest_pa(cpu, cpu->regs->eip), insn, sizeof(insn)); - /* The above write might have caused a copy of that page to be made + /* + * The above write might have caused a copy of that page to be made * (if it was read-only). We need to make sure the Guest has * up-to-date pagetables. 
As this doesn't happen often, we can just - * drop them all. */ + * drop them all. + */ guest_pagetable_clear_all(cpu); } @@ -335,9 +390,11 @@ static bool is_hypercall(struct lg_cpu *cpu) { u8 insn[3]; - /* This must be the Guest kernel trying to do something. + /* + * This must be the Guest kernel trying to do something. * The bottom two bits of the CS segment register are the privilege - * level. */ + * level. + */ if ((cpu->regs->cs & 3) != GUEST_PL) return false; @@ -351,86 +408,105 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) { switch (cpu->regs->trapnum) { case 13: /* We've intercepted a General Protection Fault. */ - /* Check if this was one of those annoying IN or OUT + /* + * Check if this was one of those annoying IN or OUT * instructions which we need to emulate. If so, we just go - * back into the Guest after we've done it. */ + * back into the Guest after we've done it. + */ if (cpu->regs->errcode == 0) { if (emulate_insn(cpu)) return; } - /* If KVM is active, the vmcall instruction triggers a - * General Protection Fault. Normally it triggers an - * invalid opcode fault (6): */ + /* + * If KVM is active, the vmcall instruction triggers a General + * Protection Fault. Normally it triggers an invalid opcode + * fault (6): + */ case 6: - /* We need to check if ring == GUEST_PL and - * faulting instruction == vmcall. */ + /* + * We need to check if ring == GUEST_PL and faulting + * instruction == vmcall. + */ if (is_hypercall(cpu)) { rewrite_hypercall(cpu); return; } break; case 14: /* We've intercepted a Page Fault. */ - /* The Guest accessed a virtual address that wasn't mapped. + /* + * The Guest accessed a virtual address that wasn't mapped. * This happens a lot: we don't actually set up most of the page * tables for the Guest at all when we start: as it runs it asks * for more and more, and we set them up as required. In this * case, we don't even tell the Guest that the fault happened. * * The errcode tells whether this was a read or a write, and - * whether kernel or userspace code. */ + * whether kernel or userspace code. + */ if (demand_page(cpu, cpu->arch.last_pagefault, cpu->regs->errcode)) return; - /* OK, it's really not there (or not OK): the Guest needs to + /* + * OK, it's really not there (or not OK): the Guest needs to * know. We write out the cr2 value so it knows where the * fault occurred. * * Note that if the Guest were really messed up, this could * happen before it's done the LHCALL_LGUEST_INIT hypercall, so - * lg->lguest_data could be NULL */ + * lg->lguest_data could be NULL + */ if (cpu->lg->lguest_data && put_user(cpu->arch.last_pagefault, &cpu->lg->lguest_data->cr2)) kill_guest(cpu, "Writing cr2"); break; case 7: /* We've intercepted a Device Not Available fault. */ - /* If the Guest doesn't want to know, we already restored the - * Floating Point Unit, so we just continue without telling - * it. */ + /* + * If the Guest doesn't want to know, we already restored the + * Floating Point Unit, so we just continue without telling it. + */ if (!cpu->ts) return; break; case 32 ... 255: - /* These values mean a real interrupt occurred, in which case + /* + * These values mean a real interrupt occurred, in which case * the Host handler has already been run. 
We just do a * friendly check if another process should now be run, then - * return to run the Guest again */ + * return to run the Guest again + */ cond_resched(); return; case LGUEST_TRAP_ENTRY: - /* Our 'struct hcall_args' maps directly over our regs: we set - * up the pointer now to indicate a hypercall is pending. */ + /* + * Our 'struct hcall_args' maps directly over our regs: we set + * up the pointer now to indicate a hypercall is pending. + */ cpu->hcall = (struct hcall_args *)cpu->regs; return; } /* We didn't handle the trap, so it needs to go to the Guest. */ if (!deliver_trap(cpu, cpu->regs->trapnum)) - /* If the Guest doesn't have a handler (either it hasn't + /* + * If the Guest doesn't have a handler (either it hasn't * registered any yet, or it's one of the faults we don't let - * it handle), it dies with this cryptic error message. */ + * it handle), it dies with this cryptic error message. + */ kill_guest(cpu, "unhandled trap %li at %#lx (%#lx)", cpu->regs->trapnum, cpu->regs->eip, cpu->regs->trapnum == 14 ? cpu->arch.last_pagefault : cpu->regs->errcode); } -/* Now we can look at each of the routines this calls, in increasing order of +/* + * Now we can look at each of the routines this calls, in increasing order of * complexity: do_hypercalls(), emulate_insn(), maybe_do_interrupt(), * deliver_trap() and demand_page(). After all those, we'll be ready to * examine the Switcher, and our philosophical understanding of the Host/Guest - * duality will be complete. :*/ + * duality will be complete. +:*/ static void adjust_pge(void *on) { if (on) @@ -439,13 +515,16 @@ static void adjust_pge(void *on) write_cr4(read_cr4() & ~X86_CR4_PGE); } -/*H:020 Now the Switcher is mapped and every thing else is ready, we need to do - * some more i386-specific initialization. */ +/*H:020 + * Now the Switcher is mapped and every thing else is ready, we need to do + * some more i386-specific initialization. + */ void __init lguest_arch_host_init(void) { int i; - /* Most of the i386/switcher.S doesn't care that it's been moved; on + /* + * Most of the i386/switcher.S doesn't care that it's been moved; on * Intel, jumps are relative, and it doesn't access any references to * external code or data. * @@ -453,7 +532,8 @@ void __init lguest_arch_host_init(void) * addresses are placed in a table (default_idt_entries), so we need to * update the table with the new addresses. switcher_offset() is a * convenience function which returns the distance between the - * compiled-in switcher code and the high-mapped copy we just made. */ + * compiled-in switcher code and the high-mapped copy we just made. + */ for (i = 0; i < IDT_ENTRIES; i++) default_idt_entries[i] += switcher_offset(); @@ -468,63 +548,81 @@ void __init lguest_arch_host_init(void) for_each_possible_cpu(i) { /* lguest_pages() returns this CPU's two pages. */ struct lguest_pages *pages = lguest_pages(i); - /* This is a convenience pointer to make the code fit one - * statement to a line. */ + /* This is a convenience pointer to make the code neater. */ struct lguest_ro_state *state = &pages->state; - /* The Global Descriptor Table: the Host has a different one + /* + * The Global Descriptor Table: the Host has a different one * for each CPU. We keep a descriptor for the GDT which says * where it is and how big it is (the size is actually the last - * byte, not the size, hence the "-1"). */ + * byte, not the size, hence the "-1"). 
+ */ state->host_gdt_desc.size = GDT_SIZE-1; state->host_gdt_desc.address = (long)get_cpu_gdt_table(i); - /* All CPUs on the Host use the same Interrupt Descriptor + /* + * All CPUs on the Host use the same Interrupt Descriptor * Table, so we just use store_idt(), which gets this CPU's IDT - * descriptor. */ + * descriptor. + */ store_idt(&state->host_idt_desc); - /* The descriptors for the Guest's GDT and IDT can be filled + /* + * The descriptors for the Guest's GDT and IDT can be filled * out now, too. We copy the GDT & IDT into ->guest_gdt and - * ->guest_idt before actually running the Guest. */ + * ->guest_idt before actually running the Guest. + */ state->guest_idt_desc.size = sizeof(state->guest_idt)-1; state->guest_idt_desc.address = (long)&state->guest_idt; state->guest_gdt_desc.size = sizeof(state->guest_gdt)-1; state->guest_gdt_desc.address = (long)&state->guest_gdt; - /* We know where we want the stack to be when the Guest enters + /* + * We know where we want the stack to be when the Guest enters * the Switcher: in pages->regs. The stack grows upwards, so - * we start it at the end of that structure. */ + * we start it at the end of that structure. + */ state->guest_tss.sp0 = (long)(&pages->regs + 1); - /* And this is the GDT entry to use for the stack: we keep a - * couple of special LGUEST entries. */ + /* + * And this is the GDT entry to use for the stack: we keep a + * couple of special LGUEST entries. + */ state->guest_tss.ss0 = LGUEST_DS; - /* x86 can have a finegrained bitmap which indicates what I/O + /* + * x86 can have a finegrained bitmap which indicates what I/O * ports the process can use. We set it to the end of our - * structure, meaning "none". */ + * structure, meaning "none". + */ state->guest_tss.io_bitmap_base = sizeof(state->guest_tss); - /* Some GDT entries are the same across all Guests, so we can - * set them up now. */ + /* + * Some GDT entries are the same across all Guests, so we can + * set them up now. + */ setup_default_gdt_entries(state); /* Most IDT entries are the same for all Guests, too.*/ setup_default_idt_entries(state, default_idt_entries); - /* The Host needs to be able to use the LGUEST segments on this - * CPU, too, so put them in the Host GDT. */ + /* + * The Host needs to be able to use the LGUEST segments on this + * CPU, too, so put them in the Host GDT. + */ get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_CS] = FULL_EXEC_SEGMENT; get_cpu_gdt_table(i)[GDT_ENTRY_LGUEST_DS] = FULL_SEGMENT; } - /* In the Switcher, we want the %cs segment register to use the + /* + * In the Switcher, we want the %cs segment register to use the * LGUEST_CS GDT entry: we've put that in the Host and Guest GDTs, so * it will be undisturbed when we switch. To change %cs and jump we - * need this structure to feed to Intel's "lcall" instruction. */ + * need this structure to feed to Intel's "lcall" instruction. + */ lguest_entry.offset = (long)switch_to_guest + switcher_offset(); lguest_entry.segment = LGUEST_CS; - /* Finally, we need to turn off "Page Global Enable". PGE is an + /* + * Finally, we need to turn off "Page Global Enable". PGE is an * optimization where page table entries are specially marked to show * they never change. The Host kernel marks all the kernel pages this * way because it's always present, even when userspace is running. @@ -534,16 +632,21 @@ void __init lguest_arch_host_init(void) * you'll get really weird bugs that you'll chase for two days. 
* * I used to turn PGE off every time we switched to the Guest and back - * on when we return, but that slowed the Switcher down noticibly. */ + * on when we return, but that slowed the Switcher down noticibly. + */ - /* We don't need the complexity of CPUs coming and going while we're - * doing this. */ + /* + * We don't need the complexity of CPUs coming and going while we're + * doing this. + */ get_online_cpus(); if (cpu_has_pge) { /* We have a broader idea of "global". */ /* Remember that this was originally set (for cleanup). */ cpu_had_pge = 1; - /* adjust_pge is a helper function which sets or unsets the PGE - * bit on its CPU, depending on the argument (0 == unset). */ + /* + * adjust_pge is a helper function which sets or unsets the PGE + * bit on its CPU, depending on the argument (0 == unset). + */ on_each_cpu(adjust_pge, (void *)0, 1); /* Turn off the feature in the global feature set. */ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_PGE); @@ -590,26 +693,32 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) { u32 tsc_speed; - /* The pointer to the Guest's "struct lguest_data" is the only argument. - * We check that address now. */ + /* + * The pointer to the Guest's "struct lguest_data" is the only argument. + * We check that address now. + */ if (!lguest_address_ok(cpu->lg, cpu->hcall->arg1, sizeof(*cpu->lg->lguest_data))) return -EFAULT; - /* Having checked it, we simply set lg->lguest_data to point straight + /* + * Having checked it, we simply set lg->lguest_data to point straight * into the Launcher's memory at the right place and then use * copy_to_user/from_user from now on, instead of lgread/write. I put * this in to show that I'm not immune to writing stupid - * optimizations. */ + * optimizations. + */ cpu->lg->lguest_data = cpu->lg->mem_base + cpu->hcall->arg1; - /* We insist that the Time Stamp Counter exist and doesn't change with + /* + * We insist that the Time Stamp Counter exist and doesn't change with * cpu frequency. Some devious chip manufacturers decided that TSC * changes could be handled in software. I decided that time going * backwards might be good for benchmarks, but it's bad for users. * * We also insist that the TSC be stable: the kernel detects unreliable - * TSCs for its own purposes, and we use that here. */ + * TSCs for its own purposes, and we use that here. + */ if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && !check_tsc_unstable()) tsc_speed = tsc_khz; else @@ -625,38 +734,47 @@ int lguest_arch_init_hypercalls(struct lg_cpu *cpu) } /*:*/ -/*L:030 lguest_arch_setup_regs() +/*L:030 + * lguest_arch_setup_regs() * * Most of the Guest's registers are left alone: we used get_zeroed_page() to - * allocate the structure, so they will be 0. */ + * allocate the structure, so they will be 0. + */ void lguest_arch_setup_regs(struct lg_cpu *cpu, unsigned long start) { struct lguest_regs *regs = cpu->regs; - /* There are four "segment" registers which the Guest needs to boot: + /* + * There are four "segment" registers which the Guest needs to boot: * The "code segment" register (cs) refers to the kernel code segment * __KERNEL_CS, and the "data", "extra" and "stack" segment registers * refer to the kernel data segment __KERNEL_DS. * * The privilege level is packed into the lower bits. The Guest runs - * at privilege level 1 (GUEST_PL).*/ + * at privilege level 1 (GUEST_PL). + */ regs->ds = regs->es = regs->ss = __KERNEL_DS|GUEST_PL; regs->cs = __KERNEL_CS|GUEST_PL; - /* The "eflags" register contains miscellaneous flags. 
Bit 1 (0x002) + /* + * The "eflags" register contains miscellaneous flags. Bit 1 (0x002) * is supposed to always be "1". Bit 9 (0x200) controls whether * interrupts are enabled. We always leave interrupts enabled while - * running the Guest. */ + * running the Guest. + */ regs->eflags = X86_EFLAGS_IF | 0x2; - /* The "Extended Instruction Pointer" register says where the Guest is - * running. */ + /* + * The "Extended Instruction Pointer" register says where the Guest is + * running. + */ regs->eip = start; - /* %esi points to our boot information, at physical address 0, so don't - * touch it. */ + /* + * %esi points to our boot information, at physical address 0, so don't + * touch it. + */ - /* There are a couple of GDT entries the Guest expects when first - * booting. */ + /* There are a couple of GDT entries the Guest expects at boot. */ setup_guest_gdt(cpu); } diff --git a/drivers/lguest/x86/switcher_32.S b/drivers/lguest/x86/switcher_32.S index 3fc15318a80f..6dec09793836 100644 --- a/drivers/lguest/x86/switcher_32.S +++ b/drivers/lguest/x86/switcher_32.S @@ -1,12 +1,15 @@ -/*P:900 This is the Switcher: code which sits at 0xFFC00000 astride both the +/*P:900 + * This is the Switcher: code which sits at 0xFFC00000 astride both the * Host and Guest to do the low-level Guest<->Host switch. It is as simple as * it can be made, but it's naturally very specific to x86. * * You have now completed Preparation. If this has whet your appetite; if you * are feeling invigorated and refreshed then the next, more challenging stage - * can be found in "make Guest". :*/ + * can be found in "make Guest". + :*/ -/*M:012 Lguest is meant to be simple: my rule of thumb is that 1% more LOC must +/*M:012 + * Lguest is meant to be simple: my rule of thumb is that 1% more LOC must * gain at least 1% more performance. Since neither LOC nor performance can be * measured beforehand, it generally means implementing a feature then deciding * if it's worth it. And once it's implemented, who can say no? @@ -31,11 +34,14 @@ * Host (which is actually really easy). * * Two questions remain. Would the performance gain outweigh the complexity? - * And who would write the verse documenting it? :*/ + * And who would write the verse documenting it? +:*/ -/*M:011 Lguest64 handles NMI. This gave me NMI envy (until I looked at their +/*M:011 + * Lguest64 handles NMI. This gave me NMI envy (until I looked at their * code). It's worth doing though, since it would let us use oprofile in the - * Host when a Guest is running. :*/ + * Host when a Guest is running. +:*/ /*S:100 * Welcome to the Switcher itself! diff --git a/include/linux/lguest.h b/include/linux/lguest.h index dbf2479e808e..0a3a11afd64b 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h @@ -1,5 +1,7 @@ -/* Things the lguest guest needs to know. Note: like all lguest interfaces, - * this is subject to wild and random change between versions. */ +/* + * Things the lguest guest needs to know. Note: like all lguest interfaces, + * this is subject to wild and random change between versions. + */ #ifndef _LINUX_LGUEST_H #define _LINUX_LGUEST_H @@ -11,32 +13,42 @@ #define LG_CLOCK_MIN_DELTA 100UL #define LG_CLOCK_MAX_DELTA ULONG_MAX -/*G:031 The second method of communicating with the Host is to via "struct +/*G:031 + * The second method of communicating with the Host is to via "struct * lguest_data". Once the Guest's initialization hypercall tells the Host where - * this is, the Guest and Host both publish information in it. 
:*/ + * this is, the Guest and Host both publish information in it. +:*/ struct lguest_data { - /* 512 == enabled (same as eflags in normal hardware). The Guest - * changes interrupts so often that a hypercall is too slow. */ + /* + * 512 == enabled (same as eflags in normal hardware). The Guest + * changes interrupts so often that a hypercall is too slow. + */ unsigned int irq_enabled; /* Fine-grained interrupt disabling by the Guest */ DECLARE_BITMAP(blocked_interrupts, LGUEST_IRQS); - /* The Host writes the virtual address of the last page fault here, + /* + * The Host writes the virtual address of the last page fault here, * which saves the Guest a hypercall. CR2 is the native register where - * this address would normally be found. */ + * this address would normally be found. + */ unsigned long cr2; /* Wallclock time set by the Host. */ struct timespec time; - /* Interrupt pending set by the Host. The Guest should do a hypercall - * if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). */ + /* + * Interrupt pending set by the Host. The Guest should do a hypercall + * if it re-enables interrupts and sees this set (to X86_EFLAGS_IF). + */ int irq_pending; - /* Async hypercall ring. Instead of directly making hypercalls, we can + /* + * Async hypercall ring. Instead of directly making hypercalls, we can * place them in here for processing the next time the Host wants. - * This batching can be quite efficient. */ + * This batching can be quite efficient. + */ /* 0xFF == done (set by Host), 0 == pending (set by Guest). */ u8 hcall_status[LHCALL_RING_SIZE]; diff --git a/include/linux/lguest_launcher.h b/include/linux/lguest_launcher.h index bfefbdf7498a..495203ff221c 100644 --- a/include/linux/lguest_launcher.h +++ b/include/linux/lguest_launcher.h @@ -29,8 +29,10 @@ struct lguest_device_desc { __u8 type; /* The number of virtqueues (first in config array) */ __u8 num_vq; - /* The number of bytes of feature bits. Multiply by 2: one for host - * features and one for Guest acknowledgements. */ + /* + * The number of bytes of feature bits. Multiply by 2: one for host + * features and one for Guest acknowledgements. + */ __u8 feature_len; /* The number of bytes of the config array after virtqueues. */ __u8 config_len; @@ -39,8 +41,10 @@ struct lguest_device_desc { __u8 config[0]; }; -/*D:135 This is how we expect the device configuration field for a virtqueue - * to be laid out in config space. */ +/*D:135 + * This is how we expect the device configuration field for a virtqueue + * to be laid out in config space. + */ struct lguest_vqconfig { /* The number of entries in the virtio_ring */ __u16 num; @@ -61,7 +65,9 @@ enum lguest_req LHREQ_EVENTFD, /* + address, fd. */ }; -/* The alignment to use between consumer and producer parts of vring. - * x86 pagesize for historical reasons. */ +/* + * The alignment to use between consumer and producer parts of vring. + * x86 pagesize for historical reasons. + */ #define LGUEST_VRING_ALIGN 4096 #endif /* _LINUX_LGUEST_LAUNCHER */ -- cgit v1.2.3 From 1842f23c05b6a866be831aa60bc8a8731c58ddd0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Thu, 30 Jul 2009 16:03:46 -0600 Subject: lguest and virtio: cleanup struct definitions to Linux style. I've been doing this for years, and akpm picked me up on it about 12 months ago. lguest partly serves as example code, so let's do it Right. Also, remove two unused fields in struct vblk_info in the example launcher. 
Signed-off-by: Rusty Russell Cc: Ingo Molnar --- Documentation/lguest/lguest.c | 21 +++++---------------- drivers/lguest/lg.h | 9 +++------ drivers/lguest/lguest_device.c | 3 +-- include/linux/lguest.h | 3 +-- include/linux/virtio_blk.h | 6 ++---- include/linux/virtio_config.h | 3 +-- include/linux/virtio_net.h | 6 ++---- include/linux/virtio_ring.h | 12 ++++-------- 8 files changed, 19 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c index 45163651b519..950cde6d6e58 100644 --- a/Documentation/lguest/lguest.c +++ b/Documentation/lguest/lguest.c @@ -93,8 +93,7 @@ static int lguest_fd; static unsigned int __thread cpu_id; /* This is our list of devices. */ -struct device_list -{ +struct device_list { /* Counter to assign interrupt numbers. */ unsigned int next_irq; @@ -114,8 +113,7 @@ struct device_list static struct device_list devices; /* The device structure describes a single device. */ -struct device -{ +struct device { /* The linked-list pointer. */ struct device *next; @@ -140,8 +138,7 @@ struct device }; /* The virtqueue structure describes a queue attached to a device. */ -struct virtqueue -{ +struct virtqueue { struct virtqueue *next; /* Which device owns me. */ @@ -779,8 +776,7 @@ static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len) * * We associate some data with the console for our exit hack. */ -struct console_abort -{ +struct console_abort { /* How many times have they hit ^C? */ int count; /* When did they start? */ @@ -1570,20 +1566,13 @@ static void setup_tun_net(char *arg) /*:*/ /* This hangs off device->priv. */ -struct vblk_info -{ +struct vblk_info { /* The size of the file. */ off64_t len; /* The file descriptor for the file. */ int fd; - /* IO thread listens on this file descriptor [0]. */ - int workpipe[2]; - - /* IO thread writes to this file descriptor to mark it done, then - * Launcher triggers interrupt to Guest. */ - int done_fd; }; /*L:210 diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h index 74c0db691b53..bc28745d05af 100644 --- a/drivers/lguest/lg.h +++ b/drivers/lguest/lg.h @@ -16,15 +16,13 @@ void free_pagetables(void); int init_pagetables(struct page **switcher_page, unsigned int pages); -struct pgdir -{ +struct pgdir { unsigned long gpgdir; pgd_t *pgdir; }; /* We have two pages shared with guests, per cpu. */ -struct lguest_pages -{ +struct lguest_pages { /* This is the stack page mapped rw in guest */ char spare[PAGE_SIZE - sizeof(struct lguest_regs)]; struct lguest_regs regs; @@ -89,8 +87,7 @@ struct lg_eventfd_map { }; /* The private info the thread maintains about the guest. */ -struct lguest -{ +struct lguest { struct lguest_data __user *lguest_data; struct lg_cpu cpus[NR_CPUS]; unsigned int nr_cpus; diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c index 1401c1ace1ec..b6200bc39b58 100644 --- a/drivers/lguest/lguest_device.c +++ b/drivers/lguest/lguest_device.c @@ -207,8 +207,7 @@ static void lg_reset(struct virtio_device *vdev) */ /*D:140 This is the information we remember about each virtqueue. */ -struct lguest_vq_info -{ +struct lguest_vq_info { /* A copy of the information contained in the device config. */ struct lguest_vqconfig config; diff --git a/include/linux/lguest.h b/include/linux/lguest.h index 0a3a11afd64b..2fb1dcbcb5aa 100644 --- a/include/linux/lguest.h +++ b/include/linux/lguest.h @@ -18,8 +18,7 @@ * lguest_data". 
Once the Guest's initialization hypercall tells the Host where * this is, the Guest and Host both publish information in it. :*/ -struct lguest_data -{ +struct lguest_data { /* * 512 == enabled (same as eflags in normal hardware). The Guest * changes interrupts so often that a hypercall is too slow. diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index be7d255fc7cf..8dab9f2b8832 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -20,8 +20,7 @@ #define VIRTIO_BLK_ID_BYTES (sizeof(__u16[256])) /* IDENTIFY DATA */ -struct virtio_blk_config -{ +struct virtio_blk_config { /* The capacity (in 512-byte sectors). */ __u64 capacity; /* The maximum segment size (if VIRTIO_BLK_F_SIZE_MAX) */ @@ -50,8 +49,7 @@ struct virtio_blk_config #define VIRTIO_BLK_T_BARRIER 0x80000000 /* This is the first element of the read scatter-gather list. */ -struct virtio_blk_outhdr -{ +struct virtio_blk_outhdr { /* VIRTIO_BLK_T* */ __u32 type; /* io priority. */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 99f514575f6a..e547e3c8ee9a 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -79,8 +79,7 @@ * the dev->feature bits if it wants. */ typedef void vq_callback_t(struct virtqueue *); -struct virtio_config_ops -{ +struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len); void (*set)(struct virtio_device *vdev, unsigned offset, diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 9c543d6ac535..d8dd539c9f48 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -31,8 +31,7 @@ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ -struct virtio_net_config -{ +struct virtio_net_config { /* The config defining mac address (if VIRTIO_NET_F_MAC) */ __u8 mac[6]; /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ @@ -41,8 +40,7 @@ struct virtio_net_config /* This is the first element of the scatter-gather list. If you don't * specify GSO or CSUM features, you can simply ignore the header. */ -struct virtio_net_hdr -{ +struct virtio_net_hdr { #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset __u8 flags; #define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index 693e0ec5afa6..e4d144b132b5 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -30,8 +30,7 @@ #define VIRTIO_RING_F_INDIRECT_DESC 28 /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ -struct vring_desc -{ +struct vring_desc { /* Address (guest-physical). */ __u64 addr; /* Length. */ @@ -42,24 +41,21 @@ struct vring_desc __u16 next; }; -struct vring_avail -{ +struct vring_avail { __u16 flags; __u16 idx; __u16 ring[]; }; /* u32 is used here for ids for padding reasons. */ -struct vring_used_elem -{ +struct vring_used_elem { /* Index of start of used descriptor chain. */ __u32 id; /* Total length of the descriptor chain which was used (written to) */ __u32 len; }; -struct vring_used -{ +struct vring_used { __u16 flags; __u16 idx; struct vring_used_elem ring[]; -- cgit v1.2.3 From e5f5ccb646bc6009572b5c23201b5e81638ff150 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Thu, 23 Jul 2009 20:35:53 +0200 Subject: power_supply: get_by_name and set_charged functionality This adds a function that indicates that a battery is fully charged. 
It also includes functions to get a power_supply device from the class of registered devices by name reference. These can be used to find a specific battery to call power_supply_set_battery_charged() on. Some battery drivers might need this information to calibrate themselves. Signed-off-by: Daniel Mack Cc: Ian Molton Cc: Anton Vorontsov Cc: Matt Reimer Signed-off-by: Anton Vorontsov --- drivers/power/power_supply_core.c | 28 ++++++++++++++++++++++++++++ include/linux/power_supply.h | 3 +++ 2 files changed, 31 insertions(+) (limited to 'include') diff --git a/drivers/power/power_supply_core.c b/drivers/power/power_supply_core.c index 12cd6e36ff1d..cce75b40b435 100644 --- a/drivers/power/power_supply_core.c +++ b/drivers/power/power_supply_core.c @@ -116,6 +116,34 @@ int power_supply_is_system_supplied(void) } EXPORT_SYMBOL_GPL(power_supply_is_system_supplied); +int power_supply_set_battery_charged(struct power_supply *psy) +{ + if (psy->type == POWER_SUPPLY_TYPE_BATTERY && psy->set_charged) { + psy->set_charged(psy); + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(power_supply_set_battery_charged); + +static int power_supply_match_device_by_name(struct device *dev, void *data) +{ + const char *name = data; + struct power_supply *psy = dev_get_drvdata(dev); + + return strcmp(psy->name, name) == 0; +} + +struct power_supply *power_supply_get_by_name(char *name) +{ + struct device *dev = class_find_device(power_supply_class, NULL, name, + power_supply_match_device_by_name); + + return dev ? dev_get_drvdata(dev) : NULL; +} +EXPORT_SYMBOL_GPL(power_supply_get_by_name); + int power_supply_register(struct device *parent, struct power_supply *psy) { int rc = 0; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 4c7c6fc35487..b5d096d3a9be 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -144,6 +144,7 @@ struct power_supply { enum power_supply_property psp, union power_supply_propval *val); void (*external_power_changed)(struct power_supply *psy); + void (*set_charged)(struct power_supply *psy); /* For APM emulation, think legacy userspace. */ int use_for_apm; @@ -183,8 +184,10 @@ struct power_supply_info { int use_for_apm; }; +extern struct power_supply *power_supply_get_by_name(char *name); extern void power_supply_changed(struct power_supply *psy); extern int power_supply_am_i_supplied(struct power_supply *psy); +extern int power_supply_set_battery_charged(struct power_supply *psy); #if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE) extern int power_supply_is_system_supplied(void); -- cgit v1.2.3 From 9aada7ac047f789ffb27540cc1695989897b2dfe Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Thu, 30 Jul 2009 14:29:44 -0700 Subject: IPVS: use pr_fmt While being at it cleanup whitespace. Signed-off-by: Hannes Eder Signed-off-by: David S. 
Miller --- include/net/ip_vs.h | 119 ++++++++++++++++---------------- net/netfilter/ipvs/ip_vs_app.c | 3 + net/netfilter/ipvs/ip_vs_conn.c | 3 + net/netfilter/ipvs/ip_vs_core.c | 3 + net/netfilter/ipvs/ip_vs_ctl.c | 3 + net/netfilter/ipvs/ip_vs_dh.c | 3 + net/netfilter/ipvs/ip_vs_est.c | 4 ++ net/netfilter/ipvs/ip_vs_ftp.c | 3 + net/netfilter/ipvs/ip_vs_lblc.c | 3 + net/netfilter/ipvs/ip_vs_lblcr.c | 3 + net/netfilter/ipvs/ip_vs_lc.c | 3 + net/netfilter/ipvs/ip_vs_nq.c | 3 + net/netfilter/ipvs/ip_vs_proto.c | 7 +- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 7 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 3 + net/netfilter/ipvs/ip_vs_proto_udp.c | 3 + net/netfilter/ipvs/ip_vs_rr.c | 3 + net/netfilter/ipvs/ip_vs_sched.c | 3 + net/netfilter/ipvs/ip_vs_sed.c | 3 + net/netfilter/ipvs/ip_vs_sh.c | 3 + net/netfilter/ipvs/ip_vs_sync.c | 3 + net/netfilter/ipvs/ip_vs_wlc.c | 3 + net/netfilter/ipvs/ip_vs_wrr.c | 3 + net/netfilter/ipvs/ip_vs_xmit.c | 3 + 24 files changed, 134 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index bbae1e87efcd..910820327bc4 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -99,47 +99,47 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, return &buf[*idx - len]; } -#define IP_VS_DBG_BUF(level, msg...) \ - do { \ - char ip_vs_dbg_buf[160]; \ - int ip_vs_dbg_idx = 0; \ - if (level <= ip_vs_get_debug_level()) \ - printk(KERN_DEBUG "IPVS: " msg); \ - } while (0) -#define IP_VS_ERR_BUF(msg...) \ - do { \ - char ip_vs_dbg_buf[160]; \ - int ip_vs_dbg_idx = 0; \ - printk(KERN_ERR "IPVS: " msg); \ - } while (0) +#define IP_VS_DBG_BUF(level, msg, ...) \ + do { \ + char ip_vs_dbg_buf[160]; \ + int ip_vs_dbg_idx = 0; \ + if (level <= ip_vs_get_debug_level()) \ + printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__); \ + } while (0) +#define IP_VS_ERR_BUF(msg...) \ + do { \ + char ip_vs_dbg_buf[160]; \ + int ip_vs_dbg_idx = 0; \ + pr_err(msg); \ + } while (0) /* Only use from within IP_VS_DBG_BUF() or IP_VS_ERR_BUF macros */ -#define IP_VS_DBG_ADDR(af, addr) \ - ip_vs_dbg_addr(af, ip_vs_dbg_buf, \ - sizeof(ip_vs_dbg_buf), addr, \ - &ip_vs_dbg_idx) - -#define IP_VS_DBG(level, msg...) \ - do { \ - if (level <= ip_vs_get_debug_level()) \ - printk(KERN_DEBUG "IPVS: " msg); \ - } while (0) -#define IP_VS_DBG_RL(msg...) \ - do { \ - if (net_ratelimit()) \ - printk(KERN_DEBUG "IPVS: " msg); \ - } while (0) -#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) \ - do { \ - if (level <= ip_vs_get_debug_level()) \ - pp->debug_packet(pp, skb, ofs, msg); \ - } while (0) -#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) \ - do { \ - if (level <= ip_vs_get_debug_level() && \ - net_ratelimit()) \ - pp->debug_packet(pp, skb, ofs, msg); \ - } while (0) +#define IP_VS_DBG_ADDR(af, addr) \ + ip_vs_dbg_addr(af, ip_vs_dbg_buf, \ + sizeof(ip_vs_dbg_buf), addr, \ + &ip_vs_dbg_idx) + +#define IP_VS_DBG(level, msg, ...) \ + do { \ + if (level <= ip_vs_get_debug_level()) \ + printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__); \ + } while (0) +#define IP_VS_DBG_RL(msg, ...) 
\ + do { \ + if (net_ratelimit()) \ + printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__); \ + } while (0) +#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) \ + do { \ + if (level <= ip_vs_get_debug_level()) \ + pp->debug_packet(pp, skb, ofs, msg); \ + } while (0) +#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) \ + do { \ + if (level <= ip_vs_get_debug_level() && \ + net_ratelimit()) \ + pp->debug_packet(pp, skb, ofs, msg); \ + } while (0) #else /* NO DEBUGGING at ALL */ #define IP_VS_DBG_BUF(level, msg...) do {} while (0) #define IP_VS_ERR_BUF(msg...) do {} while (0) @@ -150,29 +150,30 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, #endif #define IP_VS_BUG() BUG() -#define IP_VS_ERR(msg...) printk(KERN_ERR "IPVS: " msg) -#define IP_VS_INFO(msg...) printk(KERN_INFO "IPVS: " msg) -#define IP_VS_WARNING(msg...) \ - printk(KERN_WARNING "IPVS: " msg) -#define IP_VS_ERR_RL(msg...) \ - do { \ - if (net_ratelimit()) \ - printk(KERN_ERR "IPVS: " msg); \ - } while (0) +#define IP_VS_ERR(msg...) pr_err(msg) +#define IP_VS_INFO(msg...) pr_info(msg) +#define IP_VS_WARNING(msg...) pr_warning(msg) +#define IP_VS_ERR_RL(msg...) \ + do { \ + if (net_ratelimit()) \ + pr_err(msg); \ + } while (0) #ifdef CONFIG_IP_VS_DEBUG #define EnterFunction(level) \ - do { \ - if (level <= ip_vs_get_debug_level()) \ - printk(KERN_DEBUG "Enter: %s, %s line %i\n", \ - __func__, __FILE__, __LINE__); \ - } while (0) -#define LeaveFunction(level) \ - do { \ - if (level <= ip_vs_get_debug_level()) \ - printk(KERN_DEBUG "Leave: %s, %s line %i\n", \ - __func__, __FILE__, __LINE__); \ - } while (0) + do { \ + if (level <= ip_vs_get_debug_level()) \ + printk(KERN_DEBUG \ + pr_fmt("Enter: %s, %s line %i\n"), \ + __func__, __FILE__, __LINE__); \ + } while (0) +#define LeaveFunction(level) \ + do { \ + if (level <= ip_vs_get_debug_level()) \ + printk(KERN_DEBUG \ + pr_fmt("Leave: %s, %s line %i\n"), \ + __func__, __FILE__, __LINE__); \ + } while (0) #else #define EnterFunction(level) do {} while (0) #define LeaveFunction(level) do {} while (0) diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 201b8ea3020d..c1781f80daf2 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -18,6 +18,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 77bfdfeb966e..4173d7b1d4cc 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -22,6 +22,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 8dddb17a947a..6811dcaca0f6 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -24,6 +24,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2d24d81474ce..e6133ea1ea4c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -18,6 +18,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index a9dac74bb13f..d0c0594d1e2e 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -35,6 +35,9 @@ * 
*/ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 2eb2860dabb5..702b53ca937c 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -11,6 +11,10 @@ * Changes: * */ + +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 428edbf481cc..9c16a3f64c1b 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -22,6 +22,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 3eb5e2660c49..98fb185d890b 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -39,6 +39,9 @@ * me to write this module. */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index c04ce56c7f0f..5f5e5f4bad5e 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -37,6 +37,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index d0dadc8a65fd..4ecd5e19c39a 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -14,6 +14,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 694952db5026..2224478bdea8 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -31,6 +31,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index a01520e3d6b8..a95bc4021c90 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -13,6 +13,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include @@ -181,7 +184,7 @@ ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, &ih->daddr, ntohs(pptr[1])); } - printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); + pr_debug("%s: %s\n", msg, buf); } #ifdef CONFIG_IP_VS_IPV6 @@ -215,7 +218,7 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, &ih->daddr, ntohs(pptr[1])); } - printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); + pr_debug("%s: %s\n", msg, buf); } #endif diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 79f56c1e7c19..c30b43c36cd7 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -10,6 +10,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include @@ -138,7 +141,7 @@ ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb, sprintf(buf, "%s %pI4->%pI4", pp->name, &ih->saddr, &ih->daddr); - printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); + pr_debug("%s: %s\n", msg, buf); } #ifdef CONFIG_IP_VS_IPV6 @@ -156,7 +159,7 @@ ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb, sprintf(buf, "%s %pI6->%pI6", pp->name, 
&ih->saddr, &ih->daddr); - printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); + pr_debug("%s: %s\n", msg, buf); } #endif diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 8cba41802850..c36c80d3a2b4 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -13,6 +13,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include /* for tcphdr */ diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index d2930a71084b..96ebe40bc537 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -13,6 +13,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index 2d16ab7f8c1e..b01007e1c11e 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -19,6 +19,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index a46ad9e35016..87bc5ea0ef29 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -17,6 +17,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 20e4657d2f3b..4f745dd86dd8 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -35,6 +35,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 75709ebeb630..fb4d2d23f2fe 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -32,6 +32,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 5c48378a852f..cc04c99815fd 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -17,6 +17,9 @@ * Justin Ossevoort : Fix endian problem on sync message size. 
*/ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 8e942565b47d..bbddfdb10db2 100644 --- a/net/netfilter/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -19,6 +19,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index f7d74ef1ecf9..c39ebb6c5a54 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -18,6 +18,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include #include diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 5874657af7f2..061e76dfdad9 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -13,6 +13,9 @@ * */ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + #include #include /* for tcphdr */ #include -- cgit v1.2.3 From 78ddb2785980fc01b129c3547463266cae9c6ca9 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 30 Jul 2009 23:23:34 +0100 Subject: ARM: PL093: Header file for PL093 SSMC PrimeCell Header to define the standard registers for an PL093 SSMC memory controller. Signed-off-by: Ben Dooks Signed-off-by: Ben Dooks --- include/linux/amba/pl093.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 include/linux/amba/pl093.h (limited to 'include') diff --git a/include/linux/amba/pl093.h b/include/linux/amba/pl093.h new file mode 100644 index 000000000000..2983e3671adb --- /dev/null +++ b/include/linux/amba/pl093.h @@ -0,0 +1,80 @@ +/* linux/amba/pl093.h + * + * Copyright (c) 2008 Simtec Electronics + * http://armlinux.simtec.co.uk/ + * Ben Dooks + * + * AMBA PL093 SSMC (synchronous static memory controller) + * See DDI0236.pdf (r0p4) for more details + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+*/ + +#define SMB_BANK(x) ((x) * 0x20) /* each bank control set is 0x20 apart */ + +/* Offsets for SMBxxxxRy registers */ + +#define SMBIDCYR (0x00) +#define SMBWSTRDR (0x04) +#define SMBWSTWRR (0x08) +#define SMBWSTOENR (0x0C) +#define SMBWSTWENR (0x10) +#define SMBCR (0x14) +#define SMBSR (0x18) +#define SMBWSTBRDR (0x1C) + +/* Masks for SMB registers */ +#define IDCY_MASK (0xf) +#define WSTRD_MASK (0xf) +#define WSTWR_MASK (0xf) +#define WSTOEN_MASK (0xf) +#define WSTWEN_MASK (0xf) + +/* Notes from datasheet: + * WSTOEN <= WSTRD + * WSTWEN <= WSTWR + * + * WSTOEN is not used with nWAIT + */ + +/* SMBCR bit definitions */ +#define SMBCR_BIWRITEEN (1 << 21) +#define SMBCR_ADDRVALIDWRITEEN (1 << 20) +#define SMBCR_SYNCWRITE (1 << 17) +#define SMBCR_BMWRITE (1 << 16) +#define SMBCR_WRAPREAD (1 << 14) +#define SMBCR_BIREADEN (1 << 13) +#define SMBCR_ADDRVALIDREADEN (1 << 12) +#define SMBCR_SYNCREAD (1 << 9) +#define SMBCR_BMREAD (1 << 8) +#define SMBCR_SMBLSPOL (1 << 6) +#define SMBCR_WP (1 << 3) +#define SMBCR_WAITEN (1 << 2) +#define SMBCR_WAITPOL (1 << 1) +#define SMBCR_RBLE (1 << 0) + +#define SMBCR_BURSTLENWRITE_MASK (3 << 18) +#define SMBCR_BURSTLENWRITE_4 (0 << 18) +#define SMBCR_BURSTLENWRITE_8 (1 << 18) +#define SMBCR_BURSTLENWRITE_RESERVED (2 << 18) +#define SMBCR_BURSTLENWRITE_CONTINUOUS (3 << 18) + +#define SMBCR_BURSTLENREAD_MASK (3 << 10) +#define SMBCR_BURSTLENREAD_4 (0 << 10) +#define SMBCR_BURSTLENREAD_8 (1 << 10) +#define SMBCR_BURSTLENREAD_16 (2 << 10) +#define SMBCR_BURSTLENREAD_CONTINUOUS (3 << 10) + +#define SMBCR_MW_MASK (3 << 4) +#define SMBCR_MW_8BIT (0 << 4) +#define SMBCR_MW_16BIT (1 << 4) +#define SMBCR_MW_M32BIT (2 << 4) + +/* SSMC status registers */ +#define SSMCCSR (0x200) +#define SSMCCR (0x204) +#define SSMCITCR (0x208) +#define SSMCITIP (0x20C) +#define SSMCITIOP (0x210) -- cgit v1.2.3 From a33bc5c15154c835aae26f16e6a3a7d9ad4acb45 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 30 Jul 2009 18:52:15 -0700 Subject: xfrm: select sane defaults for xfrm[4|6] gc_thresh Choose saner defaults for xfrm[4|6] gc_thresh values on init Currently, the xfrm[4|6] code has hard-coded initial gc_thresh values (set to 1024). Given that the ipv4 and ipv6 routing caches are sized dynamically at boot time, the static selections can be non-sensical. This patch dynamically selects an appropriate gc threshold based on the corresponding main routing table size, using the assumption that we should in the worst case be able to handle as many connections as the routing table can. For ipv4, the maximum route cache size is 16 * the number of hash buckets in the route cache. Given that xfrm4 starts garbage collection at the gc_thresh and prevents new allocations at 2 * gc_thresh, we set gc_thresh to half the maximum route cache size. For ipv6, its a bit trickier. there is no maximum route cache size, but the ipv6 dst_ops gc_thresh is statically set to 1024. It seems sane to select a simmilar gc_thresh for the xfrm6 code that is half the number of hash buckets in the v6 route cache times 16 (like the v4 code does). Signed-off-by: Neil Horman Signed-off-by: David S. 
Miller --- include/net/ip6_fib.h | 6 ++++++ include/net/xfrm.h | 2 +- net/ipv4/route.c | 2 +- net/ipv4/xfrm4_policy.c | 13 ++++++++++++- net/ipv6/ip6_fib.c | 16 +++++----------- net/ipv6/xfrm6_policy.c | 15 +++++++++++++++ 6 files changed, 40 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 7c5c0f79168a..15b492a9aa79 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -22,6 +22,12 @@ #include #include +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +#define FIB6_TABLE_HASHSZ 256 +#else +#define FIB6_TABLE_HASHSZ 1 +#endif + struct rt6_info; struct fib6_config diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9e3a3f4c1f60..223e90a44824 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1280,7 +1280,7 @@ struct xfrm6_tunnel { }; extern void xfrm_init(void); -extern void xfrm4_init(void); +extern void xfrm4_init(int rt_hash_size); extern int xfrm_state_init(struct net *net); extern void xfrm_state_fini(struct net *net); extern void xfrm4_state_init(void); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 278f46f5011b..fafbe163e2b5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3442,7 +3442,7 @@ int __init ip_rt_init(void) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM xfrm_init(); - xfrm4_init(); + xfrm4_init(ip_rt_max_size); #endif rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 26496babdf3a..1ba44742ebbf 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -290,10 +290,21 @@ static void __exit xfrm4_policy_fini(void) xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); } -void __init xfrm4_init(void) +void __init xfrm4_init(int rt_max_size) { xfrm4_state_init(); xfrm4_policy_init(); + /* + * Select a default value for the gc_thresh based on the main route + * table hash size. It seems to me the worst case scenario is when + * we have ipsec operating in transport mode, in which we create a + * dst_entry per socket. The xfrm gc algorithm starts trying to remove + * entries at gc_thresh, and prevents new allocations as 2*gc_thresh + * so lets set an initial xfrm gc_thresh value at the rt_max_size/2. 
+ * That will let us store an ipsec connection per route table entry, + * and start cleaning when were 1/2 full + */ + xfrm4_dst_ops.gc_thresh = rt_max_size/2; sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, xfrm4_policy_table); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 52ee1dced2ff..0e93ca56eb69 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -164,12 +164,6 @@ static __inline__ void rt6_release(struct rt6_info *rt) dst_free(&rt->u.dst); } -#ifdef CONFIG_IPV6_MULTIPLE_TABLES -#define FIB_TABLE_HASHSZ 256 -#else -#define FIB_TABLE_HASHSZ 1 -#endif - static void fib6_link_table(struct net *net, struct fib6_table *tb) { unsigned int h; @@ -180,7 +174,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb) */ rwlock_init(&tb->tb6_lock); - h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); + h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); /* * No protection necessary, this is the only list mutatation @@ -231,7 +225,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id) if (id == 0) id = RT6_TABLE_MAIN; - h = id & (FIB_TABLE_HASHSZ - 1); + h = id & (FIB6_TABLE_HASHSZ - 1); rcu_read_lock(); head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, node, head, tb6_hlist) { @@ -382,7 +376,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) arg.net = net; w->args = &arg; - for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { + for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry(tb, node, head, tb6_hlist) { @@ -1368,7 +1362,7 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), unsigned int h; rcu_read_lock(); - for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(table, node, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); @@ -1483,7 +1477,7 @@ static int fib6_net_init(struct net *net) if (!net->ipv6.rt6_stats) goto out_timer; - net->ipv6.fib_table_hash = kcalloc(FIB_TABLE_HASHSZ, + net->ipv6.fib_table_hash = kcalloc(FIB6_TABLE_HASHSZ, sizeof(*net->ipv6.fib_table_hash), GFP_KERNEL); if (!net->ipv6.fib_table_hash) diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4acc308eac7f..611cffcf554f 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -323,6 +323,7 @@ static struct ctl_table_header *sysctl_hdr; int __init xfrm6_init(void) { int ret; + unsigned int gc_thresh; ret = xfrm6_policy_init(); if (ret) @@ -331,6 +332,20 @@ int __init xfrm6_init(void) ret = xfrm6_state_init(); if (ret) goto out_policy; + /* + * We need a good default value for the xfrm6 gc threshold. + * In ipv4 we set it to the route hash table size * 8, which + * is half the size of the maximaum route cache for ipv4. It + * would be good to do the same thing for v6, except the table is + * constructed differently here. Here each table for a net namespace + * can have FIB_TABLE_HASHSZ entries, so lets go with the same + * computation that we used for ipv4 here. Also, lets keep the initial + * gc_thresh to a minimum of 1024, since, the ipv6 route cache defaults + * to that as a minimum as well + */ + gc_thresh = FIB6_TABLE_HASHSZ * 8; + xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 
1024 : gc_thresh; + sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path, xfrm6_policy_table); out: -- cgit v1.2.3 From cbb4f2646d77b536ed2b1500ef6641083228ed8f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 31 Jul 2009 08:55:48 +0200 Subject: io context: fix ref counting Commit d9c7d394a8ebacb60097b192939ae9f15235225e ("block: prevent possible io_context->refcount overflow") mistakenly changed atomic_inc(&ioc->nr_tasks) to atomic_long_inc(&ioc->refcount). Signed-off-by: Li Zefan Acked-by: Nikanth Karthikesan Signed-off-by: Jens Axboe --- include/linux/iocontext.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index dd05434fa45f..4da4a75c3f1e 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -92,7 +92,7 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc) * a race). */ if (ioc && atomic_long_inc_not_zero(&ioc->refcount)) { - atomic_long_inc(&ioc->refcount); + atomic_inc(&ioc->nr_tasks); return ioc; } -- cgit v1.2.3 From 6de7e356faf54aa75de5b624bbce28a5b776dfa8 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 18 Jun 2009 10:19:12 +0200 Subject: lib/scatterlist: add a flags to signalize mapping direction sg_miter_start() is currently unaware of the direction of the copy process (to or from the scatter list). It is important to know the direction because the page has to be flushed in case the data written is seen on a different mapping in user land on cache incoherent architectures. Signed-off-by: Sebastian Andrzej Siewior Acked-by: FUJITA Tomonori Acked-by: Tejun Heo Signed-off-by: Pierre Ossman --- include/linux/scatterlist.h | 2 ++ lib/scatterlist.c | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index e5996984ddd0..9aaf5bfdad1a 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -242,6 +242,8 @@ size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents, */ #define SG_MITER_ATOMIC (1 << 0) /* use kmap_atomic */ +#define SG_MITER_TO_SG (1 << 1) /* flush back to phys on unmap */ +#define SG_MITER_FROM_SG (1 << 2) /* nop */ struct sg_mapping_iter { /* the following three fields can be accessed directly */ diff --git a/lib/scatterlist.c b/lib/scatterlist.c index a295e404e908..0d475d8167bf 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -314,6 +314,7 @@ void sg_miter_start(struct sg_mapping_iter *miter, struct scatterlist *sgl, miter->__sg = sgl; miter->__nents = nents; miter->__offset = 0; + WARN_ON(!(flags & (SG_MITER_TO_SG | SG_MITER_FROM_SG))); miter->__flags = flags; } EXPORT_SYMBOL(sg_miter_start); @@ -394,6 +395,9 @@ void sg_miter_stop(struct sg_mapping_iter *miter) if (miter->addr) { miter->__offset += miter->consumed; + if (miter->__flags & SG_MITER_TO_SG) + flush_kernel_dcache_page(miter->page); + if (miter->__flags & SG_MITER_ATOMIC) { WARN_ON(!irqs_disabled()); kunmap_atomic(miter->addr, KM_BIO_SRC_IRQ); @@ -426,8 +430,14 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, unsigned int offset = 0; struct sg_mapping_iter miter; unsigned long flags; + unsigned int sg_flags = SG_MITER_ATOMIC; + + if (to_buffer) + sg_flags |= SG_MITER_FROM_SG; + else + sg_flags |= SG_MITER_TO_SG; - sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC); + sg_miter_start(&miter, sgl, nents, sg_flags); local_irq_save(flags); @@ -438,10 +448,8 @@ 
static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, if (to_buffer) memcpy(buf + offset, miter.addr, len); - else { + else memcpy(miter.addr, buf + offset, len); - flush_kernel_dcache_page(miter.page); - } offset += len; } -- cgit v1.2.3 From 4b2a108cd0d34880fe9d932258ca5b2ccebcd05e Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 22 Jun 2009 09:18:05 +0200 Subject: cb710: use SG_MITER_TO_SG/SG_MITER_FROM_SG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code already uses flush_kernel_dcache_page(). This patch updates the driver to the recent sg API changes which require that either SG_MITER_TO_SG or SG_MITER_FROM_SG is set. SG_MITER_TO_SG calls flush_kernel_dcache_page() in sg_miter_stop(). Signed-off-by: Sebastian Andrzej Siewior Acked-by: Michał Mirosław Signed-off-by: Pierre Ossman --- drivers/misc/cb710/sgbuf2.c | 4 ---- drivers/mmc/host/cb710-mmc.c | 6 +++--- include/linux/cb710.h | 29 +++-------------------------- 3 files changed, 6 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c index d38a7acdb6ec..d019746551f3 100644 --- a/drivers/misc/cb710/sgbuf2.c +++ b/drivers/misc/cb710/sgbuf2.c @@ -114,7 +114,6 @@ static void sg_dwiter_write_slow(struct sg_mapping_iter *miter, uint32_t data) if (!left) return; addr += len; - flush_kernel_dcache_page(miter->page); } while (sg_dwiter_next(miter)); } @@ -142,9 +141,6 @@ void cb710_sg_dwiter_write_next_block(struct sg_mapping_iter *miter, uint32_t da return; } else sg_dwiter_write_slow(miter, data); - - if (miter->length == miter->consumed) - flush_kernel_dcache_page(miter->page); } EXPORT_SYMBOL_GPL(cb710_sg_dwiter_write_next_block); diff --git a/drivers/mmc/host/cb710-mmc.c b/drivers/mmc/host/cb710-mmc.c index 11efefb1af51..4e72964a7b43 100644 --- a/drivers/mmc/host/cb710-mmc.c +++ b/drivers/mmc/host/cb710-mmc.c @@ -278,7 +278,7 @@ static int cb710_mmc_receive(struct cb710_slot *slot, struct mmc_data *data) if (unlikely(data->blksz & 15 && (data->blocks != 1 || data->blksz != 8))) return -EINVAL; - sg_miter_start(&miter, data->sg, data->sg_len, 0); + sg_miter_start(&miter, data->sg, data->sg_len, SG_MITER_TO_SG); cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, 15, CB710_MMC_C2_READ_PIO_SIZE_MASK); @@ -307,7 +307,7 @@ static int cb710_mmc_receive(struct cb710_slot *slot, struct mmc_data *data) goto out; } out: - cb710_sg_miter_stop_writing(&miter); + sg_miter_stop(&miter); return err; } @@ -322,7 +322,7 @@ static int cb710_mmc_send(struct cb710_slot *slot, struct mmc_data *data) if (unlikely(data->blocks > 1 && data->blksz & 15)) return -EINVAL; - sg_miter_start(&miter, data->sg, data->sg_len, 0); + sg_miter_start(&miter, data->sg, data->sg_len, SG_MITER_FROM_SG); cb710_modify_port_8(slot, CB710_MMC_CONFIG2_PORT, 0, CB710_MMC_C2_READ_PIO_SIZE_MASK); diff --git a/include/linux/cb710.h b/include/linux/cb710.h index 63bc9a4d2926..8cc10411bab2 100644 --- a/include/linux/cb710.h +++ b/include/linux/cb710.h @@ -140,29 +140,6 @@ void cb710_dump_regs(struct cb710_chip *chip, unsigned dump); #include #include -/** - * cb710_sg_miter_stop_writing - stop mapping iteration after writing - * @miter: sg mapping iter to be stopped - * - * Description: - * Stops mapping iterator @miter. @miter should have been started - * started using sg_miter_start(). A stopped iteration can be - * resumed by calling sg_miter_next() on it.
This is useful when - * resources (kmap) need to be released during iteration. - * - * This is a convenience wrapper that will be optimized out for arches - * that don't need flush_kernel_dcache_page(). - * - * Context: - * IRQ disabled if the SG_MITER_ATOMIC is set. Don't care otherwise. - */ -static inline void cb710_sg_miter_stop_writing(struct sg_mapping_iter *miter) -{ - if (miter->page) - flush_kernel_dcache_page(miter->page); - sg_miter_stop(miter); -} - /* * 32-bit PIO mapping sg iterator * @@ -171,12 +148,12 @@ static inline void cb710_sg_miter_stop_writing(struct sg_mapping_iter *miter) * without DMA support). * * Best-case reading (transfer from device): - * sg_miter_start(); + * sg_miter_start(, SG_MITER_TO_SG); * cb710_sg_dwiter_write_from_io(); - * cb710_sg_miter_stop_writing(); + * sg_miter_stop(); * * Best-case writing (transfer to device): - * sg_miter_start(); + * sg_miter_start(, SG_MITER_FROM_SG); * cb710_sg_dwiter_read_to_io(); * sg_miter_stop(); */ -- cgit v1.2.3 From c7121843685de2bf7f3afd3ae1d6a146010bf1fc Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 28 Jul 2009 14:09:55 -0700 Subject: clocksource: Save mult_orig in clocksource_disable() To fix the common case where ->enable() does not set up mult, make sure mult_orig is saved in mult on disable. Also add comments to explain why we do this. Signed-off-by: Magnus Damm Cc: johnstul@us.ibm.com Cc: lethal@linux-sh.org Cc: akpm@linux-foundation.org LKML-Reference: <20090618152432.10136.9932.sendpatchset@rx1.opensource.se> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index c56457c8334e..1219be4fb42e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -293,7 +293,12 @@ static inline int clocksource_enable(struct clocksource *cs) if (cs->enable) ret = cs->enable(cs); - /* save mult_orig on enable */ + /* + * The frequency may have changed while the clocksource + * was disabled. If so the code in ->enable() must update + * the mult value to reflect the new frequency. Make sure + * mult_orig follows this change. + */ cs->mult_orig = cs->mult; return ret; @@ -309,6 +314,13 @@ static inline int clocksource_enable(struct clocksource *cs) */ static inline void clocksource_disable(struct clocksource *cs) { + /* + * Save mult_orig in mult so clocksource_enable() can + * restore the value regardless if ->enable() updates + * the value of mult or not. + */ + cs->mult = cs->mult_orig; + if (cs->disable) cs->disable(cs); } -- cgit v1.2.3 From 7c958e32649e0c35801762878fb0b6da8c55a515 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 31 Jul 2009 11:49:11 -0400 Subject: block: Add a wrapper for setting minimum request size without a queue Introduce blk_limits_io_min() and make blk_queue_io_min() call it. Signed-off-by: Mike Snitzer Signed-off-by: Martin K. 
Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 31 ++++++++++++++++++++++++------- include/linux/blkdev.h | 1 + 2 files changed, 25 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index 8e86e2d2b147..1f7197434166 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -383,6 +383,29 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) } EXPORT_SYMBOL(blk_queue_alignment_offset); +/** + * blk_limits_io_min - set minimum request size for a device + * @limits: the queue limits + * @min: smallest I/O size in bytes + * + * Description: + * Some devices have an internal block size bigger than the reported + * hardware sector size. This function can be used to signal the + * smallest I/O the device can perform without incurring a performance + * penalty. + */ +void blk_limits_io_min(struct queue_limits *limits, unsigned int min) +{ + limits->io_min = min; + + if (limits->io_min < limits->logical_block_size) + limits->io_min = limits->logical_block_size; + + if (limits->io_min < limits->physical_block_size) + limits->io_min = limits->physical_block_size; +} +EXPORT_SYMBOL(blk_limits_io_min); + /** * blk_queue_io_min - set minimum request size for the queue * @q: the request queue for the device @@ -396,13 +419,7 @@ EXPORT_SYMBOL(blk_queue_alignment_offset); */ void blk_queue_io_min(struct request_queue *q, unsigned int min) { - q->limits.io_min = min; - - if (q->limits.io_min < q->limits.logical_block_size) - q->limits.io_min = q->limits.logical_block_size; - - if (q->limits.io_min < q->limits.physical_block_size) - q->limits.io_min = q->limits.physical_block_size; + blk_limits_io_min(&q->limits, min); } EXPORT_SYMBOL(blk_queue_io_min); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e7cb5dbf6c26..69103e053c92 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -913,6 +913,7 @@ extern void blk_queue_logical_block_size(struct request_queue *, unsigned short) extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); +extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_default_limits(struct queue_limits *lim); -- cgit v1.2.3 From 9f498cc5be7e013d8d6e4c616980ed0ffc8680d2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2009 14:46:33 +0200 Subject: perf_counter: Full task tracing In order to be able to distinguish between no samples due to inactivity and no samples due to task ended, Arjan asked for PERF_EVENT_EXIT events. This is useful to the boot delay instrumentation (bootchart) app. This patch changes the PERF_EVENT_FORK to be emitted on every clone, and adds PERF_EVENT_EXIT to be emitted on task exit, after the task's counters have been closed. This task tracing is controlled through: attr.comm || attr.mmap and through the new attr.task field. 
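A minimal user-space sketch, not part of this patch, of how a reader of the counter's mmap'ed buffer might decode the two task records described above. The field layout mirrors the comments this patch adds to perf_counter.h; the struct and helper names are made up here, and the sketch assumes the perf_event_header and PERF_EVENT_* definitions are visible to the program (for example via a copy of the header):

#include <stdio.h>
#include <linux/perf_counter.h>	/* assumed available: perf_event_header, PERF_EVENT_* */

/* Shared layout of PERF_EVENT_FORK and PERF_EVENT_EXIT records. */
struct task_record {
	struct perf_event_header header;
	__u32 pid, ppid;	/* process id and its parent */
	__u32 tid, ptid;	/* thread id and its parent */
};

static void handle_task_record(const struct task_record *rec)
{
	if (rec->header.type == PERF_EVENT_FORK)
		printf("fork: pid %u ppid %u tid %u ptid %u\n",
		       rec->pid, rec->ppid, rec->tid, rec->ptid);
	else if (rec->header.type == PERF_EVENT_EXIT)
		printf("exit: pid %u ppid %u tid %u ptid %u\n",
		       rec->pid, rec->ppid, rec->tid, rec->ptid);
}

Both record types are emitted once any counter on the task sets attr.task, attr.comm or attr.mmap; attr.task is the new bit that requests them without also enabling comm or mmap events.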
Suggested-by: Arjan van de Ven Cc: Paul Mackerras Cc: Anton Blanchard Signed-off-by: Peter Zijlstra [ cleaned up perf_counter.h a bit ] Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 13 ++++++- kernel/fork.c | 4 +- kernel/perf_counter.c | 87 +++++++++++++++++++++++++++++--------------- 3 files changed, 71 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index bd15d7a5f5ce..e604e6ef72dd 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -181,8 +181,9 @@ struct perf_counter_attr { freq : 1, /* use freq, not period */ inherit_stat : 1, /* per task counts */ enable_on_exec : 1, /* next exec enables */ + task : 1, /* trace fork/exit */ - __reserved_1 : 51; + __reserved_1 : 50; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; @@ -308,6 +309,15 @@ enum perf_event_type { */ PERF_EVENT_COMM = 3, + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * }; + */ + PERF_EVENT_EXIT = 4, + /* * struct { * struct perf_event_header header; @@ -323,6 +333,7 @@ enum perf_event_type { * struct { * struct perf_event_header header; * u32 pid, ppid; + * u32 tid, ptid; * }; */ PERF_EVENT_FORK = 7, diff --git a/kernel/fork.c b/kernel/fork.c index 29b532e718f7..466531eb92cc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1269,6 +1269,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); + perf_counter_fork(p); return p; bad_fork_free_pid: @@ -1410,9 +1411,6 @@ long do_fork(unsigned long clone_flags, init_completion(&vfork); } - if (!(clone_flags & CLONE_THREAD)) - perf_counter_fork(p); - audit_finish_fork(p); tracehook_report_clone(regs, clone_flags, nr, p); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 48471d75ae01..199ed4771315 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -42,6 +42,7 @@ static int perf_overcommit __read_mostly = 1; static atomic_t nr_counters __read_mostly; static atomic_t nr_mmap_counters __read_mostly; static atomic_t nr_comm_counters __read_mostly; +static atomic_t nr_task_counters __read_mostly; /* * perf counter paranoia level: @@ -1654,6 +1655,8 @@ static void free_counter(struct perf_counter *counter) atomic_dec(&nr_mmap_counters); if (counter->attr.comm) atomic_dec(&nr_comm_counters); + if (counter->attr.task) + atomic_dec(&nr_task_counters); } if (counter->destroy) @@ -2831,10 +2834,12 @@ perf_counter_read_event(struct perf_counter *counter, } /* - * fork tracking + * task tracking -- fork/exit + * + * enabled by: attr.comm | attr.mmap | attr.task */ -struct perf_fork_event { +struct perf_task_event { struct task_struct *task; struct { @@ -2842,37 +2847,42 @@ struct perf_fork_event { u32 pid; u32 ppid; + u32 tid; + u32 ptid; } event; }; -static void perf_counter_fork_output(struct perf_counter *counter, - struct perf_fork_event *fork_event) +static void perf_counter_task_output(struct perf_counter *counter, + struct perf_task_event *task_event) { struct perf_output_handle handle; - int size = fork_event->event.header.size; - struct task_struct *task = fork_event->task; + int size = task_event->event.header.size; + struct task_struct *task = task_event->task; int ret = perf_output_begin(&handle, counter, size, 0, 0); if (ret) return; - fork_event->event.pid = perf_counter_pid(counter, task); - fork_event->event.ppid = perf_counter_pid(counter, task->real_parent); + 
task_event->event.pid = perf_counter_pid(counter, task); + task_event->event.ppid = perf_counter_pid(counter, task->real_parent); - perf_output_put(&handle, fork_event->event); + task_event->event.tid = perf_counter_tid(counter, task); + task_event->event.ptid = perf_counter_tid(counter, task->real_parent); + + perf_output_put(&handle, task_event->event); perf_output_end(&handle); } -static int perf_counter_fork_match(struct perf_counter *counter) +static int perf_counter_task_match(struct perf_counter *counter) { - if (counter->attr.comm || counter->attr.mmap) + if (counter->attr.comm || counter->attr.mmap || counter->attr.task) return 1; return 0; } -static void perf_counter_fork_ctx(struct perf_counter_context *ctx, - struct perf_fork_event *fork_event) +static void perf_counter_task_ctx(struct perf_counter_context *ctx, + struct perf_task_event *task_event) { struct perf_counter *counter; @@ -2881,19 +2891,19 @@ static void perf_counter_fork_ctx(struct perf_counter_context *ctx, rcu_read_lock(); list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_fork_match(counter)) - perf_counter_fork_output(counter, fork_event); + if (perf_counter_task_match(counter)) + perf_counter_task_output(counter, task_event); } rcu_read_unlock(); } -static void perf_counter_fork_event(struct perf_fork_event *fork_event) +static void perf_counter_task_event(struct perf_task_event *task_event) { struct perf_cpu_context *cpuctx; struct perf_counter_context *ctx; cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_fork_ctx(&cpuctx->ctx, fork_event); + perf_counter_task_ctx(&cpuctx->ctx, task_event); put_cpu_var(perf_cpu_context); rcu_read_lock(); @@ -2903,32 +2913,40 @@ static void perf_counter_fork_event(struct perf_fork_event *fork_event) */ ctx = rcu_dereference(current->perf_counter_ctxp); if (ctx) - perf_counter_fork_ctx(ctx, fork_event); + perf_counter_task_ctx(ctx, task_event); rcu_read_unlock(); } -void perf_counter_fork(struct task_struct *task) +static void perf_counter_task(struct task_struct *task, int new) { - struct perf_fork_event fork_event; + struct perf_task_event task_event; if (!atomic_read(&nr_comm_counters) && - !atomic_read(&nr_mmap_counters)) + !atomic_read(&nr_mmap_counters) && + !atomic_read(&nr_task_counters)) return; - fork_event = (struct perf_fork_event){ + task_event = (struct perf_task_event){ .task = task, .event = { .header = { - .type = PERF_EVENT_FORK, + .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT, .misc = 0, - .size = sizeof(fork_event.event), + .size = sizeof(task_event.event), }, /* .pid */ /* .ppid */ + /* .tid */ + /* .ptid */ }, }; - perf_counter_fork_event(&fork_event); + perf_counter_task_event(&task_event); +} + +void perf_counter_fork(struct task_struct *task) +{ + perf_counter_task(task, 1); } /* @@ -3887,6 +3905,8 @@ done: atomic_inc(&nr_mmap_counters); if (counter->attr.comm) atomic_inc(&nr_comm_counters); + if (counter->attr.task) + atomic_inc(&nr_task_counters); } return counter; @@ -4248,8 +4268,10 @@ void perf_counter_exit_task(struct task_struct *child) struct perf_counter_context *child_ctx; unsigned long flags; - if (likely(!child->perf_counter_ctxp)) + if (likely(!child->perf_counter_ctxp)) { + perf_counter_task(child, 0); return; + } local_irq_save(flags); /* @@ -4267,15 +4289,22 @@ void perf_counter_exit_task(struct task_struct *child) * incremented the context's refcount before we do put_ctx below. 
*/ spin_lock(&child_ctx->lock); - child->perf_counter_ctxp = NULL; /* * If this context is a clone; unclone it so it can't get * swapped to another process while we're removing all * the counters from it. */ unclone_ctx(child_ctx); - spin_unlock(&child_ctx->lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&child_ctx->lock, flags); + + /* + * Report the task dead after unscheduling the counters so that we + * won't get any samples after PERF_EVENT_EXIT. We can however still + * get a few PERF_EVENT_READ events. + */ + perf_counter_task(child, 0); + + child->perf_counter_ctxp = NULL; /* * We can recurse on the same lock type through: -- cgit v1.2.3 From 716a42348cdaf04534b15fbdc9c83e25baebfed5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 24 Jul 2009 20:05:23 +0200 Subject: sched: Fix cond_resched_lock() in !CONFIG_PREEMPT The might_sleep() test inside cond_resched_lock() assumes the spinlock is held and then preemption is disabled. This is true with CONFIG_PREEMPT but the preempt_count() doesn't change otherwise. Check by starting from the appropriate preempt offset depending on the config. Reported-by: Li Zefan Signed-off-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra LKML-Reference: <1248458723-12146-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index cbbfca69aa4a..c472414953bf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2293,8 +2293,14 @@ extern int _cond_resched(void); extern int __cond_resched_lock(spinlock_t *lock); +#ifdef CONFIG_PREEMPT +#define PREEMPT_LOCK_OFFSET PREEMPT_OFFSET +#else +#define PREEMPT_LOCK_OFFSET 0 +#endif + #define cond_resched_lock(lock) ({ \ - __might_sleep(__FILE__, __LINE__, PREEMPT_OFFSET); \ + __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ __cond_resched_lock(lock); \ }) -- cgit v1.2.3 From 3f029d3c6d62068d59301d90c18dbde8ee402107 Mon Sep 17 00:00:00 2001 From: Gregory Haskins Date: Wed, 29 Jul 2009 11:08:47 -0400 Subject: sched: Enhance the pre/post scheduling logic We currently have an explicit "needs_post" vtable method which returns a stack variable for whether we should later run post-schedule. This leads to an awkward exchange of the variable as it bubbles back up out of the context switch. Peter Zijlstra observed that this information could be stored in the run-queue itself instead of handled on the stack. Therefore, we revert to the method of having context_switch return void, and update an internal rq->post_schedule variable when we require further processing. In addition, we fix a race condition where we try to access current->sched_class without holding the rq->lock. This is technically racy, as the sched-class could change out from under us. Instead, we reference the per-rq post_schedule variable with the runqueue unlocked, but with preemption disabled to see if we need to reacquire the rq->lock. Finally, we clean the code up slightly by removing the #ifdef CONFIG_SMP conditionals from the schedule() call, and implement some inline helper functions instead. This patch passes checkpatch, and rt-migrate. 
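In compressed form, the scheme described above looks as follows; this sketch only restates the helpers the patch adds (they appear in full in the diff below), so nothing here is new code. The flag is set while rq->lock is held when the next task is picked, read later without the lock but with preemption still disabled (so this CPU's rq cannot change), and the lock is retaken only when there is actually work to do:

/* set under rq->lock, e.g. when the RT class picks the next task */
rq->post_schedule = has_pushable_tasks(rq);

/* rq->lock is NOT held, but preemption is disabled */
static inline void post_schedule(struct rq *rq)
{
	if (rq->post_schedule) {
		unsigned long flags;

		spin_lock_irqsave(&rq->lock, flags);
		if (rq->curr->sched_class->post_schedule)
			rq->curr->sched_class->post_schedule(rq);
		spin_unlock_irqrestore(&rq->lock, flags);

		rq->post_schedule = 0;
	}
}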
Signed-off-by: Gregory Haskins Signed-off-by: Peter Zijlstra LKML-Reference: <20090729150422.17691.55590.stgit@dev.haskins.net> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - kernel/sched.c | 82 +++++++++++++++++++++++++++++++-------------------- kernel/sched_rt.c | 31 +++++++------------ 3 files changed, 61 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c35bc29d2a9..195d72d5c102 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1047,7 +1047,6 @@ struct sched_class { struct rq *busiest, struct sched_domain *sd, enum cpu_idle_type idle); void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); - int (*needs_post_schedule) (struct rq *this_rq); void (*post_schedule) (struct rq *this_rq); void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); diff --git a/kernel/sched.c b/kernel/sched.c index a030d4514cdc..613fee54fc89 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -616,6 +616,7 @@ struct rq { unsigned char idle_at_tick; /* For active balancing */ + int post_schedule; int active_balance; int push_cpu; /* cpu of this runqueue: */ @@ -2839,17 +2840,11 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev, * with the lock held can cause deadlocks; see schedule() for * details.) */ -static int finish_task_switch(struct rq *rq, struct task_struct *prev) +static void finish_task_switch(struct rq *rq, struct task_struct *prev) __releases(rq->lock) { struct mm_struct *mm = rq->prev_mm; long prev_state; - int post_schedule = 0; - -#ifdef CONFIG_SMP - if (current->sched_class->needs_post_schedule) - post_schedule = current->sched_class->needs_post_schedule(rq); -#endif rq->prev_mm = NULL; @@ -2880,10 +2875,44 @@ static int finish_task_switch(struct rq *rq, struct task_struct *prev) kprobe_flush_task(prev); put_task_struct(prev); } +} + +#ifdef CONFIG_SMP + +/* assumes rq->lock is held */ +static inline void pre_schedule(struct rq *rq, struct task_struct *prev) +{ + if (prev->sched_class->pre_schedule) + prev->sched_class->pre_schedule(rq, prev); +} + +/* rq->lock is NOT held, but preemption is disabled */ +static inline void post_schedule(struct rq *rq) +{ + if (rq->post_schedule) { + unsigned long flags; + + spin_lock_irqsave(&rq->lock, flags); + if (rq->curr->sched_class->post_schedule) + rq->curr->sched_class->post_schedule(rq); + spin_unlock_irqrestore(&rq->lock, flags); + + rq->post_schedule = 0; + } +} + +#else - return post_schedule; +static inline void pre_schedule(struct rq *rq, struct task_struct *p) +{ +} + +static inline void post_schedule(struct rq *rq) +{ } +#endif + /** * schedule_tail - first thing a freshly forked thread must call. * @prev: the thread we just switched away from. @@ -2892,14 +2921,14 @@ asmlinkage void schedule_tail(struct task_struct *prev) __releases(rq->lock) { struct rq *rq = this_rq(); - int post_schedule; - post_schedule = finish_task_switch(rq, prev); + finish_task_switch(rq, prev); -#ifdef CONFIG_SMP - if (post_schedule) - current->sched_class->post_schedule(rq); -#endif + /* + * FIXME: do we need to worry about rq being invalidated by the + * task_switch? + */ + post_schedule(rq); #ifdef __ARCH_WANT_UNLOCKED_CTXSW /* In this case, finish_task_switch does not reenable preemption */ @@ -2913,7 +2942,7 @@ asmlinkage void schedule_tail(struct task_struct *prev) * context_switch - switch to the new MM and the new * thread's register state. 
*/ -static inline int +static inline void context_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { @@ -2960,7 +2989,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * CPUs since it called schedule(), thus the 'rq' on its stack * frame will be invalid. */ - return finish_task_switch(this_rq(), prev); + finish_task_switch(this_rq(), prev); } /* @@ -5371,7 +5400,6 @@ asmlinkage void __sched schedule(void) { struct task_struct *prev, *next; unsigned long *switch_count; - int post_schedule = 0; struct rq *rq; int cpu; @@ -5403,10 +5431,7 @@ need_resched_nonpreemptible: switch_count = &prev->nvcsw; } -#ifdef CONFIG_SMP - if (prev->sched_class->pre_schedule) - prev->sched_class->pre_schedule(rq, prev); -#endif + pre_schedule(rq, prev); if (unlikely(!rq->nr_running)) idle_balance(cpu, rq); @@ -5422,25 +5447,17 @@ need_resched_nonpreemptible: rq->curr = next; ++*switch_count; - post_schedule = context_switch(rq, prev, next); /* unlocks the rq */ + context_switch(rq, prev, next); /* unlocks the rq */ /* * the context switch might have flipped the stack from under * us, hence refresh the local variables. */ cpu = smp_processor_id(); rq = cpu_rq(cpu); - } else { -#ifdef CONFIG_SMP - if (current->sched_class->needs_post_schedule) - post_schedule = current->sched_class->needs_post_schedule(rq); -#endif + } else spin_unlock_irq(&rq->lock); - } -#ifdef CONFIG_SMP - if (post_schedule) - current->sched_class->post_schedule(rq); -#endif + post_schedule(rq); if (unlikely(reacquire_kernel_lock(current) < 0)) goto need_resched_nonpreemptible; @@ -9403,6 +9420,7 @@ void __init sched_init(void) #ifdef CONFIG_SMP rq->sd = NULL; rq->rd = NULL; + rq->post_schedule = 0; rq->active_balance = 0; rq->next_balance = jiffies; rq->push_cpu = 0; diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 3918e01994e0..a8f89bc3e5eb 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1056,6 +1056,11 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) return p; } +static inline int has_pushable_tasks(struct rq *rq) +{ + return !plist_head_empty(&rq->rt.pushable_tasks); +} + static struct task_struct *pick_next_task_rt(struct rq *rq) { struct task_struct *p = _pick_next_task_rt(rq); @@ -1064,6 +1069,12 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) if (p) dequeue_pushable_task(rq, p); + /* + * We detect this state here so that we can avoid taking the RQ + * lock again later if there is no need to push + */ + rq->post_schedule = has_pushable_tasks(rq); + return p; } @@ -1262,11 +1273,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) return lowest_rq; } -static inline int has_pushable_tasks(struct rq *rq) -{ - return !plist_head_empty(&rq->rt.pushable_tasks); -} - static struct task_struct *pick_next_pushable_task(struct rq *rq) { struct task_struct *p; @@ -1466,23 +1472,9 @@ static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) pull_rt_task(rq); } -/* - * assumes rq->lock is held - */ -static int needs_post_schedule_rt(struct rq *rq) -{ - return has_pushable_tasks(rq); -} - static void post_schedule_rt(struct rq *rq) { - /* - * This is only called if needs_post_schedule_rt() indicates that - * we need to push tasks away - */ - spin_lock_irq(&rq->lock); push_rt_tasks(rq); - spin_unlock_irq(&rq->lock); } /* @@ -1758,7 +1750,6 @@ static const struct sched_class rt_sched_class = { .rq_online = rq_online_rt, .rq_offline = rq_offline_rt, .pre_schedule = pre_schedule_rt, - .needs_post_schedule = 
needs_post_schedule_rt, .post_schedule = post_schedule_rt, .task_wake_up = task_wake_up_rt, .switched_from = switched_from_rt, -- cgit v1.2.3 From f607c6685774811b8112e124f10a053d77015485 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 20 Jul 2009 19:16:29 +0200 Subject: lockdep: Introduce lockdep_assert_held() Add a lockdep helper to validate that we indeed are the owner of a lock. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 8 ++++++++ kernel/lockdep.c | 33 +++++++++++++++++++++++++++++++++ kernel/sched.c | 2 ++ 3 files changed, 43 insertions(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 12aabfcb45f6..a6d5e5e4d084 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -296,6 +296,10 @@ extern void lock_acquire(struct lockdep_map *lock, unsigned int subclass, extern void lock_release(struct lockdep_map *lock, int nested, unsigned long ip); +#define lockdep_is_held(lock) lock_is_held(&(lock)->dep_map) + +extern int lock_is_held(struct lockdep_map *lock); + extern void lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, unsigned long ip); @@ -314,6 +318,8 @@ extern void lockdep_trace_alloc(gfp_t mask); #define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0) +#define lockdep_assert_held(l) WARN_ON(debug_locks && !lockdep_is_held(l)) + #else /* !LOCKDEP */ static inline void lockdep_off(void) @@ -358,6 +364,8 @@ struct lock_class_key { }; #define lockdep_depth(tsk) (0) +#define lockdep_assert_held(l) do { } while (0) + #endif /* !LOCKDEP */ #ifdef CONFIG_LOCK_STAT diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 4b6cebe8ab31..28914a509914 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3059,6 +3059,19 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) check_chain_key(curr); } +static int __lock_is_held(struct lockdep_map *lock) +{ + struct task_struct *curr = current; + int i; + + for (i = 0; i < curr->lockdep_depth; i++) { + if (curr->held_locks[i].instance == lock) + return 1; + } + + return 0; +} + /* * Check whether we follow the irq-flags state precisely: */ @@ -3160,6 +3173,26 @@ void lock_release(struct lockdep_map *lock, int nested, } EXPORT_SYMBOL_GPL(lock_release); +int lock_is_held(struct lockdep_map *lock) +{ + unsigned long flags; + int ret = 0; + + if (unlikely(current->lockdep_recursion)) + return ret; + + raw_local_irq_save(flags); + check_flags(flags); + + current->lockdep_recursion = 1; + ret = __lock_is_held(lock); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(lock_is_held); + void lockdep_set_current_reclaim_state(gfp_t gfp_mask) { current->lockdep_reclaim_gfp = gfp_mask; diff --git a/kernel/sched.c b/kernel/sched.c index 1b59e265273b..2c75f7daa439 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -6609,6 +6609,8 @@ int cond_resched_lock(spinlock_t *lock) int resched = should_resched(); int ret = 0; + lockdep_assert_held(lock); + if (spin_needbreak(lock) || resched) { spin_unlock(lock); if (resched) -- cgit v1.2.3 From bb97a91e2549a7f2df9c21d32542582f549ab3ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 20 Jul 2009 19:15:35 +0200 Subject: lockdep: Deal with many similar locks spin_lock_nest_lock() allows to take many instances of the same class, this can easily lead to overflow of MAX_LOCK_DEPTH. 
To avoid this overflow, we'll stop accounting instances but start reference counting the class in the held_lock structure. [ We could maintain a list of instances, if we'd move the hlock stuff into __lock_acquired(), but that would require significant modifications to the current code. ] We restrict this mode to spin_lock_nest_lock() only, because it degrades the lockdep quality due to lost of instance. For lockstat this means we don't track lock statistics for any but the first lock in the series. Currently nesting is limited to 11 bits because that was the spare space available in held_lock. This yields a 2048 instances maximium. Signed-off-by: Peter Zijlstra Cc: Marcelo Tosatti Cc: Linus Torvalds Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 4 ++- kernel/lockdep.c | 89 ++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 80 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index a6d5e5e4d084..47d42eff6124 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -213,10 +213,12 @@ struct held_lock { * interrupt context: */ unsigned int irq_context:2; /* bit 0 - soft, bit 1 - hard */ - unsigned int trylock:1; + unsigned int trylock:1; /* 16 bits */ + unsigned int read:2; /* see lock_acquire() comment */ unsigned int check:2; /* see lock_acquire() comment */ unsigned int hardirqs_off:1; + unsigned int references:11; /* 32 bits */ }; /* diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 28914a509914..0bb246e21cd7 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -2708,13 +2708,15 @@ EXPORT_SYMBOL_GPL(lockdep_init_map); */ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, int hardirqs_off, - struct lockdep_map *nest_lock, unsigned long ip) + struct lockdep_map *nest_lock, unsigned long ip, + int references) { struct task_struct *curr = current; struct lock_class *class = NULL; struct held_lock *hlock; unsigned int depth, id; int chain_head = 0; + int class_idx; u64 chain_key; if (!prove_locking) @@ -2762,10 +2764,24 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (DEBUG_LOCKS_WARN_ON(depth >= MAX_LOCK_DEPTH)) return 0; + class_idx = class - lock_classes + 1; + + if (depth) { + hlock = curr->held_locks + depth - 1; + if (hlock->class_idx == class_idx && nest_lock) { + if (hlock->references) + hlock->references++; + else + hlock->references = 2; + + return 1; + } + } + hlock = curr->held_locks + depth; if (DEBUG_LOCKS_WARN_ON(!class)) return 0; - hlock->class_idx = class - lock_classes + 1; + hlock->class_idx = class_idx; hlock->acquire_ip = ip; hlock->instance = lock; hlock->nest_lock = nest_lock; @@ -2773,6 +2789,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->read = read; hlock->check = check; hlock->hardirqs_off = !!hardirqs_off; + hlock->references = references; #ifdef CONFIG_LOCK_STAT hlock->waittime_stamp = 0; hlock->holdtime_stamp = sched_clock(); @@ -2881,6 +2898,30 @@ static int check_unlock(struct task_struct *curr, struct lockdep_map *lock, return 1; } +static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) +{ + if (hlock->instance == lock) + return 1; + + if (hlock->references) { + struct lock_class *class = lock->class_cache; + + if (!class) + class = look_up_lock_class(lock, 0); + + if (DEBUG_LOCKS_WARN_ON(!class)) + return 0; + + if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) + return 0; + + if (hlock->class_idx == 
class - lock_classes + 1) + return 1; + } + + return 0; +} + static int __lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, @@ -2904,7 +2945,7 @@ __lock_set_class(struct lockdep_map *lock, const char *name, */ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) break; - if (hlock->instance == lock) + if (match_held_lock(hlock, lock)) goto found_it; prev_hlock = hlock; } @@ -2923,7 +2964,8 @@ found_it: if (!__lock_acquire(hlock->instance, hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, - hlock->nest_lock, hlock->acquire_ip)) + hlock->nest_lock, hlock->acquire_ip, + hlock->references)) return 0; } @@ -2962,20 +3004,34 @@ lock_release_non_nested(struct task_struct *curr, */ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) break; - if (hlock->instance == lock) + if (match_held_lock(hlock, lock)) goto found_it; prev_hlock = hlock; } return print_unlock_inbalance_bug(curr, lock, ip); found_it: - lock_release_holdtime(hlock); + if (hlock->instance == lock) + lock_release_holdtime(hlock); + + if (hlock->references) { + hlock->references--; + if (hlock->references) { + /* + * We had, and after removing one, still have + * references, the current lock stack is still + * valid. We're done! + */ + return 1; + } + } /* * We have the right lock to unlock, 'hlock' points to it. * Now we remove it from the stack, and add back the other * entries (if any), recalculating the hash along the way: */ + curr->lockdep_depth = i; curr->curr_chain_key = hlock->prev_chain_key; @@ -2984,7 +3040,8 @@ found_it: if (!__lock_acquire(hlock->instance, hlock_class(hlock)->subclass, hlock->trylock, hlock->read, hlock->check, hlock->hardirqs_off, - hlock->nest_lock, hlock->acquire_ip)) + hlock->nest_lock, hlock->acquire_ip, + hlock->references)) return 0; } @@ -3014,7 +3071,7 @@ static int lock_release_nested(struct task_struct *curr, /* * Is the unlock non-nested: */ - if (hlock->instance != lock) + if (hlock->instance != lock || hlock->references) return lock_release_non_nested(curr, lock, ip); curr->lockdep_depth--; @@ -3065,7 +3122,9 @@ static int __lock_is_held(struct lockdep_map *lock) int i; for (i = 0; i < curr->lockdep_depth; i++) { - if (curr->held_locks[i].instance == lock) + struct held_lock *hlock = curr->held_locks + i; + + if (match_held_lock(hlock, lock)) return 1; } @@ -3148,7 +3207,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, current->lockdep_recursion = 1; __lock_acquire(lock, subclass, trylock, read, check, - irqs_disabled_flags(flags), nest_lock, ip); + irqs_disabled_flags(flags), nest_lock, ip, 0); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } @@ -3252,7 +3311,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) */ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) break; - if (hlock->instance == lock) + if (match_held_lock(hlock, lock)) goto found_it; prev_hlock = hlock; } @@ -3260,6 +3319,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) return; found_it: + if (hlock->instance != lock) + return; + hlock->waittime_stamp = sched_clock(); contention_point = lock_point(hlock_class(hlock)->contention_point, ip); @@ -3299,7 +3361,7 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) */ if (prev_hlock && prev_hlock->irq_context != hlock->irq_context) break; - if (hlock->instance == lock) + if (match_held_lock(hlock, lock)) goto found_it; prev_hlock = hlock; } @@ -3307,6 +3369,9 
@@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) return; found_it: + if (hlock->instance != lock) + return; + cpu = smp_processor_id(); if (hlock->waittime_stamp) { now = sched_clock(); -- cgit v1.2.3 From e351b660fddd4df76cc4635f896d311ed0ff3752 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 22 Jul 2009 22:48:09 +0800 Subject: lockdep: Reintroduce generation count to make BFS faster We still can apply DaveM's generation count optimization to BFS, based on the following idea: - before doing each BFS, increase the global generation id by 1 - if one node in the graph has been visited, mark it as visited by storing the current global generation id into the node's dep_gen_id field - so we can decide if one node has been visited already, by comparing the node's dep_gen_id with the global generation id. By applying DaveM's generation count optimization to current implementation of BFS, we gain the following advantages: - we save MAX_LOCKDEP_ENTRIES/8 bytes memory; - we remove the bitmap_zero(bfs_accessed, MAX_LOCKDEP_ENTRIES); in each BFS, which is very time-consuming since MAX_LOCKDEP_ENTRIES may be very large.(16384UL) Signed-off-by: Ming Lei Signed-off-by: Peter Zijlstra Cc: "David S. Miller" LKML-Reference: <1248274089-6358-1-git-send-email-tom.leiming@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 1 + kernel/lockdep.c | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 47d42eff6124..9ccf0e286b2a 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -58,6 +58,7 @@ struct lock_class { struct lockdep_subclass_key *key; unsigned int subclass; + unsigned int dep_gen_id; /* * IRQ/softirq usage tracking bits: diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 0bb246e21cd7..2b443b5090a1 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -861,14 +861,15 @@ struct circular_queue { }; static struct circular_queue lock_cq; -static unsigned long bfs_accessed[BITS_TO_LONGS(MAX_LOCKDEP_ENTRIES)]; unsigned int max_bfs_queue_depth; +static unsigned int lockdep_dependency_gen_id; + static inline void __cq_init(struct circular_queue *cq) { cq->front = cq->rear = 0; - bitmap_zero(bfs_accessed, MAX_LOCKDEP_ENTRIES); + lockdep_dependency_gen_id++; } static inline int __cq_empty(struct circular_queue *cq) @@ -914,7 +915,7 @@ static inline void mark_lock_accessed(struct lock_list *lock, nr = lock - list_entries; WARN_ON(nr >= nr_list_entries); lock->parent = parent; - set_bit(nr, bfs_accessed); + lock->class->dep_gen_id = lockdep_dependency_gen_id; } static inline unsigned long lock_accessed(struct lock_list *lock) @@ -923,7 +924,7 @@ static inline unsigned long lock_accessed(struct lock_list *lock) nr = lock - list_entries; WARN_ON(nr >= nr_list_entries); - return test_bit(nr, bfs_accessed); + return lock->class->dep_gen_id == lockdep_dependency_gen_id; } static inline struct lock_list *get_lock_parent(struct lock_list *child) @@ -3604,7 +3605,7 @@ void __init lockdep_info(void) sizeof(struct lock_chain) * MAX_LOCKDEP_CHAINS + sizeof(struct list_head) * CHAINHASH_SIZE) / 1024 #ifdef CONFIG_PROVE_LOCKING - + sizeof(struct circular_queue) + sizeof(bfs_accessed) + + sizeof(struct circular_queue) #endif ); -- cgit v1.2.3 From 1e3e238e9c4bf9987b19185235cd0cdc21ea038c Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Sun, 2 Aug 2009 11:05:41 +0000 Subject: IPVS: use pr_err and friends instead of IP_VS_ERR and friends Since pr_err and friends are used 
instead of printk there is no point in keeping IP_VS_ERR and friends. Furthermore make use of '__func__' instead of hard coded function names. Signed-off-by: Hannes Eder Acked-by: Simon Horman Signed-off-by: David S. Miller --- include/net/ip_vs.h | 7 ++-- net/netfilter/ipvs/ip_vs_app.c | 16 +++++----- net/netfilter/ipvs/ip_vs_conn.c | 14 ++++---- net/netfilter/ipvs/ip_vs_core.c | 24 +++++++------- net/netfilter/ipvs/ip_vs_ctl.c | 62 +++++++++++++++++------------------- net/netfilter/ipvs/ip_vs_dh.c | 4 +-- net/netfilter/ipvs/ip_vs_ftp.c | 4 +-- net/netfilter/ipvs/ip_vs_lblc.c | 6 ++-- net/netfilter/ipvs/ip_vs_lblcr.c | 14 ++++---- net/netfilter/ipvs/ip_vs_lc.c | 2 +- net/netfilter/ipvs/ip_vs_nq.c | 2 +- net/netfilter/ipvs/ip_vs_proto.c | 2 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +- net/netfilter/ipvs/ip_vs_rr.c | 2 +- net/netfilter/ipvs/ip_vs_sched.c | 38 +++++++++++----------- net/netfilter/ipvs/ip_vs_sed.c | 2 +- net/netfilter/ipvs/ip_vs_sh.c | 2 +- net/netfilter/ipvs/ip_vs_sync.c | 50 ++++++++++++++--------------- net/netfilter/ipvs/ip_vs_wrr.c | 4 +-- net/netfilter/ipvs/ip_vs_xmit.c | 40 +++++++++++------------ 21 files changed, 147 insertions(+), 152 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 910820327bc4..1c8ee1b13651 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -150,13 +150,10 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, #endif #define IP_VS_BUG() BUG() -#define IP_VS_ERR(msg...) pr_err(msg) -#define IP_VS_INFO(msg...) pr_info(msg) -#define IP_VS_WARNING(msg...) pr_warning(msg) -#define IP_VS_ERR_RL(msg...) \ +#define IP_VS_ERR_RL(msg, ...) \ do { \ if (net_ratelimit()) \ - pr_err(msg); \ + pr_err(msg, ##__VA_ARGS__); \ } while (0) #ifdef CONFIG_IP_VS_DEBUG diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index c1781f80daf2..3c7e42735b60 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -265,12 +265,12 @@ static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) if (vseq->delta || vseq->previous_delta) { if(after(seq, vseq->init_seq)) { th->seq = htonl(seq + vseq->delta); - IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n", - vseq->delta); + IP_VS_DBG(9, "%s(): added delta (%d) to seq\n", + __func__, vseq->delta); } else { th->seq = htonl(seq + vseq->previous_delta); - IP_VS_DBG(9, "vs_fix_seq(): added previous_delta " - "(%d) to seq\n", vseq->previous_delta); + IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n", + __func__, vseq->previous_delta); } } } @@ -294,14 +294,14 @@ vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th) to receive next, so compare it with init_seq+delta */ if(after(ack_seq, vseq->init_seq+vseq->delta)) { th->ack_seq = htonl(ack_seq - vseq->delta); - IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta " - "(%d) from ack_seq\n", vseq->delta); + IP_VS_DBG(9, "%s(): subtracted delta " + "(%d) from ack_seq\n", __func__, vseq->delta); } else { th->ack_seq = htonl(ack_seq - vseq->previous_delta); - IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted " + IP_VS_DBG(9, "%s(): subtracted " "previous_delta (%d) from ack_seq\n", - vseq->previous_delta); + __func__, vseq->previous_delta); } } } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 4173d7b1d4cc..27c30cf933da 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -153,8 +153,8 @@ static inline int 
ip_vs_conn_hash(struct ip_vs_conn *cp) atomic_inc(&cp->refcnt); ret = 1; } else { - IP_VS_ERR("ip_vs_conn_hash(): request for already hashed, " - "called from %p\n", __builtin_return_address(0)); + pr_err("%s(): request for already hashed, called from %pF\n", + __func__, __builtin_return_address(0)); ret = 0; } @@ -692,7 +692,7 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { - IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n"); + IP_VS_ERR_RL("%s(): no memory\n", __func__); return NULL; } @@ -1076,10 +1076,10 @@ int __init ip_vs_conn_init(void) return -ENOMEM; } - IP_VS_INFO("Connection hash table configured " - "(size=%d, memory=%ldKbytes)\n", - IP_VS_CONN_TAB_SIZE, - (long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024); + pr_info("Connection hash table configured " + "(size=%d, memory=%ldKbytes)\n", + IP_VS_CONN_TAB_SIZE, + (long)(IP_VS_CONN_TAB_SIZE*sizeof(struct list_head))/1024); IP_VS_DBG(0, "Each connection entry needs %Zd bytes at least\n", sizeof(struct ip_vs_conn)); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 6811dcaca0f6..b227750af752 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -391,9 +391,9 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) */ if (!svc->fwmark && pptr[1] != svc->port) { if (!svc->port) - IP_VS_ERR("Schedule: port zero only supported " - "in persistent services, " - "check your ipvs configuration\n"); + pr_err("Schedule: port zero only supported " + "in persistent services, " + "check your ipvs configuration\n"); return NULL; } @@ -465,7 +465,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, ip_vs_service_put(svc); /* create a new connection entry */ - IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n"); + IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); cp = ip_vs_conn_new(svc->af, iph.protocol, &iph.saddr, pptr[0], &iph.daddr, pptr[1], @@ -667,8 +667,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, unsigned int verdict = NF_DROP; if (IP_VS_FWD_METHOD(cp) != 0) { - IP_VS_ERR("shouldn't reach here, because the box is on the " - "half connection in the tun/dr module.\n"); + pr_err("shouldn't reach here, because the box is on the " + "half connection in the tun/dr module.\n"); } /* Ensure the checksum is correct */ @@ -1490,7 +1490,7 @@ static int __init ip_vs_init(void) ret = ip_vs_control_init(); if (ret < 0) { - IP_VS_ERR("can't setup control.\n"); + pr_err("can't setup control.\n"); goto cleanup_estimator; } @@ -1498,23 +1498,23 @@ static int __init ip_vs_init(void) ret = ip_vs_app_init(); if (ret < 0) { - IP_VS_ERR("can't setup application helper.\n"); + pr_err("can't setup application helper.\n"); goto cleanup_protocol; } ret = ip_vs_conn_init(); if (ret < 0) { - IP_VS_ERR("can't setup connection table.\n"); + pr_err("can't setup connection table.\n"); goto cleanup_app; } ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); if (ret < 0) { - IP_VS_ERR("can't register hooks.\n"); + pr_err("can't register hooks.\n"); goto cleanup_conn; } - IP_VS_INFO("ipvs loaded.\n"); + pr_info("ipvs loaded.\n"); return ret; cleanup_conn: @@ -1537,7 +1537,7 @@ static void __exit ip_vs_cleanup(void) ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); ip_vs_estimator_cleanup(); - IP_VS_INFO("ipvs unloaded.\n"); + pr_info("ipvs unloaded.\n"); } module_init(ip_vs_init); diff --git 
a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e6133ea1ea4c..fba2892b99e1 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -343,8 +343,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) unsigned hash; if (svc->flags & IP_VS_SVC_F_HASHED) { - IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, " - "called from %p\n", __builtin_return_address(0)); + pr_err("%s(): request for already hashed, called from %pF\n", + __func__, __builtin_return_address(0)); return 0; } @@ -377,8 +377,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) static int ip_vs_svc_unhash(struct ip_vs_service *svc) { if (!(svc->flags & IP_VS_SVC_F_HASHED)) { - IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, " - "called from %p\n", __builtin_return_address(0)); + pr_err("%s(): request for unhash flagged, called from %pF\n", + __func__, __builtin_return_address(0)); return 0; } @@ -844,7 +844,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); if (dest == NULL) { - IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n"); + pr_err("%s(): no memory.\n", __func__); return -ENOMEM; } @@ -888,13 +888,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) EnterFunction(2); if (udest->weight < 0) { - IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n"); + pr_err("%s(): server weight less than zero\n", __func__); return -ERANGE; } if (udest->l_threshold > udest->u_threshold) { - IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than " - "upper threshold\n"); + pr_err("%s(): lower threshold is higher than upper threshold\n", + __func__); return -ERANGE; } @@ -906,7 +906,7 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) dest = ip_vs_lookup_dest(svc, &daddr, dport); if (dest != NULL) { - IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n"); + IP_VS_DBG(1, "%s(): dest already exists\n", __func__); return -EEXIST; } @@ -1000,13 +1000,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) EnterFunction(2); if (udest->weight < 0) { - IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n"); + pr_err("%s(): server weight less than zero\n", __func__); return -ERANGE; } if (udest->l_threshold > udest->u_threshold) { - IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than " - "upper threshold\n"); + pr_err("%s(): lower threshold is higher than upper threshold\n", + __func__); return -ERANGE; } @@ -1018,7 +1018,7 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) dest = ip_vs_lookup_dest(svc, &daddr, dport); if (dest == NULL) { - IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n"); + IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); return -ENOENT; } @@ -1118,7 +1118,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) dest = ip_vs_lookup_dest(svc, &udest->addr, dport); if (dest == NULL) { - IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n"); + IP_VS_DBG(1, "%s(): destination not found!\n", __func__); return -ENOENT; } @@ -1164,8 +1164,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, /* Lookup the scheduler by 'u->sched_name' */ sched = ip_vs_scheduler_get(u->sched_name); if (sched == NULL) { - IP_VS_INFO("Scheduler module ip_vs_%s not found\n", - u->sched_name); + pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); ret = -ENOENT; goto out_mod_dec; } @@ -1179,7 
+1178,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); if (svc == NULL) { - IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n"); + IP_VS_DBG(1, "%s(): no memory\n", __func__); ret = -ENOMEM; goto out_err; } @@ -1262,8 +1261,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) */ sched = ip_vs_scheduler_get(u->sched_name); if (sched == NULL) { - IP_VS_INFO("Scheduler module ip_vs_%s not found\n", - u->sched_name); + pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); return -ENOENT; } old_sched = sched; @@ -2080,8 +2078,8 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) return -EPERM; if (len != set_arglen[SET_CMDID(cmd)]) { - IP_VS_ERR("set_ctl: len %u != %u\n", - len, set_arglen[SET_CMDID(cmd)]); + pr_err("set_ctl: len %u != %u\n", + len, set_arglen[SET_CMDID(cmd)]); return -EINVAL; } @@ -2132,9 +2130,9 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) { - IP_VS_ERR("set_ctl: invalid protocol: %d %pI4:%d %s\n", - usvc.protocol, &usvc.addr.ip, - ntohs(usvc.port), usvc.sched_name); + pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n", + usvc.protocol, &usvc.addr.ip, + ntohs(usvc.port), usvc.sched_name); ret = -EFAULT; goto out_unlock; } @@ -2359,8 +2357,8 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return -EPERM; if (*len < get_arglen[GET_CMDID(cmd)]) { - IP_VS_ERR("get_ctl: len %u < %u\n", - *len, get_arglen[GET_CMDID(cmd)]); + pr_err("get_ctl: len %u < %u\n", + *len, get_arglen[GET_CMDID(cmd)]); return -EINVAL; } @@ -2405,7 +2403,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) size = sizeof(*get) + sizeof(struct ip_vs_service_entry) * get->num_services; if (*len != size) { - IP_VS_ERR("length: %u != %u\n", *len, size); + pr_err("length: %u != %u\n", *len, size); ret = -EINVAL; goto out; } @@ -2445,7 +2443,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) size = sizeof(*get) + sizeof(struct ip_vs_dest_entry) * get->num_dests; if (*len != size) { - IP_VS_ERR("length: %u != %u\n", *len, size); + pr_err("length: %u != %u\n", *len, size); ret = -EINVAL; goto out; } @@ -3173,7 +3171,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) else if (cmd == IPVS_CMD_GET_CONFIG) reply_cmd = IPVS_CMD_SET_CONFIG; else { - IP_VS_ERR("unknown Generic Netlink command\n"); + pr_err("unknown Generic Netlink command\n"); return -EINVAL; } @@ -3238,7 +3236,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) goto out; nla_put_failure: - IP_VS_ERR("not enough space in Netlink message\n"); + pr_err("not enough space in Netlink message\n"); ret = -EMSGSIZE; out_err: @@ -3369,13 +3367,13 @@ int __init ip_vs_control_init(void) ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { - IP_VS_ERR("cannot register sockopt.\n"); + pr_err("cannot register sockopt.\n"); return ret; } ret = ip_vs_genl_register(); if (ret) { - IP_VS_ERR("cannot register Generic Netlink interface.\n"); + pr_err("cannot register Generic Netlink interface.\n"); nf_unregister_sockopt(&ip_vs_sockopts); return ret; } diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index d0c0594d1e2e..fe3e18834b91 100644 --- a/net/netfilter/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ 
-150,7 +150,7 @@ static int ip_vs_dh_init_svc(struct ip_vs_service *svc) tbl = kmalloc(sizeof(struct ip_vs_dh_bucket)*IP_VS_DH_TAB_SIZE, GFP_ATOMIC); if (tbl == NULL) { - IP_VS_ERR("ip_vs_dh_init_svc(): no memory\n"); + pr_err("%s(): no memory\n", __func__); return -ENOMEM; } svc->sched_data = tbl; @@ -217,7 +217,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); - IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n"); + IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); tbl = (struct ip_vs_dh_bucket *)svc->sched_data; dest = ip_vs_dh_get(svc->af, tbl, &iph.daddr); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 9c16a3f64c1b..33e2c799cba7 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -385,8 +385,8 @@ static int __init ip_vs_ftp_init(void) ret = register_ip_vs_app_inc(app, app->protocol, ports[i]); if (ret) break; - IP_VS_INFO("%s: loaded support on port[%d] = %d\n", - app->name, i, ports[i]); + pr_info("%s: loaded support on port[%d] = %d\n", + app->name, i, ports[i]); } if (ret) diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 98fb185d890b..c1757f3620cd 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -202,7 +202,7 @@ ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, const union nf_inet_addr *daddr, if (!en) { en = kmalloc(sizeof(*en), GFP_ATOMIC); if (!en) { - IP_VS_ERR("ip_vs_lblc_new(): no memory\n"); + pr_err("%s(): no memory\n", __func__); return NULL; } @@ -335,7 +335,7 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) */ tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); if (tbl == NULL) { - IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n"); + pr_err("%s(): no memory\n", __func__); return -ENOMEM; } svc->sched_data = tbl; @@ -480,7 +480,7 @@ ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); - IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); + IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ read_lock(&svc->sched_lock); diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 5f5e5f4bad5e..715b57f9540d 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -111,7 +111,7 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) e = kmalloc(sizeof(*e), GFP_ATOMIC); if (e == NULL) { - IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n"); + pr_err("%s(): no memory\n", __func__); return NULL; } @@ -205,8 +205,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) } } - IP_VS_DBG_BUF(6, "ip_vs_dest_set_min: server %s:%d " + IP_VS_DBG_BUF(6, "%s(): server %s:%d " "activeconns %d refcnt %d weight %d overhead %d\n", + __func__, IP_VS_DBG_ADDR(least->af, &least->addr), ntohs(least->port), atomic_read(&least->activeconns), @@ -252,8 +253,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) } } - IP_VS_DBG_BUF(6, "ip_vs_dest_set_max: server %s:%d " + IP_VS_DBG_BUF(6, "%s(): server %s:%d " "activeconns %d refcnt %d weight %d overhead %d\n", + __func__, IP_VS_DBG_ADDR(most->af, &most->addr), ntohs(most->port), atomic_read(&most->activeconns), atomic_read(&most->refcnt), @@ -377,7 +379,7 @@ ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, const union nf_inet_addr *daddr, if (!en) { en = kmalloc(sizeof(*en), GFP_ATOMIC); if 
(!en) { - IP_VS_ERR("ip_vs_lblcr_new(): no memory\n"); + pr_err("%s(): no memory\n", __func__); return NULL; } @@ -511,7 +513,7 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) */ tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); if (tbl == NULL) { - IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n"); + pr_err("%s(): no memory\n", __func__); return -ENOMEM; } svc->sched_data = tbl; @@ -657,7 +659,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); - IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); + IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* First look in our cache */ read_lock(&svc->sched_lock); diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index 4ecd5e19c39a..4f69db1fac56 100644 --- a/net/netfilter/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -47,7 +47,7 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_dest *dest, *least = NULL; unsigned int loh = 0, doh; - IP_VS_DBG(6, "ip_vs_lc_schedule(): Scheduling...\n"); + IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* * Simply select the server with the least number of diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 2224478bdea8..c413e1830823 100644 --- a/net/netfilter/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -60,7 +60,7 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_dest *dest, *least = NULL; unsigned int loh = 0, doh; - IP_VS_DBG(6, "ip_vs_nq_schedule(): Scheduling...\n"); + IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* * We calculate the load of each dest server as follows: diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index a95bc4021c90..85c8892e1e8b 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -262,7 +262,7 @@ int __init ip_vs_protocol_init(void) #ifdef CONFIG_IP_VS_PROTO_ESP REGISTER_PROTOCOL(&ip_vs_protocol_esp); #endif - IP_VS_INFO("Registered protocols (%s)\n", &protocols[2]); + pr_info("Registered protocols (%s)\n", &protocols[2]); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index c36c80d3a2b4..2278e141489e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -664,7 +664,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) break; spin_unlock(&tcp_app_lock); - IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" + IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", __func__, IP_VS_DBG_ADDR(cp->af, &cp->caddr), diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 96ebe40bc537..33a05d3684d9 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -445,7 +445,7 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) break; spin_unlock(&udp_app_lock); - IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" + IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", __func__, IP_VS_DBG_ADDR(cp->af, &cp->caddr), diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index b01007e1c11e..e210f37d8ea2 100644 --- a/net/netfilter/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -51,7 +51,7 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct list_head *p, *q; struct ip_vs_dest *dest; - IP_VS_DBG(6, "ip_vs_rr_schedule(): Scheduling...\n"); + 
IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); write_lock(&svc->sched_lock); p = (struct list_head *)svc->sched_data; diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index 87bc5ea0ef29..bbc1ac795952 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -47,11 +47,11 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, int ret; if (svc == NULL) { - IP_VS_ERR("ip_vs_bind_scheduler(): svc arg NULL\n"); + pr_err("%s(): svc arg NULL\n", __func__); return -EINVAL; } if (scheduler == NULL) { - IP_VS_ERR("ip_vs_bind_scheduler(): scheduler arg NULL\n"); + pr_err("%s(): scheduler arg NULL\n", __func__); return -EINVAL; } @@ -60,7 +60,7 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc, if (scheduler->init_service) { ret = scheduler->init_service(svc); if (ret) { - IP_VS_ERR("ip_vs_bind_scheduler(): init error\n"); + pr_err("%s(): init error\n", __func__); return ret; } } @@ -77,19 +77,19 @@ int ip_vs_unbind_scheduler(struct ip_vs_service *svc) struct ip_vs_scheduler *sched; if (svc == NULL) { - IP_VS_ERR("ip_vs_unbind_scheduler(): svc arg NULL\n"); + pr_err("%s(): svc arg NULL\n", __func__); return -EINVAL; } sched = svc->scheduler; if (sched == NULL) { - IP_VS_ERR("ip_vs_unbind_scheduler(): svc isn't bound\n"); + pr_err("%s(): svc isn't bound\n", __func__); return -EINVAL; } if (sched->done_service) { if (sched->done_service(svc) != 0) { - IP_VS_ERR("ip_vs_unbind_scheduler(): done error\n"); + pr_err("%s(): done error\n", __func__); return -EINVAL; } } @@ -106,8 +106,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name) { struct ip_vs_scheduler *sched; - IP_VS_DBG(2, "ip_vs_sched_getbyname(): sched_name \"%s\"\n", - sched_name); + IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name); read_lock_bh(&__ip_vs_sched_lock); @@ -173,12 +172,12 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) struct ip_vs_scheduler *sched; if (!scheduler) { - IP_VS_ERR("register_ip_vs_scheduler(): NULL arg\n"); + pr_err("%s(): NULL arg\n", __func__); return -EINVAL; } if (!scheduler->name) { - IP_VS_ERR("register_ip_vs_scheduler(): NULL scheduler_name\n"); + pr_err("%s(): NULL scheduler_name\n", __func__); return -EINVAL; } @@ -190,8 +189,8 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) if (!list_empty(&scheduler->n_list)) { write_unlock_bh(&__ip_vs_sched_lock); ip_vs_use_count_dec(); - IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler " - "already linked\n", scheduler->name); + pr_err("%s(): [%s] scheduler already linked\n", + __func__, scheduler->name); return -EINVAL; } @@ -203,9 +202,8 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) if (strcmp(scheduler->name, sched->name) == 0) { write_unlock_bh(&__ip_vs_sched_lock); ip_vs_use_count_dec(); - IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler " - "already existed in the system\n", - scheduler->name); + pr_err("%s(): [%s] scheduler already existed " + "in the system\n", __func__, scheduler->name); return -EINVAL; } } @@ -215,7 +213,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) list_add(&scheduler->n_list, &ip_vs_schedulers); write_unlock_bh(&__ip_vs_sched_lock); - IP_VS_INFO("[%s] scheduler registered.\n", scheduler->name); + pr_info("[%s] scheduler registered.\n", scheduler->name); return 0; } @@ -227,15 +225,15 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) { if (!scheduler) { - 
IP_VS_ERR( "unregister_ip_vs_scheduler(): NULL arg\n"); + pr_err("%s(): NULL arg\n", __func__); return -EINVAL; } write_lock_bh(&__ip_vs_sched_lock); if (list_empty(&scheduler->n_list)) { write_unlock_bh(&__ip_vs_sched_lock); - IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler " - "is not in the list. failed\n", scheduler->name); + pr_err("%s(): [%s] scheduler is not in the list. failed\n", + __func__, scheduler->name); return -EINVAL; } @@ -248,7 +246,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) /* decrease the module use count */ ip_vs_use_count_dec(); - IP_VS_INFO("[%s] scheduler unregistered.\n", scheduler->name); + pr_info("[%s] scheduler unregistered.\n", scheduler->name); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 4f745dd86dd8..1ab75a9dc400 100644 --- a/net/netfilter/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -64,7 +64,7 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_dest *dest, *least; unsigned int loh, doh; - IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n"); + IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* * We calculate the load of each dest server as follows: diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index fb4d2d23f2fe..8e6cfd36e6f0 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -147,7 +147,7 @@ static int ip_vs_sh_init_svc(struct ip_vs_service *svc) tbl = kmalloc(sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE, GFP_ATOMIC); if (tbl == NULL) { - IP_VS_ERR("ip_vs_sh_init_svc(): no memory\n"); + pr_err("%s(): no memory\n", __func__); return -ENOMEM; } svc->sched_data = tbl; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index cc04c99815fd..e177f0dc2084 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -246,7 +246,7 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) if (!curr_sb) { if (!(curr_sb=ip_vs_sync_buff_create())) { spin_unlock(&curr_sb_lock); - IP_VS_ERR("ip_vs_sync_buff_create failed.\n"); + pr_err("ip_vs_sync_buff_create failed.\n"); return; } } @@ -412,7 +412,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) if (dest) atomic_dec(&dest->refcnt); if (!cp) { - IP_VS_ERR("ip_vs_conn_new failed\n"); + pr_err("ip_vs_conn_new failed\n"); return; } } else if (!cp->dest) { @@ -580,8 +580,8 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname) addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); if (!addr) - IP_VS_ERR("You probably need to specify IP address on " - "multicast interface.\n"); + pr_err("You probably need to specify IP address on " + "multicast interface.\n"); IP_VS_DBG(7, "binding socket with (%s) %pI4\n", ifname, &addr); @@ -605,13 +605,13 @@ static struct socket * make_send_sock(void) /* First create a socket */ result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { - IP_VS_ERR("Error during creation of socket; terminating\n"); + pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn); if (result < 0) { - IP_VS_ERR("Error setting outbound mcast interface\n"); + pr_err("Error setting outbound mcast interface\n"); goto error; } @@ -620,14 +620,14 @@ static struct socket * make_send_sock(void) result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn); if (result < 0) { - IP_VS_ERR("Error binding address of the mcast interface\n"); + 
pr_err("Error binding address of the mcast interface\n"); goto error; } result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr, sizeof(struct sockaddr), 0); if (result < 0) { - IP_VS_ERR("Error connecting to the multicast addr\n"); + pr_err("Error connecting to the multicast addr\n"); goto error; } @@ -650,7 +650,7 @@ static struct socket * make_receive_sock(void) /* First create a socket */ result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); if (result < 0) { - IP_VS_ERR("Error during creation of socket; terminating\n"); + pr_err("Error during creation of socket; terminating\n"); return ERR_PTR(result); } @@ -660,7 +660,7 @@ static struct socket * make_receive_sock(void) result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr, sizeof(struct sockaddr)); if (result < 0) { - IP_VS_ERR("Error binding to the multicast addr\n"); + pr_err("Error binding to the multicast addr\n"); goto error; } @@ -669,7 +669,7 @@ static struct socket * make_receive_sock(void) (struct in_addr *) &mcast_addr.sin_addr, ip_vs_backup_mcast_ifn); if (result < 0) { - IP_VS_ERR("Error joining to the multicast group\n"); + pr_err("Error joining to the multicast group\n"); goto error; } @@ -709,7 +709,7 @@ ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) msg->size = htons(msg->size); if (ip_vs_send_async(sock, (char *)msg, msize) != msize) - IP_VS_ERR("ip_vs_send_async error\n"); + pr_err("ip_vs_send_async error\n"); } static int @@ -740,9 +740,9 @@ static int sync_thread_master(void *data) struct ip_vs_sync_thread_data *tinfo = data; struct ip_vs_sync_buff *sb; - IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, " - "syncid = %d\n", - ip_vs_master_mcast_ifn, ip_vs_master_syncid); + pr_info("sync thread started: state = MASTER, mcast_ifn = %s, " + "syncid = %d\n", + ip_vs_master_mcast_ifn, ip_vs_master_syncid); while (!kthread_should_stop()) { while ((sb = sb_dequeue())) { @@ -783,9 +783,9 @@ static int sync_thread_backup(void *data) struct ip_vs_sync_thread_data *tinfo = data; int len; - IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, " - "syncid = %d\n", - ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); + pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " + "syncid = %d\n", + ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); while (!kthread_should_stop()) { wait_event_interruptible(*tinfo->sock->sk->sk_sleep, @@ -797,7 +797,7 @@ static int sync_thread_backup(void *data) len = ip_vs_receive(tinfo->sock, tinfo->buf, sync_recv_mesg_maxlen); if (len <= 0) { - IP_VS_ERR("receiving message error\n"); + pr_err("receiving message error\n"); break; } @@ -827,7 +827,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) int (*threadfn)(void *data); int result = -ENOMEM; - IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); + IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n", sizeof(struct ip_vs_sync_conn)); @@ -904,14 +904,14 @@ out: int stop_sync_thread(int state) { - IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current)); + IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current)); if (state == IP_VS_STATE_MASTER) { if (!sync_master_thread) return -ESRCH; - IP_VS_INFO("stopping master sync thread %d ...\n", - task_pid_nr(sync_master_thread)); + pr_info("stopping master sync thread %d ...\n", + task_pid_nr(sync_master_thread)); /* * The lock synchronizes with sb_queue_tail(), so that we don't @@ -928,8 +928,8 @@ int 
stop_sync_thread(int state) if (!sync_backup_thread) return -ESRCH; - IP_VS_INFO("stopping backup sync thread %d ...\n", - task_pid_nr(sync_backup_thread)); + pr_info("stopping backup sync thread %d ...\n", + task_pid_nr(sync_backup_thread)); ip_vs_sync_state &= ~IP_VS_STATE_BACKUP; kthread_stop(sync_backup_thread); diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index c39ebb6c5a54..70ff82cda57d 100644 --- a/net/netfilter/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -97,7 +97,7 @@ static int ip_vs_wrr_init_svc(struct ip_vs_service *svc) */ mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_ATOMIC); if (mark == NULL) { - IP_VS_ERR("ip_vs_wrr_init_svc(): no memory\n"); + pr_err("%s(): no memory\n", __func__); return -ENOMEM; } mark->cl = &svc->destinations; @@ -144,7 +144,7 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_wrr_mark *mark = svc->sched_data; struct list_head *p; - IP_VS_DBG(6, "ip_vs_wrr_schedule(): Scheduling...\n"); + IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); /* * This loop will always terminate, because mark->cw in (0, max_weight] diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 061e76dfdad9..30b3189bd29c 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -238,8 +238,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (ip_route_output_key(&init_net, &rt, &fl)) { - IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, dest: %pI4\n", - &iph->daddr); + IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n", + __func__, &iph->daddr); goto tx_error_icmp; } @@ -248,7 +248,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n"); + IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -302,8 +302,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); if (!rt) { - IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, dest: %pI6\n", - &iph->daddr); + IP_VS_DBG_RL("%s(): ip6_route_output error, dest: %pI6\n", + __func__, &iph->daddr); goto tx_error_icmp; } @@ -312,7 +312,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (skb->len > mtu) { dst_release(&rt->u.dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n"); + IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -539,9 +539,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (skb->protocol != htons(ETH_P_IP)) { - IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, " + IP_VS_DBG_RL("%s(): protocol error, " "ETH_P_IP: %d, skb protocol: %d\n", - htons(ETH_P_IP), skb->protocol); + __func__, htons(ETH_P_IP), skb->protocol); goto tx_error; } @@ -553,7 +553,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr); if (mtu < 68) { ip_rt_put(rt); - IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n"); + IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); goto tx_error; } if (skb_dst(skb)) @@ -565,7 +565,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, && mtu < ntohs(old_iph->tot_len)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, 
htonl(mtu)); ip_rt_put(rt); - IP_VS_DBG_RL("ip_vs_tunnel_xmit(): frag needed\n"); + IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -581,7 +581,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!new_skb) { ip_rt_put(rt); kfree_skb(skb); - IP_VS_ERR_RL("ip_vs_tunnel_xmit(): no memory\n"); + IP_VS_ERR_RL("%s(): no memory\n", __func__); return NF_STOLEN; } kfree_skb(skb); @@ -649,9 +649,9 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (skb->protocol != htons(ETH_P_IPV6)) { - IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, " + IP_VS_DBG_RL("%s(): protocol error, " "ETH_P_IPV6: %d, skb protocol: %d\n", - htons(ETH_P_IPV6), skb->protocol); + __func__, htons(ETH_P_IPV6), skb->protocol); goto tx_error; } @@ -665,7 +665,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* TODO IPv6: do we need this check in IPv6? */ if (mtu < 1280) { dst_release(&rt->u.dst); - IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n"); + IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__); goto tx_error; } if (skb_dst(skb)) @@ -674,7 +674,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); dst_release(&rt->u.dst); - IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n"); + IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -690,7 +690,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (!new_skb) { dst_release(&rt->u.dst); kfree_skb(skb); - IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n"); + IP_VS_ERR_RL("%s(): no memory\n", __func__); return NF_STOLEN; } kfree_skb(skb); @@ -763,7 +763,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); ip_rt_put(rt); - IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n"); + IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -816,7 +816,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (skb->len > mtu) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); dst_release(&rt->u.dst); - IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n"); + IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -891,7 +891,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n"); + IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -966,7 +966,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (skb->len > mtu) { dst_release(&rt->u.dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n"); + IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } -- cgit v1.2.3 From e4c4e448cf557921ffbbbd6d6ddac81fdceacb4f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Jul 2009 03:15:07 +0000 Subject: neigh: Convert garbage collection from softirq to workqueue Current neigh_periodic_timer() function is fired by timer IRQ, and scans one hash bucket each round (very litle work in fact) As we are supposed to scan whole hash table in 15 seconds, this means neigh_periodic_timer() can be fired very often. 
(depending on the number of concurrent hash entries we stored in this table) Converting this to a workqueue permits scanning whole table, minimizing icache pollution, and firing this work every 15 seconds, independantly of hash table size. This 15 seconds delay is not a hard number, as work is a deferrable one. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 4 +-- net/core/neighbour.c | 89 ++++++++++++++++++++++++------------------------- 2 files changed, 45 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index d8d790e56d3d..18b69b6cecaf 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -24,6 +24,7 @@ #include #include +#include #include /* @@ -167,7 +168,7 @@ struct neigh_table int gc_thresh2; int gc_thresh3; unsigned long last_flush; - struct timer_list gc_timer; + struct delayed_work gc_work; struct timer_list proxy_timer; struct sk_buff_head proxy_queue; atomic_t entries; @@ -178,7 +179,6 @@ struct neigh_table struct neighbour **hash_buckets; unsigned int hash_mask; __u32 hash_rnd; - unsigned int hash_chain_gc; struct pneigh_entry **phash_buckets; }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c6f9ad8e4c7a..e587e6819698 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -692,75 +692,74 @@ static void neigh_connect(struct neighbour *neigh) hh->hh_output = neigh->ops->hh_output; } -static void neigh_periodic_timer(unsigned long arg) +static void neigh_periodic_work(struct work_struct *work) { - struct neigh_table *tbl = (struct neigh_table *)arg; + struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); struct neighbour *n, **np; - unsigned long expire, now = jiffies; + unsigned int i; NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); - write_lock(&tbl->lock); + write_lock_bh(&tbl->lock); /* * periodically recompute ReachableTime from random function */ - if (time_after(now, tbl->last_rand + 300 * HZ)) { + if (time_after(jiffies, tbl->last_rand + 300 * HZ)) { struct neigh_parms *p; - tbl->last_rand = now; + tbl->last_rand = jiffies; for (p = &tbl->parms; p; p = p->next) p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); } - np = &tbl->hash_buckets[tbl->hash_chain_gc]; - tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask); + for (i = 0 ; i <= tbl->hash_mask; i++) { + np = &tbl->hash_buckets[i]; - while ((n = *np) != NULL) { - unsigned int state; + while ((n = *np) != NULL) { + unsigned int state; - write_lock(&n->lock); + write_lock(&n->lock); - state = n->nud_state; - if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { - write_unlock(&n->lock); - goto next_elt; - } + state = n->nud_state; + if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { + write_unlock(&n->lock); + goto next_elt; + } - if (time_before(n->used, n->confirmed)) - n->used = n->confirmed; + if (time_before(n->used, n->confirmed)) + n->used = n->confirmed; - if (atomic_read(&n->refcnt) == 1 && - (state == NUD_FAILED || - time_after(now, n->used + n->parms->gc_staletime))) { - *np = n->next; - n->dead = 1; + if (atomic_read(&n->refcnt) == 1 && + (state == NUD_FAILED || + time_after(jiffies, n->used + n->parms->gc_staletime))) { + *np = n->next; + n->dead = 1; + write_unlock(&n->lock); + neigh_cleanup_and_release(n); + continue; + } write_unlock(&n->lock); - neigh_cleanup_and_release(n); - continue; - } - write_unlock(&n->lock); next_elt: - np = &n->next; + np = &n->next; + } + /* + * It's fine to release lock here, even if hash 
table + * grows while we are preempted. + */ + write_unlock_bh(&tbl->lock); + cond_resched(); + write_lock_bh(&tbl->lock); } - /* Cycle through all hash buckets every base_reachable_time/2 ticks. * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 * base_reachable_time. */ - expire = tbl->parms.base_reachable_time >> 1; - expire /= (tbl->hash_mask + 1); - if (!expire) - expire = 1; - - if (expire>HZ) - mod_timer(&tbl->gc_timer, round_jiffies(now + expire)); - else - mod_timer(&tbl->gc_timer, now + expire); - - write_unlock(&tbl->lock); + schedule_delayed_work(&tbl->gc_work, + tbl->parms.base_reachable_time >> 1); + write_unlock_bh(&tbl->lock); } static __inline__ int neigh_max_probes(struct neighbour *n) @@ -1442,10 +1441,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); rwlock_init(&tbl->lock); - setup_timer(&tbl->gc_timer, neigh_periodic_timer, (unsigned long)tbl); - tbl->gc_timer.expires = now + 1; - add_timer(&tbl->gc_timer); - + INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work); + schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl); skb_queue_head_init_class(&tbl->proxy_queue, &neigh_table_proxy_queue_class); @@ -1482,7 +1479,8 @@ int neigh_table_clear(struct neigh_table *tbl) struct neigh_table **tp; /* It is not clean... Fix it to unload IPv6 module safely */ - del_timer_sync(&tbl->gc_timer); + cancel_delayed_work(&tbl->gc_work); + flush_scheduled_work(); del_timer_sync(&tbl->proxy_timer); pneigh_queue_purge(&tbl->proxy_queue); neigh_ifdown(tbl, NULL); @@ -1752,7 +1750,6 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, .ndtc_last_rand = jiffies_to_msecs(rand_delta), .ndtc_hash_rnd = tbl->hash_rnd, .ndtc_hash_mask = tbl->hash_mask, - .ndtc_hash_chain_gc = tbl->hash_chain_gc, .ndtc_proxy_qlen = tbl->proxy_queue.qlen, }; -- cgit v1.2.3 From 47cab6a722d44c71c4f8224017ef548522243cf4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 3 Aug 2009 09:31:54 +0200 Subject: debug lockups: Improve lockup detection, fix generic arch fallback As Andrew noted, my previous patch ("debug lockups: Improve lockup detection") broke/removed SysRq-L support from architecture that do not provide a __trigger_all_cpu_backtrace implementation. Restore a fallback path and clean up the SysRq-L machinery a bit: - Rename the arch method to arch_trigger_all_cpu_backtrace() - Simplify the define - Document the method a bit - in the hope of more architectures adding support for it. [ The patch touches Sparc code for the rename. ] Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Cc: "David S. 
Miller" LKML-Reference: <20090802140809.7ec4bb6b.akpm@linux-foundation.org> Signed-off-by: Ingo Molnar --- arch/sparc/include/asm/irq_64.h | 4 ++-- arch/sparc/kernel/process_64.c | 4 ++-- arch/x86/include/asm/nmi.h | 4 ++-- arch/x86/kernel/apic/nmi.c | 2 +- drivers/char/sysrq.c | 15 ++++++++++++++- include/linux/nmi.h | 19 +++++++++++++++++-- 6 files changed, 38 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 1934f2cbf513..a0b443cb3c1f 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -89,8 +89,8 @@ static inline unsigned long get_softint(void) return retval; } -void __trigger_all_cpu_backtrace(void); -#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() +void arch_trigger_all_cpu_backtrace(void); +#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace extern void *hardirq_stack[NR_CPUS]; extern void *softirq_stack[NR_CPUS]; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 4041f94e7724..18d67854a1b8 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -251,7 +251,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp) } } -void __trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(void) { struct thread_info *tp = current_thread_info(); struct pt_regs *regs = get_irq_regs(); @@ -304,7 +304,7 @@ void __trigger_all_cpu_backtrace(void) static void sysrq_handle_globreg(int key, struct tty_struct *tty) { - __trigger_all_cpu_backtrace(); + arch_trigger_all_cpu_backtrace(); } static struct sysrq_key_op sparc_globalreg_op = { diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c86e5ed4af51..e63cf7d441e1 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -45,8 +45,8 @@ extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, void __user *, size_t *, loff_t *); extern int unknown_nmi_panic; -void __trigger_all_cpu_backtrace(void); -#define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace() +void arch_trigger_all_cpu_backtrace(void); +#define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace static inline void localise_nmi_watchdog(void) { diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 1bb1ac20e9ec..db7220220d09 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -554,7 +554,7 @@ int do_nmi_callback(struct pt_regs *regs, int cpu) return 0; } -void __trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(void) { int i; diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 165f307f30e8..50eecfe1d724 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -223,7 +223,20 @@ static DECLARE_WORK(sysrq_showallcpus, sysrq_showregs_othercpus); static void sysrq_handle_showallcpus(int key, struct tty_struct *tty) { - trigger_all_cpu_backtrace(); + /* + * Fall back to the workqueue based printing if the + * backtrace printing did not succeed or the + * architecture has no support for it: + */ + if (!trigger_all_cpu_backtrace()) { + struct pt_regs *regs = get_irq_regs(); + + if (regs) { + printk(KERN_INFO "CPU%d:\n", smp_processor_id()); + show_regs(regs); + } + schedule_work(&sysrq_showallcpus); + } } static struct sysrq_key_op sysrq_showallcpus_op = { diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 29af2d5df097..b752e807adde 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -28,8 
+28,23 @@ static inline void acpi_nmi_disable(void) { } static inline void acpi_nmi_enable(void) { } #endif -#ifndef trigger_all_cpu_backtrace -#define trigger_all_cpu_backtrace() do { } while (0) +/* + * Create trigger_all_cpu_backtrace() out of the arch-provided + * base function. Return whether such support was available, + * to allow calling code to fall back to some other mechanism: + */ +#ifdef arch_trigger_all_cpu_backtrace +static inline bool trigger_all_cpu_backtrace(void) +{ + arch_trigger_all_cpu_backtrace(); + + return true; +} +#else +static inline bool trigger_all_cpu_backtrace(void) +{ + return false; +} #endif #endif -- cgit v1.2.3 From ff663cf8705bea101d5f73cf471855c85242575e Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Thu, 23 Jul 2009 17:25:49 +0100 Subject: agp: Add generic support for graphics dma remapping New driver hooks for support graphics memory dma remapping are introduced in this patch. It makes generic code can tell if current device needs dma remapping, then call driver provided interfaces for mapping and unmapping. Change has also been made to handle scratch_page in remapping case. Signed-off-by: Zhenyu Wang Signed-off-by: David Woodhouse --- drivers/char/agp/agp.h | 6 ++++++ drivers/char/agp/backend.c | 20 ++++++++++++++++++++ drivers/char/agp/generic.c | 9 +++++++++ include/linux/agp_backend.h | 6 +++++- 4 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index ce110a3bf298..17e6d0d3ba36 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h @@ -121,6 +121,11 @@ struct agp_bridge_driver { void (*agp_destroy_pages)(struct agp_memory *); int (*agp_type_to_mask_type) (struct agp_bridge_data *, int); void (*chipset_flush)(struct agp_bridge_data *); + + int (*agp_map_page)(void *addr, dma_addr_t *ret); + void (*agp_unmap_page)(void *addr, dma_addr_t dma); + int (*agp_map_memory)(struct agp_memory *mem); + void (*agp_unmap_memory)(struct agp_memory *mem); }; struct agp_bridge_data { @@ -135,6 +140,7 @@ struct agp_bridge_data { u32 *gatt_table_real; unsigned long scratch_page; unsigned long scratch_page_real; + dma_addr_t scratch_page_dma; unsigned long gart_bus_addr; unsigned long gatt_bus_addr; u32 mode; diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c index 3bd7e503de41..19ac3663acdc 100644 --- a/drivers/char/agp/backend.c +++ b/drivers/char/agp/backend.c @@ -152,6 +152,15 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge) bridge->scratch_page_real = phys_to_gart(page_to_phys(page)); bridge->scratch_page = bridge->driver->mask_memory(bridge, phys_to_gart(page_to_phys(page)), 0); + + if (bridge->driver->agp_map_page && + bridge->driver->agp_map_page(phys_to_virt(page_to_phys(page)), + &bridge->scratch_page_dma)) { + dev_err(&bridge->dev->dev, + "unable to dma-map scratch page\n"); + rc = -ENOMEM; + goto err_out_nounmap; + } } size_value = bridge->driver->fetch_size(); @@ -191,6 +200,13 @@ static int agp_backend_initialize(struct agp_bridge_data *bridge) return 0; err_out: + if (bridge->driver->needs_scratch_page && + bridge->driver->agp_unmap_page) { + void *va = gart_to_virt(bridge->scratch_page_real); + + bridge->driver->agp_unmap_page(va, bridge->scratch_page_dma); + } +err_out_nounmap: if (bridge->driver->needs_scratch_page) { void *va = gart_to_virt(bridge->scratch_page_real); @@ -221,6 +237,10 @@ static void agp_backend_cleanup(struct agp_bridge_data *bridge) bridge->driver->needs_scratch_page) { void *va = 
gart_to_virt(bridge->scratch_page_real); + if (bridge->driver->agp_unmap_page) + bridge->driver->agp_unmap_page(va, + bridge->scratch_page_dma); + bridge->driver->agp_destroy_page(va, AGP_PAGE_DESTROY_UNMAP); bridge->driver->agp_destroy_page(va, AGP_PAGE_DESTROY_FREE); } diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c index a3bcc7ef42f9..28f0208c66a6 100644 --- a/drivers/char/agp/generic.c +++ b/drivers/char/agp/generic.c @@ -437,6 +437,12 @@ int agp_bind_memory(struct agp_memory *curr, off_t pg_start) curr->bridge->driver->cache_flush(); curr->is_flushed = true; } + + if (curr->bridge->driver->agp_map_memory) { + ret_val = curr->bridge->driver->agp_map_memory(curr); + if (ret_val) + return ret_val; + } ret_val = curr->bridge->driver->insert_memory(curr, pg_start, curr->type); if (ret_val != 0) @@ -478,6 +484,9 @@ int agp_unbind_memory(struct agp_memory *curr) if (ret_val != 0) return ret_val; + if (curr->bridge->driver->agp_unmap_memory) + curr->bridge->driver->agp_unmap_memory(curr); + curr->is_bound = false; curr->pg_start = 0; spin_lock(&curr->bridge->mapped_lock); diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 76fa794fdac0..8a294d65b9b1 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -79,9 +79,13 @@ struct agp_memory { u32 physical; bool is_bound; bool is_flushed; - bool vmalloc_flag; + bool vmalloc_flag; + bool sg_vmalloc_flag; /* list of agp_memory mapped to the aperture */ struct list_head mapped_list; + /* DMA-mapped addresses */ + struct scatterlist *sg_list; + int num_sg; }; #define AGP_NORMAL_MEMORY 0 -- cgit v1.2.3 From f692775d7e0a22477143cd884e45c955448ac7d2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 29 Jul 2009 09:28:45 +0100 Subject: intel-agp: fix sglist allocation to avoid vmalloc() Signed-off-by: David Woodhouse --- drivers/char/agp/intel-agp.c | 29 ++++++++++------------------- include/linux/agp_backend.h | 1 - 2 files changed, 10 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index b9d9886ff3c3..d8c80d8be5e2 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -198,39 +198,30 @@ static void intel_agp_unmap_page(struct page *page, dma_addr_t dma) static void intel_agp_free_sglist(struct agp_memory *mem) { + struct sg_table st; + + st.sgl = mem->sg_list; + st.orig_nents = st.nents = mem->page_count; + + sg_free_table(&st); - if (mem->sg_vmalloc_flag) - vfree(mem->sg_list); - else - kfree(mem->sg_list); - mem->sg_vmalloc_flag = 0; mem->sg_list = NULL; mem->num_sg = 0; } static int intel_agp_map_memory(struct agp_memory *mem) { + struct sg_table st; struct scatterlist *sg; int i; DBG("try mapping %lu pages\n", (unsigned long)mem->page_count); - if ((mem->page_count * sizeof(*mem->sg_list)) < 2*PAGE_SIZE) - mem->sg_list = kcalloc(mem->page_count, sizeof(*mem->sg_list), - GFP_KERNEL); - - if (mem->sg_list == NULL) { - mem->sg_list = vmalloc(mem->page_count * sizeof(*mem->sg_list)); - mem->sg_vmalloc_flag = 1; - } - - if (!mem->sg_list) { - mem->sg_vmalloc_flag = 0; + if (sg_alloc_table(&st, mem->page_count, GFP_KERNEL)) return -ENOMEM; - } - sg_init_table(mem->sg_list, mem->page_count); - sg = mem->sg_list; + mem->sg_list = sg = st.sgl; + for (i = 0 ; i < mem->page_count; i++, sg = sg_next(sg)) sg_set_page(sg, mem->pages[i], PAGE_SIZE, 0); diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 8a294d65b9b1..880130f7311f 100644 --- 
a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -80,7 +80,6 @@ struct agp_memory { bool is_bound; bool is_flushed; bool vmalloc_flag; - bool sg_vmalloc_flag; /* list of agp_memory mapped to the aperture */ struct list_head mapped_list; /* DMA-mapped addresses */ -- cgit v1.2.3 From 42c4ab41a176ee784c0f28c0b29025a8fc34f05a Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 29 Jul 2009 12:15:26 +0200 Subject: itimers: Merge ITIMER_VIRT and ITIMER_PROF Both cpu itimers have same data flow in the few places, this patch make unification of code related with VIRT and PROF itimers. Signed-off-by: Stanislaw Gruszka Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <1248862529-6063-2-git-send-email-sgruszka@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 14 ++++- kernel/fork.c | 9 +-- kernel/itimer.c | 146 +++++++++++++++++++++------------------------- kernel/posix-cpu-timers.c | 98 +++++++++++++++---------------- 4 files changed, 130 insertions(+), 137 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ab08e4bb6b8..3b3efaddd953 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -470,6 +470,11 @@ struct pacct_struct { unsigned long ac_minflt, ac_majflt; }; +struct cpu_itimer { + cputime_t expires; + cputime_t incr; +}; + /** * struct task_cputime - collected CPU time counts * @utime: time spent in user mode, in &cputime_t units @@ -564,9 +569,12 @@ struct signal_struct { struct pid *leader_pid; ktime_t it_real_incr; - /* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */ - cputime_t it_prof_expires, it_virt_expires; - cputime_t it_prof_incr, it_virt_incr; + /* + * ITIMER_PROF and ITIMER_VIRTUAL timers for the process, we use + * CPUCLOCK_PROF and CPUCLOCK_VIRT for indexing array as these + * values are defined to 0 and 1 respectively + */ + struct cpu_itimer it[2]; /* * Thread group totals for process CPU timers. diff --git a/kernel/fork.c b/kernel/fork.c index 29b532e718f7..893ab0bf5e39 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include @@ -790,10 +791,10 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) thread_group_cputime_init(sig); /* Expiration times and increments. */ - sig->it_virt_expires = cputime_zero; - sig->it_virt_incr = cputime_zero; - sig->it_prof_expires = cputime_zero; - sig->it_prof_incr = cputime_zero; + sig->it[CPUCLOCK_PROF].expires = cputime_zero; + sig->it[CPUCLOCK_PROF].incr = cputime_zero; + sig->it[CPUCLOCK_VIRT].expires = cputime_zero; + sig->it[CPUCLOCK_VIRT].incr = cputime_zero; /* Cached expiration times. 
*/ sig->cputime_expires.prof_exp = cputime_zero; diff --git a/kernel/itimer.c b/kernel/itimer.c index 58762f7077ec..852c88ddd1f0 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -41,10 +41,43 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer) return ktime_to_timeval(rem); } +static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, + struct itimerval *value) +{ + cputime_t cval, cinterval; + struct cpu_itimer *it = &tsk->signal->it[clock_id]; + + spin_lock_irq(&tsk->sighand->siglock); + + cval = it->expires; + cinterval = it->incr; + if (!cputime_eq(cval, cputime_zero)) { + struct task_cputime cputime; + cputime_t t; + + thread_group_cputimer(tsk, &cputime); + if (clock_id == CPUCLOCK_PROF) + t = cputime_add(cputime.utime, cputime.stime); + else + /* CPUCLOCK_VIRT */ + t = cputime.utime; + + if (cputime_le(cval, t)) + /* about to fire */ + cval = jiffies_to_cputime(1); + else + cval = cputime_sub(cval, t); + } + + spin_unlock_irq(&tsk->sighand->siglock); + + cputime_to_timeval(cval, &value->it_value); + cputime_to_timeval(cinterval, &value->it_interval); +} + int do_getitimer(int which, struct itimerval *value) { struct task_struct *tsk = current; - cputime_t cinterval, cval; switch (which) { case ITIMER_REAL: @@ -55,44 +88,10 @@ int do_getitimer(int which, struct itimerval *value) spin_unlock_irq(&tsk->sighand->siglock); break; case ITIMER_VIRTUAL: - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_virt_expires; - cinterval = tsk->signal->it_virt_incr; - if (!cputime_eq(cval, cputime_zero)) { - struct task_cputime cputime; - cputime_t utime; - - thread_group_cputimer(tsk, &cputime); - utime = cputime.utime; - if (cputime_le(cval, utime)) { /* about to fire */ - cval = jiffies_to_cputime(1); - } else { - cval = cputime_sub(cval, utime); - } - } - spin_unlock_irq(&tsk->sighand->siglock); - cputime_to_timeval(cval, &value->it_value); - cputime_to_timeval(cinterval, &value->it_interval); + get_cpu_itimer(tsk, CPUCLOCK_VIRT, value); break; case ITIMER_PROF: - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_prof_expires; - cinterval = tsk->signal->it_prof_incr; - if (!cputime_eq(cval, cputime_zero)) { - struct task_cputime times; - cputime_t ptime; - - thread_group_cputimer(tsk, ×); - ptime = cputime_add(times.utime, times.stime); - if (cputime_le(cval, ptime)) { /* about to fire */ - cval = jiffies_to_cputime(1); - } else { - cval = cputime_sub(cval, ptime); - } - } - spin_unlock_irq(&tsk->sighand->siglock); - cputime_to_timeval(cval, &value->it_value); - cputime_to_timeval(cinterval, &value->it_interval); + get_cpu_itimer(tsk, CPUCLOCK_PROF, value); break; default: return(-EINVAL); @@ -128,6 +127,36 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } +static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, + struct itimerval *value, struct itimerval *ovalue) +{ + cputime_t cval, cinterval, nval, ninterval; + struct cpu_itimer *it = &tsk->signal->it[clock_id]; + + nval = timeval_to_cputime(&value->it_value); + ninterval = timeval_to_cputime(&value->it_interval); + + spin_lock_irq(&tsk->sighand->siglock); + + cval = it->expires; + cinterval = it->incr; + if (!cputime_eq(cval, cputime_zero) || + !cputime_eq(nval, cputime_zero)) { + if (cputime_gt(nval, cputime_zero)) + nval = cputime_add(nval, jiffies_to_cputime(1)); + set_process_cpu_timer(tsk, clock_id, &nval, &cval); + } + it->expires = nval; + it->incr = ninterval; + + spin_unlock_irq(&tsk->sighand->siglock); + + if 
(ovalue) { + cputime_to_timeval(cval, &ovalue->it_value); + cputime_to_timeval(cinterval, &ovalue->it_interval); + } +} + /* * Returns true if the timeval is in canonical form */ @@ -139,7 +168,6 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue) struct task_struct *tsk = current; struct hrtimer *timer; ktime_t expires; - cputime_t cval, cinterval, nval, ninterval; /* * Validate the timevals in value. @@ -174,48 +202,10 @@ again: spin_unlock_irq(&tsk->sighand->siglock); break; case ITIMER_VIRTUAL: - nval = timeval_to_cputime(&value->it_value); - ninterval = timeval_to_cputime(&value->it_interval); - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_virt_expires; - cinterval = tsk->signal->it_virt_incr; - if (!cputime_eq(cval, cputime_zero) || - !cputime_eq(nval, cputime_zero)) { - if (cputime_gt(nval, cputime_zero)) - nval = cputime_add(nval, - jiffies_to_cputime(1)); - set_process_cpu_timer(tsk, CPUCLOCK_VIRT, - &nval, &cval); - } - tsk->signal->it_virt_expires = nval; - tsk->signal->it_virt_incr = ninterval; - spin_unlock_irq(&tsk->sighand->siglock); - if (ovalue) { - cputime_to_timeval(cval, &ovalue->it_value); - cputime_to_timeval(cinterval, &ovalue->it_interval); - } + set_cpu_itimer(tsk, CPUCLOCK_VIRT, value, ovalue); break; case ITIMER_PROF: - nval = timeval_to_cputime(&value->it_value); - ninterval = timeval_to_cputime(&value->it_interval); - spin_lock_irq(&tsk->sighand->siglock); - cval = tsk->signal->it_prof_expires; - cinterval = tsk->signal->it_prof_incr; - if (!cputime_eq(cval, cputime_zero) || - !cputime_eq(nval, cputime_zero)) { - if (cputime_gt(nval, cputime_zero)) - nval = cputime_add(nval, - jiffies_to_cputime(1)); - set_process_cpu_timer(tsk, CPUCLOCK_PROF, - &nval, &cval); - } - tsk->signal->it_prof_expires = nval; - tsk->signal->it_prof_incr = ninterval; - spin_unlock_irq(&tsk->sighand->siglock); - if (ovalue) { - cputime_to_timeval(cval, &ovalue->it_value); - cputime_to_timeval(cinterval, &ovalue->it_interval); - } + set_cpu_itimer(tsk, CPUCLOCK_PROF, value, ovalue); break; default: return -EINVAL; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index bece7c0b67b2..9b2d5e4dc8c4 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -14,11 +14,11 @@ */ void update_rlimit_cpu(unsigned long rlim_new) { - cputime_t cputime; + cputime_t cputime = secs_to_cputime(rlim_new); + struct signal_struct *const sig = current->signal; - cputime = secs_to_cputime(rlim_new); - if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || - cputime_gt(current->signal->it_prof_expires, cputime)) { + if (cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) || + cputime_gt(sig->it[CPUCLOCK_PROF].expires, cputime)) { spin_lock_irq(¤t->sighand->siglock); set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); spin_unlock_irq(¤t->sighand->siglock); @@ -613,6 +613,9 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) break; } } else { + struct signal_struct *const sig = p->signal; + union cpu_time_count *exp = &timer->it.cpu.expires; + /* * For a process timer, set the cached expiration time. 
*/ @@ -620,30 +623,27 @@ static void arm_timer(struct k_itimer *timer, union cpu_time_count now) default: BUG(); case CPUCLOCK_VIRT: - if (!cputime_eq(p->signal->it_virt_expires, + if (!cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && - cputime_lt(p->signal->it_virt_expires, - timer->it.cpu.expires.cpu)) + cputime_lt(sig->it[CPUCLOCK_VIRT].expires, + exp->cpu)) break; - p->signal->cputime_expires.virt_exp = - timer->it.cpu.expires.cpu; + sig->cputime_expires.virt_exp = exp->cpu; break; case CPUCLOCK_PROF: - if (!cputime_eq(p->signal->it_prof_expires, + if (!cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) && - cputime_lt(p->signal->it_prof_expires, - timer->it.cpu.expires.cpu)) + cputime_lt(sig->it[CPUCLOCK_PROF].expires, + exp->cpu)) break; - i = p->signal->rlim[RLIMIT_CPU].rlim_cur; + i = sig->rlim[RLIMIT_CPU].rlim_cur; if (i != RLIM_INFINITY && - i <= cputime_to_secs(timer->it.cpu.expires.cpu)) + i <= cputime_to_secs(exp->cpu)) break; - p->signal->cputime_expires.prof_exp = - timer->it.cpu.expires.cpu; + sig->cputime_expires.prof_exp = exp->cpu; break; case CPUCLOCK_SCHED: - p->signal->cputime_expires.sched_exp = - timer->it.cpu.expires.sched; + sig->cputime_expires.sched_exp = exp->sched; break; } } @@ -1070,6 +1070,27 @@ static void stop_process_timers(struct task_struct *tsk) spin_unlock_irqrestore(&cputimer->lock, flags); } +static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, + cputime_t *expires, cputime_t cur_time, int signo) +{ + if (cputime_eq(it->expires, cputime_zero)) + return; + + if (cputime_ge(cur_time, it->expires)) { + it->expires = it->incr; + if (!cputime_eq(it->expires, cputime_zero)) + it->expires = cputime_add(it->expires, cur_time); + + __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); + } + + if (!cputime_eq(it->expires, cputime_zero) && + (cputime_eq(*expires, cputime_zero) || + cputime_lt(it->expires, *expires))) { + *expires = it->expires; + } +} + /* * Check for any per-thread CPU timers that have fired and move them * off the tsk->*_timers list onto the firing list. Per-thread timers @@ -1089,10 +1110,10 @@ static void check_process_timers(struct task_struct *tsk, * Don't sample the current process CPU clocks if there are no timers. */ if (list_empty(&timers[CPUCLOCK_PROF]) && - cputime_eq(sig->it_prof_expires, cputime_zero) && + cputime_eq(sig->it[CPUCLOCK_PROF].expires, cputime_zero) && sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY && list_empty(&timers[CPUCLOCK_VIRT]) && - cputime_eq(sig->it_virt_expires, cputime_zero) && + cputime_eq(sig->it[CPUCLOCK_VIRT].expires, cputime_zero) && list_empty(&timers[CPUCLOCK_SCHED])) { stop_process_timers(tsk); return; @@ -1152,38 +1173,11 @@ static void check_process_timers(struct task_struct *tsk, /* * Check for the special case process timers. */ - if (!cputime_eq(sig->it_prof_expires, cputime_zero)) { - if (cputime_ge(ptime, sig->it_prof_expires)) { - /* ITIMER_PROF fires and reloads. */ - sig->it_prof_expires = sig->it_prof_incr; - if (!cputime_eq(sig->it_prof_expires, cputime_zero)) { - sig->it_prof_expires = cputime_add( - sig->it_prof_expires, ptime); - } - __group_send_sig_info(SIGPROF, SEND_SIG_PRIV, tsk); - } - if (!cputime_eq(sig->it_prof_expires, cputime_zero) && - (cputime_eq(prof_expires, cputime_zero) || - cputime_lt(sig->it_prof_expires, prof_expires))) { - prof_expires = sig->it_prof_expires; - } - } - if (!cputime_eq(sig->it_virt_expires, cputime_zero)) { - if (cputime_ge(utime, sig->it_virt_expires)) { - /* ITIMER_VIRTUAL fires and reloads. 
*/ - sig->it_virt_expires = sig->it_virt_incr; - if (!cputime_eq(sig->it_virt_expires, cputime_zero)) { - sig->it_virt_expires = cputime_add( - sig->it_virt_expires, utime); - } - __group_send_sig_info(SIGVTALRM, SEND_SIG_PRIV, tsk); - } - if (!cputime_eq(sig->it_virt_expires, cputime_zero) && - (cputime_eq(virt_expires, cputime_zero) || - cputime_lt(sig->it_virt_expires, virt_expires))) { - virt_expires = sig->it_virt_expires; - } - } + check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime, + SIGPROF); + check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime, + SIGVTALRM); + if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { unsigned long psecs = cputime_to_secs(ptime); cputime_t x; -- cgit v1.2.3 From 8356b5f9c424e5831715abbce747197c30d1fd71 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 29 Jul 2009 12:15:27 +0200 Subject: itimers: Fix periodic tics precision Measure ITIMER_PROF and ITIMER_VIRT timers interval error between real ticks and requested by user. Take it into account when scheduling next tick. This patch introduce possibility where time between two consecutive tics is smaller then requested interval, it preserve however dependency that n tick is generated not earlier than n*interval time - counting from the beginning of periodic signal generation. Signed-off-by: Stanislaw Gruszka Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <1248862529-6063-3-git-send-email-sgruszka@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ kernel/itimer.c | 24 +++++++++++++++++++++--- kernel/posix-cpu-timers.c | 20 +++++++++++++++++--- 3 files changed, 40 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3b3efaddd953..a069e65e8bb7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -473,6 +473,8 @@ struct pacct_struct { struct cpu_itimer { cputime_t expires; cputime_t incr; + u32 error; + u32 incr_error; }; /** diff --git a/kernel/itimer.c b/kernel/itimer.c index 852c88ddd1f0..21adff7b2a17 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -42,7 +42,7 @@ static struct timeval itimer_get_remtime(struct hrtimer *timer) } static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, - struct itimerval *value) + struct itimerval *const value) { cputime_t cval, cinterval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; @@ -127,14 +127,32 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } +static inline u32 cputime_sub_ns(cputime_t ct, s64 real_ns) +{ + struct timespec ts; + s64 cpu_ns; + + cputime_to_timespec(ct, &ts); + cpu_ns = timespec_to_ns(&ts); + + return (cpu_ns <= real_ns) ? 
0 : cpu_ns - real_ns; +} + static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, - struct itimerval *value, struct itimerval *ovalue) + const struct itimerval *const value, + struct itimerval *const ovalue) { - cputime_t cval, cinterval, nval, ninterval; + cputime_t cval, nval, cinterval, ninterval; + s64 ns_ninterval, ns_nval; struct cpu_itimer *it = &tsk->signal->it[clock_id]; nval = timeval_to_cputime(&value->it_value); + ns_nval = timeval_to_ns(&value->it_value); ninterval = timeval_to_cputime(&value->it_interval); + ns_ninterval = timeval_to_ns(&value->it_interval); + + it->incr_error = cputime_sub_ns(ninterval, ns_ninterval); + it->error = cputime_sub_ns(nval, ns_nval); spin_lock_irq(&tsk->sighand->siglock); diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 9b2d5e4dc8c4..b60d644ea4b3 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1070,6 +1070,8 @@ static void stop_process_timers(struct task_struct *tsk) spin_unlock_irqrestore(&cputimer->lock, flags); } +static u32 onecputick; + static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, cputime_t *expires, cputime_t cur_time, int signo) { @@ -1077,9 +1079,16 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, return; if (cputime_ge(cur_time, it->expires)) { - it->expires = it->incr; - if (!cputime_eq(it->expires, cputime_zero)) - it->expires = cputime_add(it->expires, cur_time); + if (!cputime_eq(it->incr, cputime_zero)) { + it->expires = cputime_add(it->expires, it->incr); + it->error += it->incr_error; + if (it->error >= onecputick) { + it->expires = cputime_sub(it->expires, + jiffies_to_cputime(1)); + it->error -= onecputick; + } + } else + it->expires = cputime_zero; __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); } @@ -1696,10 +1705,15 @@ static __init int init_posix_cpu_timers(void) .nsleep = thread_cpu_nsleep, .nsleep_restart = thread_cpu_nsleep_restart, }; + struct timespec ts; register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); + cputime_to_timespec(jiffies_to_cputime(1), &ts); + onecputick = ts.tv_nsec; + WARN_ON(ts.tv_sec != 0); + return 0; } __initcall(init_posix_cpu_timers); -- cgit v1.2.3 From a42548a18866e87092db93b771e6c5b060d78401 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 29 Jul 2009 12:15:29 +0200 Subject: cputime: Optimize jiffies_to_cputime(1) For powerpc with CONFIG_VIRT_CPU_ACCOUNTING jiffies_to_cputime(1) is not compile time constant and run time calculations are quite expensive. To optimize we use precomputed value. For all other architectures is is preprocessor definition. 
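The core of that change, condensed: the per-tick conversion is hoisted into a value cached once at init. This is an illustrative paraphrase of the kernel/sched.c hunk further down, keeping only the user-time branch; the *_before/*_after function names exist only for this sketch and are not in the patch.

#include <linux/sched.h>

/*
 * Before: jiffies_to_cputime(1) is evaluated on every tick, which is
 * costly when CONFIG_VIRT_CPU_ACCOUNTING turns it into a runtime
 * calculation (as on powerpc).
 */
void account_process_tick_before(struct task_struct *p, int user_tick)
{
	cputime_t one_jiffy = jiffies_to_cputime(1);

	if (user_tick)
		account_user_time(p, one_jiffy, cputime_to_scaled(one_jiffy));
}

/* After: the value is computed once during init and reused on the hot path. */
cputime_t cputime_one_jiffy;

void setup_cputime_one_jiffy(void)
{
	cputime_one_jiffy = jiffies_to_cputime(1);
}

void account_process_tick_after(struct task_struct *p, int user_tick)
{
	if (user_tick)
		account_user_time(p, cputime_one_jiffy,
				  cputime_to_scaled(cputime_one_jiffy));
}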
Signed-off-by: Stanislaw Gruszka Acked-by: Peter Zijlstra Acked-by: Thomas Gleixner Cc: Oleg Nesterov Cc: Andrew Morton Cc: Paul Mackerras Cc: Benjamin Herrenschmidt LKML-Reference: <1248862529-6063-5-git-send-email-sgruszka@redhat.com> Signed-off-by: Ingo Molnar --- arch/ia64/include/asm/cputime.h | 1 + arch/powerpc/include/asm/cputime.h | 13 +++++++++++++ arch/powerpc/kernel/time.c | 4 ++++ arch/s390/include/asm/cputime.h | 1 + include/asm-generic/cputime.h | 1 + kernel/itimer.c | 4 ++-- kernel/posix-cpu-timers.c | 6 +++--- kernel/sched.c | 9 ++++----- 8 files changed, 29 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index d20b998cb91d..7fa8a8594660 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -30,6 +30,7 @@ typedef u64 cputime_t; typedef u64 cputime64_t; #define cputime_zero ((cputime_t)0) +#define cputime_one_jiffy jiffies_to_cputime(1) #define cputime_max ((~((cputime_t)0) >> 1) - 1) #define cputime_add(__a, __b) ((__a) + (__b)) #define cputime_sub(__a, __b) ((__a) - (__b)) diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index f42e623030ee..fa19f3fe05ff 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -18,6 +18,9 @@ #ifndef CONFIG_VIRT_CPU_ACCOUNTING #include +#ifdef __KERNEL__ +static inline void setup_cputime_one_jiffy(void) { } +#endif #else #include @@ -48,6 +51,11 @@ typedef u64 cputime64_t; #ifdef __KERNEL__ +/* + * One jiffy in timebase units computed during initialization + */ +extern cputime_t cputime_one_jiffy; + /* * Convert cputime <-> jiffies */ @@ -89,6 +97,11 @@ static inline cputime_t jiffies_to_cputime(const unsigned long jif) return ct; } +static inline void setup_cputime_one_jiffy(void) +{ + cputime_one_jiffy = jiffies_to_cputime(1); +} + static inline cputime64_t jiffies64_to_cputime64(const u64 jif) { cputime_t ct; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511ceeac..211d7b0cd370 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -193,6 +193,8 @@ EXPORT_SYMBOL(__cputime_clockt_factor); DEFINE_PER_CPU(unsigned long, cputime_last_delta); DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); +cputime_t cputime_one_jiffy; + static void calc_cputime_factors(void) { struct div_result res; @@ -500,6 +502,7 @@ static int __init iSeries_tb_recal(void) tb_to_xs = divres.result_low; vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; vdso_data->tb_to_xs = tb_to_xs; + setup_cputime_one_jiffy(); } else { printk( "Titan recalibrate: FAILED (difference > 4 percent)\n" @@ -945,6 +948,7 @@ void __init time_init(void) tb_ticks_per_usec = ppc_tb_freq / 1000000; tb_to_us = mulhwu_scale_factor(ppc_tb_freq, 1000000); calc_cputime_factors(); + setup_cputime_one_jiffy(); /* * Calculate the length of each tick in ns. 
It will not be diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 7a3817a656df..24b1244aadb9 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -42,6 +42,7 @@ __div(unsigned long long n, unsigned int base) #endif /* __s390x__ */ #define cputime_zero (0ULL) +#define cputime_one_jiffy jiffies_to_cputime(1) #define cputime_max ((~0UL >> 1) - 1) #define cputime_add(__a, __b) ((__a) + (__b)) #define cputime_sub(__a, __b) ((__a) - (__b)) diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h index 1c1fa422d18a..ca0f239f0e13 100644 --- a/include/asm-generic/cputime.h +++ b/include/asm-generic/cputime.h @@ -7,6 +7,7 @@ typedef unsigned long cputime_t; #define cputime_zero (0UL) +#define cputime_one_jiffy jiffies_to_cputime(1) #define cputime_max ((~0UL >> 1) - 1) #define cputime_add(__a, __b) ((__a) + (__b)) #define cputime_sub(__a, __b) ((__a) - (__b)) diff --git a/kernel/itimer.c b/kernel/itimer.c index 21adff7b2a17..8078a32d3b10 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -64,7 +64,7 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, if (cputime_le(cval, t)) /* about to fire */ - cval = jiffies_to_cputime(1); + cval = cputime_one_jiffy; else cval = cputime_sub(cval, t); } @@ -161,7 +161,7 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, if (!cputime_eq(cval, cputime_zero) || !cputime_eq(nval, cputime_zero)) { if (cputime_gt(nval, cputime_zero)) - nval = cputime_add(nval, jiffies_to_cputime(1)); + nval = cputime_add(nval, cputime_one_jiffy); set_process_cpu_timer(tsk, clock_id, &nval, &cval); } it->expires = nval; diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 69c92374355f..18bdde6f676f 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -1086,7 +1086,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, it->error += it->incr_error; if (it->error >= onecputick) { it->expires = cputime_sub(it->expires, - jiffies_to_cputime(1)); + cputime_one_jiffy); it->error -= onecputick; } } else @@ -1461,7 +1461,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx, if (!cputime_eq(*oldval, cputime_zero)) { if (cputime_le(*oldval, now.cpu)) { /* Just about to fire. 
*/ - *oldval = jiffies_to_cputime(1); + *oldval = cputime_one_jiffy; } else { *oldval = cputime_sub(*oldval, now.cpu); } @@ -1712,7 +1712,7 @@ static __init int init_posix_cpu_timers(void) register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); - cputime_to_timespec(jiffies_to_cputime(1), &ts); + cputime_to_timespec(cputime_one_jiffy, &ts); onecputick = ts.tv_nsec; WARN_ON(ts.tv_sec != 0); diff --git a/kernel/sched.c b/kernel/sched.c index 1b59e265273b..8f977d5cc515 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5031,17 +5031,16 @@ void account_idle_time(cputime_t cputime) */ void account_process_tick(struct task_struct *p, int user_tick) { - cputime_t one_jiffy = jiffies_to_cputime(1); - cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy); + cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); struct rq *rq = this_rq(); if (user_tick) - account_user_time(p, one_jiffy, one_jiffy_scaled); + account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) - account_system_time(p, HARDIRQ_OFFSET, one_jiffy, + account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, one_jiffy_scaled); else - account_idle_time(one_jiffy); + account_idle_time(cputime_one_jiffy); } /* -- cgit v1.2.3 From 7699ad35ed06044c4fc1be162553880f98658616 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 15 Jun 2009 01:10:18 -0400 Subject: mtd: let include/linux/mtd/partitions.h stand on its own When declaring static MTD partitions in board specific code, only including <linux/mtd/partitions.h> should suffice without gcc nagging us with: In file included from arch/arm/mach-kirkwood/sheevaplug-setup.c:14: include/linux/mtd/partitions.h:50: warning: 'struct mtd_info' declared inside parameter list include/linux/mtd/partitions.h:50: warning: its scope is only this definition or declaration, which is probably not what you want include/linux/mtd/partitions.h:51: warning: 'struct mtd_info' declared inside parameter list include/linux/mtd/partitions.h:61: warning: 'struct mtd_info' declared inside parameter list include/linux/mtd/partitions.h:67: warning: 'struct mtd_info' declared inside parameter list Signed-off-by: Nicolas Pitre Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/partitions.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index af6dcb992bc3..b70313d33ff8 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -47,6 +47,8 @@ struct mtd_partition { #define MTDPART_SIZ_FULL (0) +struct mtd_info; + int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); int del_mtd_partitions(struct mtd_info *); -- cgit v1.2.3 From 6afc4fdb3e94ba60cd566cb878b60c6c01979277 Mon Sep 17 00:00:00 2001 From: Saeed Bishara Date: Tue, 28 Jul 2009 04:56:43 -0700 Subject: mtd: fix the conversion from dev to mtd_info The patch fixes a bug in the conversion from dev to mtd_info by using the drvdata of the dev; the previous code used container_of(dev, struct mtd_info, dev), which does not work for the mtdXro devices because they are created without being contained inside an mtd_info structure.
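The fix pairs a dev_set_drvdata() call at registration time with a dev_get_drvdata() lookup, so the conversion no longer assumes the struct device is embedded in an mtd_info. A condensed sketch of that pairing (the function names here are illustrative; both halves appear in the diff below):

/* add_mtd_device() side: store a back-pointer on the class device */
static int mtd_register_dev(struct mtd_info *mtd)
{
	dev_set_drvdata(&mtd->dev, mtd);
	return device_register(&mtd->dev);
}

/* dev_to_mtd() side: works for mtdX and mtdXro devices alike */
static struct mtd_info *mtd_from_dev(struct device *dev)
{
	return dev ? dev_get_drvdata(dev) : NULL;
}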
Signed-off-by: Saeed Bishara Signed-off-by: David Woodhouse --- drivers/mtd/mtdcore.c | 7 ++++--- include/linux/mtd/mtd.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index fac54a3fa3f1..00ebf7af7467 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -65,8 +65,8 @@ static void mtd_release(struct device *dev) static int mtd_cls_suspend(struct device *dev, pm_message_t state) { struct mtd_info *mtd = dev_to_mtd(dev); - - if (mtd->suspend) + + if (mtd && mtd->suspend) return mtd->suspend(mtd); else return 0; @@ -76,7 +76,7 @@ static int mtd_cls_resume(struct device *dev) { struct mtd_info *mtd = dev_to_mtd(dev); - if (mtd->resume) + if (mtd && mtd->resume) mtd->resume(mtd); return 0; } @@ -298,6 +298,7 @@ int add_mtd_device(struct mtd_info *mtd) mtd->dev.class = &mtd_class; mtd->dev.devt = MTD_DEVT(i); dev_set_name(&mtd->dev, "mtd%d", i); + dev_set_drvdata(&mtd->dev, mtd); if (device_register(&mtd->dev) != 0) { mtd_table[i] = NULL; break; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 5675b63a0631..0f32a9b6ff55 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -251,7 +251,7 @@ struct mtd_info { static inline struct mtd_info *dev_to_mtd(struct device *dev) { - return dev ? container_of(dev, struct mtd_info, dev) : NULL; + return dev ? dev_get_drvdata(dev) : NULL; } static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) -- cgit v1.2.3 From 3e352aa8ee2bd48f1a19c7742810b3a4a7ba605e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 3 Aug 2009 14:10:11 +0900 Subject: x86, percpu: Fix DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() put percpu variables in .page_aligned section without adding any alignment restrictions. Currently, this doesn't cause any problem because all users of the macros have explicit page alignment and page-sized but it's much safer to enforce page alignment from the macros. After all, it's what they claim to do. Add __aligned(PAGE_SIZE) to DECLARE/DEFINE_PER_CPU_PAGE_ALIGNED() and drop explicit alignment from it users. Signed-off-by: Tejun Heo Cc: Ingo Molnar Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 3 +-- include/linux/percpu-defs.h | 8 +++++--- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f1961c07af9a..12493c5485e5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1008,8 +1008,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { }; static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks - [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) - __aligned(PAGE_SIZE); + [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); /* May not be marked __init: used by software suspend */ void syscall_init(void) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 68438e18fff4..afd5f8b7061f 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -69,11 +69,13 @@ /* * Declaration/definition used for per-CPU variables that must be page aligned. 
*/ -#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ - DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") +#define DECLARE_PER_CPU_PAGE_ALIGNED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, ".page_aligned") \ + __aligned(PAGE_SIZE) #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ - DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") + DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") \ + __aligned(PAGE_SIZE) /* * Intermodule exports for per-CPU variables. -- cgit v1.2.3 From af0d3b103bcfa877343ee338de12002cd50c9ee5 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Mon, 3 Aug 2009 04:26:16 +0000 Subject: bluetooth: rfcomm_init bug fix rfcomm tty may be used before rfcomm_tty_driver initilized, The problem is that now socket layer init before tty layer, if userspace program do socket callback right here then oops will happen. reporting in: http://marc.info/?l=linux-bluetooth&m=124404919324542&w=2 make 3 changes: 1. remove #ifdef in rfcomm/core.c, make it blank function when rfcomm tty not selected in rfcomm.h 2. tune the rfcomm_init error patch to ensure tty driver initilized before rfcomm socket usage. 3. remove __exit for rfcomm_cleanup_sockets because above change need call it in a __init function. Reported-by: Oliver Hartkopp Tested-by: Oliver Hartkopp Signed-off-by: Dave Young Signed-off-by: David S. Miller --- include/net/bluetooth/rfcomm.h | 12 +++++++++++- net/bluetooth/rfcomm/core.c | 27 +++++++++++++++++++-------- net/bluetooth/rfcomm/sock.c | 2 +- 3 files changed, 31 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 80072611d26a..c274993234e3 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -355,7 +355,17 @@ struct rfcomm_dev_list_req { }; int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); + +#ifdef CONFIG_BT_RFCOMM_TTY int rfcomm_init_ttys(void); void rfcomm_cleanup_ttys(void); - +#else +static inline int rfcomm_init_ttys(void) +{ + return 0; +} +static inline void rfcomm_cleanup_ttys(void) +{ +} +#endif #endif /* __RFCOMM_H */ diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index e50566ebf9f9..94b3388c188b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2080,28 +2080,41 @@ static CLASS_ATTR(rfcomm_dlc, S_IRUGO, rfcomm_dlc_sysfs_show, NULL); /* ---- Initialization ---- */ static int __init rfcomm_init(void) { + int ret; + l2cap_load(); hci_register_cb(&rfcomm_cb); rfcomm_thread = kthread_run(rfcomm_run, NULL, "krfcommd"); if (IS_ERR(rfcomm_thread)) { - hci_unregister_cb(&rfcomm_cb); - return PTR_ERR(rfcomm_thread); + ret = PTR_ERR(rfcomm_thread); + goto out_thread; } if (class_create_file(bt_class, &class_attr_rfcomm_dlc) < 0) BT_ERR("Failed to create RFCOMM info file"); - rfcomm_init_sockets(); + ret = rfcomm_init_ttys(); + if (ret) + goto out_tty; -#ifdef CONFIG_BT_RFCOMM_TTY - rfcomm_init_ttys(); -#endif + ret = rfcomm_init_sockets(); + if (ret) + goto out_sock; BT_INFO("RFCOMM ver %s", VERSION); return 0; + +out_sock: + rfcomm_cleanup_ttys(); +out_tty: + kthread_stop(rfcomm_thread); +out_thread: + hci_unregister_cb(&rfcomm_cb); + + return ret; } static void __exit rfcomm_exit(void) @@ -2112,9 +2125,7 @@ static void __exit rfcomm_exit(void) kthread_stop(rfcomm_thread); -#ifdef CONFIG_BT_RFCOMM_TTY rfcomm_cleanup_ttys(); -#endif rfcomm_cleanup_sockets(); } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7f482784e9f7..0b85e8116859 100644 --- 
a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1132,7 +1132,7 @@ error: return err; } -void __exit rfcomm_cleanup_sockets(void) +void rfcomm_cleanup_sockets(void) { class_remove_file(bt_class, &class_attr_rfcomm); -- cgit v1.2.3 From 371842448c05b42d11a4be1c8e4e81d62ecc7534 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 30 Jul 2009 17:43:48 -0700 Subject: cfg80211: fix regression on beacon world roaming feature A regression was added through patch a4ed90d6: "cfg80211: respect API on orig_flags on channel for beacon hint" We did indeed respect _orig flags but the intention was not clearly stated in the commit log. This patch fixes firmware issues picked up by iwlwifi when we lift passive scan of beaconing restrictions on channels its EEPROM has been configured to always enable. By doing so though we also disallowed beacon hints on devices registering their wiphy with custom world regulatory domains enabled, this happens to be currently ath5k, ath9k and ar9170. The passive scan and beacon restrictions on those devices would never be lifted even if we did find a beacon and the hardware did support such enhancements when world roaming. Since Johannes indicates iwlwifi firmware cannot be changed to allow beacon hinting we set up a flag now to specifically allow drivers to disable beacon hints for devices which cannot use them. We enable the flag on iwlwifi to disable beacon hints and by default enable it for all other drivers. It should be noted beacon hints lift passive scan flags and beacon restrictions when we receive a beacon from an AP on any 5 GHz non-DFS channels, and channels 12-14 on the 2.4 GHz band. We don't bother with channels 1-11 as those channels are allowed world wide. This should fix world roaming for ath5k, ath9k and ar9170, thereby improving scan time when we receive the first beacon from any AP, and also enabling beaconing operation (AP/IBSS/Mesh) on cards which would otherwise not be allowed to do so. Drivers not using custom regulatory stuff (wiphy_apply_custom_regulatory()) were not affected by this as the orig_flags for the channels would have been cleared upon wiphy registration. I tested this with a world roaming ath5k card. Cc: Jouni Malinen Signed-off-by: Luis R. Rodriguez Reviewed-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-core.c | 3 +++ drivers/net/wireless/iwlwifi/iwl3945-base.c | 3 +++ include/net/cfg80211.h | 5 +++++ net/wireless/reg.c | 9 +++++---- net/wireless/reg.h | 3 ++- 5 files changed, 18 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 6ab07165ea28..18b135f510e5 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -1332,6 +1332,9 @@ int iwl_setup_mac(struct iwl_priv *priv) hw->wiphy->custom_regulatory = true; + /* Firmware does not support this */ + hw->wiphy->disable_beacon_hints = true; + hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX; /* we create the 802.11 header and a zero-length SSID element */ hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 2f50ab60bfdf..523843369ca2 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -3968,6 +3968,9 @@ static int iwl3945_setup_mac(struct iwl_priv *priv) hw->wiphy->custom_regulatory = true; + /* Firmware does not support this */ + hw->wiphy->disable_beacon_hints = true; + hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX_3945; /* we create the 802.11 header and a zero-length SSID element */ hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1a21895b732b..d1892d66701a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -979,6 +979,10 @@ struct cfg80211_ops { * channels at a later time. This can be used for devices which do not * have calibration information gauranteed for frequencies or settings * outside of its regulatory domain. + * @disable_beacon_hints: enable this if your driver needs to ensure that + * passive scan flags and beaconing flags may not be lifted by cfg80211 + * due to regulatory beacon hints. For more information on beacon + * hints read the documenation for regulatory_hint_found_beacon() * @reg_notifier: the driver's regulatory notification callback * @regd: the driver's regulatory domain, if one was requested via * the regulatory_hint() API. This can be used by the driver @@ -1004,6 +1008,7 @@ struct wiphy { bool custom_regulatory; bool strict_regulatory; + bool disable_beacon_hints; enum cfg80211_signal_type signal_type; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5e14371cda70..75a406d33619 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1089,17 +1089,18 @@ static void handle_reg_beacon(struct wiphy *wiphy, chan->beacon_found = true; + if (wiphy->disable_beacon_hints) + return; + chan_before.center_freq = chan->center_freq; chan_before.flags = chan->flags; - if ((chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) && - !(chan->orig_flags & IEEE80211_CHAN_PASSIVE_SCAN)) { + if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) { chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN; channel_changed = true; } - if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && - !(chan->orig_flags & IEEE80211_CHAN_NO_IBSS)) { + if (chan->flags & IEEE80211_CHAN_NO_IBSS) { chan->flags &= ~IEEE80211_CHAN_NO_IBSS; channel_changed = true; } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index e37829a49dc4..4e167a8e11be 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -30,7 +30,8 @@ int set_regdom(const struct ieee80211_regdomain *rd); * non-radar 5 GHz channels. 
* * Drivers do not need to call this, cfg80211 will do it for after a scan - * on a newly found BSS. + * on a newly found BSS. If you cannot make use of this feature you can + * set the wiphy->disable_beacon_hints to true. */ int regulatory_hint_found_beacon(struct wiphy *wiphy, struct ieee80211_channel *beacon_chan, -- cgit v1.2.3 From 7320700df1864b601cef5adbddce8654a0e3f78b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Aug 2009 17:01:53 -0400 Subject: drm/radeon: add some new r7xx pci ids Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 7174818c2c13..9d4c00491547 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -257,9 +257,12 @@ {0x1002, 0x940F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \ {0x1002, 0x94A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x94A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x94B1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ {0x1002, 0x94B3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94B4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ {0x1002, 0x94B5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94B9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9440, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9441, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9442, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ @@ -288,6 +291,7 @@ {0x1002, 0x948F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9490, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9491, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x9495, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9498, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \ {0x1002, 0x949C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \ {0x1002, 0x949E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \ @@ -325,6 +329,7 @@ {0x1002, 0x9552, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV710|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9553, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV710|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9555, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV710|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x9557, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV710|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9580, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV630|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9581, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV630|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9583, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV630|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ -- cgit v1.2.3 From 8a4c47f346cc7a12d0897c05eb3cc1add26b487f Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 20 Jul 2009 13:48:04 +0800 Subject: drm: Remove the unused prefix in DRM_DEBUG_KMS/DRIVER/MODE We will have to add a prefix when using the macro defintion of DRM_DEBUG_KMS /DRM_DEBUG_DRIVER/MODE. It is not convenient. 
We should use the DRM_NAME as default prefix. So remove the prefix in the macro definition of DRM_DEBUG_KMS/DRIVER/MODE. Signed-off-by: Zhao Yakui Acked-by: Ian Romanick Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 8 +++----- drivers/gpu/drm/i915/i915_dma.c | 35 +++++++++++++++-------------------- drivers/gpu/drm/i915/intel_lvds.c | 10 +++------- drivers/gpu/drm/i915/intel_sdvo.c | 35 ++++++++++++++++------------------- include/drm/drmP.h | 18 +++++++++--------- 5 files changed, 46 insertions(+), 60 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index fd489d76fbbc..5eca2d5c5f23 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -40,7 +40,6 @@ #include "drm.h" #include "drm_crtc.h" -#define DRM_MODESET_DEBUG "drm_mode" /** * drm_mode_debug_printmodeline - debug print a mode * @dev: DRM device @@ -53,8 +52,8 @@ */ void drm_mode_debug_printmodeline(struct drm_display_mode *mode) { - DRM_DEBUG_MODE(DRM_MODESET_DEBUG, - "Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x\n", + DRM_DEBUG_MODE("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d " + "0x%x 0x%x\n", mode->base.id, mode->name, mode->vrefresh, mode->clock, mode->hdisplay, mode->hsync_start, mode->hsync_end, mode->htotal, @@ -819,8 +818,7 @@ void drm_mode_prune_invalid(struct drm_device *dev, list_del(&mode->head); if (verbose) { drm_mode_debug_printmodeline(mode); - DRM_DEBUG_MODE(DRM_MODESET_DEBUG, - "Not using %s mode %d\n", + DRM_DEBUG_MODE("Not using %s mode %d\n", mode->name, mode->status); } drm_mode_destroy(dev, mode); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8c4783180bf6..14625e146f18 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -33,8 +33,6 @@ #include "i915_drm.h" #include "i915_drv.h" -#define I915_DRV "i915_drv" - /* Really want an OS-independent resettable timer. 
Would like to have * this loop run for (eg) 3 sec, but have the timer reset every time * the head pointer changes, so that EBUSY only happens if the ring @@ -101,7 +99,7 @@ static int i915_init_phys_hws(struct drm_device *dev) memset(dev_priv->hw_status_page, 0, PAGE_SIZE); I915_WRITE(HWS_PGA, dev_priv->dma_status_page); - DRM_DEBUG_DRIVER(I915_DRV, "Enabled hardware status page\n"); + DRM_DEBUG_DRIVER("Enabled hardware status page\n"); return 0; } @@ -187,8 +185,7 @@ static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init) master_priv->sarea_priv = (drm_i915_sarea_t *) ((u8 *)master_priv->sarea->handle + init->sarea_priv_offset); } else { - DRM_DEBUG_DRIVER(I915_DRV, - "sarea not found assuming DRI2 userspace\n"); + DRM_DEBUG_DRIVER("sarea not found assuming DRI2 userspace\n"); } if (init->ring_size != 0) { @@ -238,7 +235,7 @@ static int i915_dma_resume(struct drm_device * dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; - DRM_DEBUG_DRIVER(I915_DRV, "%s\n", __func__); + DRM_DEBUG_DRIVER("%s\n", __func__); if (dev_priv->ring.map.handle == NULL) { DRM_ERROR("can not ioremap virtual address for" @@ -251,14 +248,14 @@ static int i915_dma_resume(struct drm_device * dev) DRM_ERROR("Can not find hardware status page\n"); return -EINVAL; } - DRM_DEBUG_DRIVER(I915_DRV, "hw status page @ %p\n", + DRM_DEBUG_DRIVER("hw status page @ %p\n", dev_priv->hw_status_page); if (dev_priv->status_gfx_addr != 0) I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); else I915_WRITE(HWS_PGA, dev_priv->dma_status_page); - DRM_DEBUG_DRIVER(I915_DRV, "Enabled hardware status page\n"); + DRM_DEBUG_DRIVER("Enabled hardware status page\n"); return 0; } @@ -552,7 +549,7 @@ static int i915_dispatch_flip(struct drm_device * dev) if (!master_priv->sarea_priv) return -EINVAL; - DRM_DEBUG_DRIVER(I915_DRV, "%s: page=%d pfCurrentPage=%d\n", + DRM_DEBUG_DRIVER("%s: page=%d pfCurrentPage=%d\n", __func__, dev_priv->current_page, master_priv->sarea_priv->pf_current_page); @@ -633,8 +630,7 @@ static int i915_batchbuffer(struct drm_device *dev, void *data, return -EINVAL; } - DRM_DEBUG_DRIVER(I915_DRV, - "i915 batchbuffer, start %x used %d cliprects %d\n", + DRM_DEBUG_DRIVER("i915 batchbuffer, start %x used %d cliprects %d\n", batch->start, batch->used, batch->num_cliprects); RING_LOCK_TEST_WITH_RETURN(dev, file_priv); @@ -681,8 +677,7 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, void *batch_data; int ret; - DRM_DEBUG_DRIVER(I915_DRV, - "i915 cmdbuffer, buf %p sz %d cliprects %d\n", + DRM_DEBUG_DRIVER("i915 cmdbuffer, buf %p sz %d cliprects %d\n", cmdbuf->buf, cmdbuf->sz, cmdbuf->num_cliprects); RING_LOCK_TEST_WITH_RETURN(dev, file_priv); @@ -735,7 +730,7 @@ static int i915_flip_bufs(struct drm_device *dev, void *data, { int ret; - DRM_DEBUG_DRIVER(I915_DRV, "%s\n", __func__); + DRM_DEBUG_DRIVER("%s\n", __func__); RING_LOCK_TEST_WITH_RETURN(dev, file_priv); @@ -778,7 +773,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = dev_priv->num_fence_regs - dev_priv->fence_reg_start; break; default: - DRM_DEBUG_DRIVER(I915_DRV, "Unknown parameter %d\n", + DRM_DEBUG_DRIVER("Unknown parameter %d\n", param->param); return -EINVAL; } @@ -819,7 +814,7 @@ static int i915_setparam(struct drm_device *dev, void *data, dev_priv->fence_reg_start = param->value; break; default: - DRM_DEBUG_DRIVER(I915_DRV, "unknown parameter %d\n", + DRM_DEBUG_DRIVER("unknown parameter %d\n", param->param); return -EINVAL; } @@ -846,7 +841,7 @@ static int i915_set_status_page(struct 
drm_device *dev, void *data, return 0; } - DRM_DEBUG("set status page addr 0x%08x\n", (u32)hws->addr); + DRM_DEBUG_DRIVER("set status page addr 0x%08x\n", (u32)hws->addr); dev_priv->status_gfx_addr = hws->addr & (0x1ffff<<12); @@ -868,9 +863,9 @@ static int i915_set_status_page(struct drm_device *dev, void *data, memset(dev_priv->hw_status_page, 0, PAGE_SIZE); I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr); - DRM_DEBUG_DRIVER(I915_DRV, "load hws HWS_PGA with gfx mem 0x%x\n", + DRM_DEBUG_DRIVER("load hws HWS_PGA with gfx mem 0x%x\n", dev_priv->status_gfx_addr); - DRM_DEBUG_DRIVER(I915_DRV, "load hws at %p\n", + DRM_DEBUG_DRIVER("load hws at %p\n", dev_priv->hw_status_page); return 0; } @@ -1310,7 +1305,7 @@ int i915_driver_open(struct drm_device *dev, struct drm_file *file_priv) { struct drm_i915_file_private *i915_file_priv; - DRM_DEBUG_DRIVER(I915_DRV, "\n"); + DRM_DEBUG_DRIVER("\n"); i915_file_priv = (struct drm_i915_file_private *) kmalloc(sizeof(*i915_file_priv), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 9ab38efffecf..b59c65d19d81 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -38,8 +38,6 @@ #include "i915_drv.h" #include -#define I915_LVDS "i915_lvds" - /* * the following four scaling options are defined. * #define DRM_MODE_SCALE_NON_GPU 0 @@ -673,8 +671,7 @@ static int intel_lvds_set_property(struct drm_connector *connector, struct drm_crtc *crtc = connector->encoder->crtc; struct intel_lvds_priv *lvds_priv = intel_output->dev_priv; if (value == DRM_MODE_SCALE_NON_GPU) { - DRM_DEBUG_KMS(I915_LVDS, - "non_GPU property is unsupported\n"); + DRM_DEBUG_KMS("non_GPU property is unsupported\n"); return 0; } if (lvds_priv->fitting_mode == value) { @@ -731,8 +728,7 @@ static const struct drm_encoder_funcs intel_lvds_enc_funcs = { static int __init intel_no_lvds_dmi_callback(const struct dmi_system_id *id) { - DRM_DEBUG_KMS(I915_LVDS, - "Skipping LVDS initialization for %s\n", id->ident); + DRM_DEBUG_KMS("Skipping LVDS initialization for %s\n", id->ident); return 1; } @@ -1013,7 +1009,7 @@ out: return; failed: - DRM_DEBUG_KMS(I915_LVDS, "No LVDS modes found, disabling.\n"); + DRM_DEBUG_KMS("No LVDS modes found, disabling.\n"); if (intel_output->ddc_bus) intel_i2c_destroy(intel_output->ddc_bus); drm_connector_cleanup(connector); diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 4f0c30948bc4..abef69c8a49a 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -36,7 +36,6 @@ #include "intel_sdvo_regs.h" #undef SDVO_DEBUG -#define I915_SDVO "i915_sdvo" struct intel_sdvo_priv { u8 slave_addr; @@ -178,7 +177,7 @@ static bool intel_sdvo_read_byte(struct intel_output *intel_output, u8 addr, return true; } - DRM_DEBUG("i2c transfer returned %d\n", ret); + DRM_DEBUG_KMS("i2c transfer returned %d\n", ret); return false; } @@ -288,7 +287,7 @@ static void intel_sdvo_debug_write(struct intel_output *intel_output, u8 cmd, struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; int i; - DRM_DEBUG_KMS(I915_SDVO, "%s: W: %02X ", + DRM_DEBUG_KMS("%s: W: %02X ", SDVO_NAME(sdvo_priv), cmd); for (i = 0; i < args_len; i++) DRM_LOG_KMS("%02X ", ((u8 *)args)[i]); @@ -341,7 +340,7 @@ static void intel_sdvo_debug_response(struct intel_output *intel_output, struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; int i; - DRM_DEBUG_KMS(I915_SDVO, "%s: R: ", SDVO_NAME(sdvo_priv)); + DRM_DEBUG_KMS("%s: R: ", SDVO_NAME(sdvo_priv)); for (i = 
0; i < response_len; i++) DRM_LOG_KMS("%02X ", ((u8 *)response)[i]); for (; i < 8; i++) @@ -658,10 +657,10 @@ static int intel_sdvo_get_clock_rate_mult(struct intel_output *intel_output) status = intel_sdvo_read_response(intel_output, &response, 1); if (status != SDVO_CMD_STATUS_SUCCESS) { - DRM_DEBUG("Couldn't get SDVO clock rate multiplier\n"); + DRM_DEBUG_KMS("Couldn't get SDVO clock rate multiplier\n"); return SDVO_CLOCK_RATE_MULT_1X; } else { - DRM_DEBUG("Current clock rate multiplier: %d\n", response); + DRM_DEBUG_KMS("Current clock rate multiplier: %d\n", response); } return response; @@ -942,14 +941,14 @@ static void intel_sdvo_set_tv_format(struct intel_output *output) format = &sdvo_priv->tv_format; memset(&unset, 0, sizeof(unset)); if (memcmp(format, &unset, sizeof(*format))) { - DRM_DEBUG("%s: Choosing default TV format of NTSC-M\n", + DRM_DEBUG_KMS("%s: Choosing default TV format of NTSC-M\n", SDVO_NAME(sdvo_priv)); format->ntsc_m = 1; intel_sdvo_write_cmd(output, SDVO_CMD_SET_TV_FORMAT, format, sizeof(*format)); status = intel_sdvo_read_response(output, NULL, 0); if (status != SDVO_CMD_STATUS_SUCCESS) - DRM_DEBUG("%s: Failed to set TV format\n", + DRM_DEBUG_KMS("%s: Failed to set TV format\n", SDVO_NAME(sdvo_priv)); } } @@ -1220,8 +1219,8 @@ static void intel_sdvo_dpms(struct drm_encoder *encoder, int mode) * a given it the status is a success, we succeeded. */ if (status == SDVO_CMD_STATUS_SUCCESS && !input1) { - DRM_DEBUG("First %s output reported failure to sync\n", - SDVO_NAME(sdvo_priv)); + DRM_DEBUG_KMS("First %s output reported failure to " + "sync\n", SDVO_NAME(sdvo_priv)); } if (0) @@ -1316,8 +1315,8 @@ static void intel_sdvo_restore(struct drm_connector *connector) intel_wait_for_vblank(dev); status = intel_sdvo_get_trained_inputs(intel_output, &input1, &input2); if (status == SDVO_CMD_STATUS_SUCCESS && !input1) - DRM_DEBUG("First %s output reported failure to sync\n", - SDVO_NAME(sdvo_priv)); + DRM_DEBUG_KMS("First %s output reported failure to " + "sync\n", SDVO_NAME(sdvo_priv)); } intel_sdvo_set_active_outputs(intel_output, sdvo_priv->save_active_outputs); @@ -1395,7 +1394,7 @@ int intel_sdvo_supports_hotplug(struct drm_connector *connector) u8 response[2]; u8 status; struct intel_output *intel_output; - DRM_DEBUG("\n"); + DRM_DEBUG_KMS("\n"); if (!connector) return 0; @@ -1460,7 +1459,7 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connect intel_sdvo_write_cmd(intel_output, SDVO_CMD_GET_ATTACHED_DISPLAYS, NULL, 0); status = intel_sdvo_read_response(intel_output, &response, 2); - DRM_DEBUG("SDVO response %d %d\n", response[0], response[1]); + DRM_DEBUG_KMS("SDVO response %d %d\n", response[0], response[1]); if (status != SDVO_CMD_STATUS_SUCCESS) return connector_status_unknown; @@ -1905,8 +1904,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) /* Read the regs to test if we can talk to the device */ for (i = 0; i < 0x40; i++) { if (!intel_sdvo_read_byte(intel_output, i, &ch[i])) { - DRM_DEBUG_KMS(I915_SDVO, - "No SDVO device found on SDVO%c\n", + DRM_DEBUG_KMS("No SDVO device found on SDVO%c\n", output_device == SDVOB ? 
'B' : 'C'); goto err_i2c; } @@ -1989,8 +1987,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) sdvo_priv->controlled_output = 0; memcpy (bytes, &sdvo_priv->caps.output_flags, 2); - DRM_DEBUG_KMS(I915_SDVO, - "%s: Unknown SDVO output type (0x%02x%02x)\n", + DRM_DEBUG_KMS("%s: Unknown SDVO output type (0x%02x%02x)\n", SDVO_NAME(sdvo_priv), bytes[0], bytes[1]); encoder_type = DRM_MODE_ENCODER_NONE; @@ -2022,7 +2019,7 @@ bool intel_sdvo_init(struct drm_device *dev, int output_device) &sdvo_priv->pixel_clock_max); - DRM_DEBUG_KMS(I915_SDVO, "%s device VID/DID: %02X:%02X.%02X, " + DRM_DEBUG_KMS("%s device VID/DID: %02X:%02X.%02X, " "clock range %dMHz - %dMHz, " "input 1: %c, input 2: %c, " "output 1: %c, output 2: %c\n", diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 45b67d9c39c1..edbdb02a7a3f 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -174,19 +174,19 @@ extern void drm_ut_debug_printk(unsigned int request_level, __func__, fmt, ##args); \ } while (0) -#define DRM_DEBUG_DRIVER(prefix, fmt, args...) \ +#define DRM_DEBUG_DRIVER(fmt, args...) \ do { \ - drm_ut_debug_printk(DRM_UT_DRIVER, prefix, \ + drm_ut_debug_printk(DRM_UT_DRIVER, DRM_NAME, \ __func__, fmt, ##args); \ } while (0) -#define DRM_DEBUG_KMS(prefix, fmt, args...) \ +#define DRM_DEBUG_KMS(fmt, args...) \ do { \ - drm_ut_debug_printk(DRM_UT_KMS, prefix, \ + drm_ut_debug_printk(DRM_UT_KMS, DRM_NAME, \ __func__, fmt, ##args); \ } while (0) -#define DRM_DEBUG_MODE(prefix, fmt, args...) \ +#define DRM_DEBUG_MODE(fmt, args...) \ do { \ - drm_ut_debug_printk(DRM_UT_MODE, prefix, \ + drm_ut_debug_printk(DRM_UT_MODE, DRM_NAME, \ __func__, fmt, ##args); \ } while (0) #define DRM_LOG(fmt, args...) \ @@ -210,9 +210,9 @@ extern void drm_ut_debug_printk(unsigned int request_level, NULL, fmt, ##args); \ } while (0) #else -#define DRM_DEBUG_DRIVER(prefix, fmt, args...) do { } while (0) -#define DRM_DEBUG_KMS(prefix, fmt, args...) do { } while (0) -#define DRM_DEBUG_MODE(prefix, fmt, args...) do { } while (0) +#define DRM_DEBUG_DRIVER(fmt, args...) do { } while (0) +#define DRM_DEBUG_KMS(fmt, args...) do { } while (0) +#define DRM_DEBUG_MODE(fmt, args...) do { } while (0) #define DRM_DEBUG(fmt, arg...) do { } while (0) #define DRM_LOG(fmt, arg...) do { } while (0) #define DRM_LOG_KMS(fmt, args...) do { } while (0) -- cgit v1.2.3 From f940f37f022f7392ab81a35516222cbd46110b42 Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 20 Jul 2009 13:48:05 +0800 Subject: drm: Remove the macro defintion of DRM_DEBUG_MODE Two macro definitions of DRM_DEBUG_KMS/MODE can be used to add the debug info related with KMS. It is confusing. So remove the macro definition of DRM_DEBUG_MODE. Instead it can be replaced by the DRM_DEBUG_KMS. 
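A hypothetical call site illustrating the combined effect of the two cleanups: the explicit prefix argument is gone, and former DRM_DEBUG_MODE users move to DRM_DEBUG_KMS, which is enabled with drm.debug=0x04:

static void report_unusable_mode(struct drm_display_mode *mode)
{
	/* was: DRM_DEBUG_MODE("drm_mode", "Not using %s mode %d\n", ...) */
	DRM_DEBUG_KMS("Not using %s mode %d\n", mode->name, mode->status);
}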
Signed-off-by: Zhao Yakui Acked-by: Ian Romanick Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_modes.c | 4 ++-- include/drm/drmP.h | 7 ------- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 5eca2d5c5f23..6b4d2dc3cdd9 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -52,7 +52,7 @@ */ void drm_mode_debug_printmodeline(struct drm_display_mode *mode) { - DRM_DEBUG_MODE("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d " + DRM_DEBUG_KMS("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d " "0x%x 0x%x\n", mode->base.id, mode->name, mode->vrefresh, mode->clock, mode->hdisplay, mode->hsync_start, @@ -818,7 +818,7 @@ void drm_mode_prune_invalid(struct drm_device *dev, list_del(&mode->head); if (verbose) { drm_mode_debug_printmodeline(mode); - DRM_DEBUG_MODE("Not using %s mode %d\n", + DRM_DEBUG_KMS("Not using %s mode %d\n", mode->name, mode->status); } drm_mode_destroy(dev, mode); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index edbdb02a7a3f..6513d16cd029 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -88,7 +88,6 @@ struct drm_device; #define DRM_UT_CORE 0x01 #define DRM_UT_DRIVER 0x02 #define DRM_UT_KMS 0x04 -#define DRM_UT_MODE 0x08 extern void drm_ut_debug_printk(unsigned int request_level, const char *prefix, @@ -184,11 +183,6 @@ extern void drm_ut_debug_printk(unsigned int request_level, drm_ut_debug_printk(DRM_UT_KMS, DRM_NAME, \ __func__, fmt, ##args); \ } while (0) -#define DRM_DEBUG_MODE(fmt, args...) \ - do { \ - drm_ut_debug_printk(DRM_UT_MODE, DRM_NAME, \ - __func__, fmt, ##args); \ - } while (0) #define DRM_LOG(fmt, args...) \ do { \ drm_ut_debug_printk(DRM_UT_CORE, NULL, \ @@ -212,7 +206,6 @@ extern void drm_ut_debug_printk(unsigned int request_level, #else #define DRM_DEBUG_DRIVER(fmt, args...) do { } while (0) #define DRM_DEBUG_KMS(fmt, args...) do { } while (0) -#define DRM_DEBUG_MODE(fmt, args...) do { } while (0) #define DRM_DEBUG(fmt, arg...) do { } while (0) #define DRM_LOG(fmt, arg...) do { } while (0) #define DRM_LOG_KMS(fmt, args...) do { } while (0) -- cgit v1.2.3 From 87fdff81cd2d770f0adc742e21eb5e062ad20def Mon Sep 17 00:00:00 2001 From: Zhao Yakui Date: Mon, 20 Jul 2009 13:48:06 +0800 Subject: DRM: Add the explanation about DRM debug level Add the explanation about DRM debug level in the drmP header file. This is to explain how/where to use the different DRM debug level. Signed-off-by: Zhao Yakui Signed-off-by: Dave Airlie --- include/drm/drmP.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include') diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 6513d16cd029..e0f1c1fee58b 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -88,6 +88,37 @@ struct drm_device; #define DRM_UT_CORE 0x01 #define DRM_UT_DRIVER 0x02 #define DRM_UT_KMS 0x04 +/* + * Three debug levels are defined. + * drm_core, drm_driver, drm_kms + * drm_core level can be used in the generic drm code. For example: + * drm_ioctl, drm_mm, drm_memory + * The macro definiton of DRM_DEBUG is used. + * DRM_DEBUG(fmt, args...) + * The debug info by using the DRM_DEBUG can be obtained by adding + * the boot option of "drm.debug=1". + * + * drm_driver level can be used in the specific drm driver. It is used + * to add the debug info related with the drm driver. For example: + * i915_drv, i915_dma, i915_gem, radeon_drv, + * The macro definition of DRM_DEBUG_DRIVER can be used. + * DRM_DEBUG_DRIVER(fmt, args...) 
+ * The debug info by using the DRM_DEBUG_DRIVER can be obtained by + * adding the boot option of "drm.debug=0x02" + * + * drm_kms level can be used in the KMS code related with specific drm driver. + * It is used to add the debug info related with KMS mode. For example: + * the connector/crtc , + * The macro definition of DRM_DEBUG_KMS can be used. + * DRM_DEBUG_KMS(fmt, args...) + * The debug info by using the DRM_DEBUG_KMS can be obtained by + * adding the boot option of "drm.debug=0x04" + * + * If we add the boot option of "drm.debug=0x06", we can get the debug info by + * using the DRM_DEBUG_KMS and DRM_DEBUG_DRIVER. + * If we add the boot option of "drm.debug=0x05", we can get the debug info by + * using the DRM_DEBUG_KMS and DRM_DEBUG. + */ extern void drm_ut_debug_printk(unsigned int request_level, const char *prefix, -- cgit v1.2.3 From 2066facca4c7dfe9f5068ece0200a4dbf10f49e1 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 2 Aug 2009 04:19:17 +0200 Subject: drm/kms: slave encoder interface. Define some helper functions to make easier to detach a KMS encoder implementation from the drm module of the GPU it's used in. This is mainly useful for some external I2C encoders known to be present on cards with GPUs from several different manufacturers. Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/Makefile | 2 +- drivers/gpu/drm/drm_encoder_slave.c | 116 ++++++++++++++++++++++++++ include/drm/drm_encoder_slave.h | 162 ++++++++++++++++++++++++++++++++++++ 3 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/drm_encoder_slave.c create mode 100644 include/drm/drm_encoder_slave.h (limited to 'include') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index fe23f29f7cba..5f0aec4f082a 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -11,7 +11,7 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \ drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \ drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o \ drm_crtc.o drm_crtc_helper.o drm_modes.o drm_edid.o \ - drm_info.o drm_debugfs.o + drm_info.o drm_debugfs.o drm_encoder_slave.o drm-$(CONFIG_COMPAT) += drm_ioc32.o diff --git a/drivers/gpu/drm/drm_encoder_slave.c b/drivers/gpu/drm/drm_encoder_slave.c new file mode 100644 index 000000000000..6ffd600ccfae --- /dev/null +++ b/drivers/gpu/drm/drm_encoder_slave.c @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2009 Francisco Jerez. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include + +/** + * drm_i2c_encoder_init - Initialize an I2C slave encoder + * @dev: DRM device. + * @encoder: Encoder to be attached to the I2C device. You aren't + * required to have called drm_encoder_init() before. + * @adap: I2C adapter that will be used to communicate with + * the device. + * @info: Information that will be used to create the I2C device. + * Required fields are @addr and @type. + * + * Create an I2C device on the specified bus (the module containing its + * driver is transparently loaded) and attach it to the specified + * &drm_encoder_slave. The @slave_funcs field will be initialized with + * the hooks provided by the slave driver. + * + * Returns 0 on success or a negative errno on failure, in particular, + * -ENODEV is returned when no matching driver is found. + */ +int drm_i2c_encoder_init(struct drm_device *dev, + struct drm_encoder_slave *encoder, + struct i2c_adapter *adap, + const struct i2c_board_info *info) +{ + char modalias[sizeof(I2C_MODULE_PREFIX) + + I2C_NAME_SIZE]; + struct module *module = NULL; + struct i2c_client *client; + struct drm_i2c_encoder_driver *encoder_drv; + int err = 0; + + snprintf(modalias, sizeof(modalias), + "%s%s", I2C_MODULE_PREFIX, info->type); + request_module(modalias); + + client = i2c_new_device(adap, info); + if (!client) { + err = -ENOMEM; + goto fail; + } + + if (!client->driver) { + err = -ENODEV; + goto fail_unregister; + } + + module = client->driver->driver.owner; + if (!try_module_get(module)) { + err = -ENODEV; + goto fail_unregister; + } + + encoder->bus_priv = client; + + encoder_drv = to_drm_i2c_encoder_driver(client->driver); + + err = encoder_drv->encoder_init(client, dev, encoder); + if (err) + goto fail_unregister; + + return 0; + +fail_unregister: + i2c_unregister_device(client); + module_put(module); +fail: + return err; +} +EXPORT_SYMBOL(drm_i2c_encoder_init); + +/** + * drm_i2c_encoder_destroy - Unregister the I2C device backing an encoder + * @drm_encoder: Encoder to be unregistered. + * + * This should be called from the @destroy method of an I2C slave + * encoder driver once I2C access is no longer needed. + */ +void drm_i2c_encoder_destroy(struct drm_encoder *drm_encoder) +{ + struct drm_encoder_slave *encoder = to_encoder_slave(drm_encoder); + struct i2c_client *client = drm_i2c_encoder_get_client(drm_encoder); + struct module *module = client->driver->driver.owner; + + i2c_unregister_device(client); + encoder->bus_priv = NULL; + + module_put(module); +} +EXPORT_SYMBOL(drm_i2c_encoder_destroy); diff --git a/include/drm/drm_encoder_slave.h b/include/drm/drm_encoder_slave.h new file mode 100644 index 000000000000..821ec40c17d8 --- /dev/null +++ b/include/drm/drm_encoder_slave.h @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2009 Francisco Jerez. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __DRM_ENCODER_SLAVE_H__ +#define __DRM_ENCODER_SLAVE_H__ + +#include +#include + +/** + * struct drm_encoder_slave_funcs - Entry points exposed by a slave encoder driver + * @set_config: Initialize any encoder-specific modesetting parameters. + * The meaning of the @params parameter is implementation + * dependent. It will usually be a structure with DVO port + * data format settings or timings. It's not required for + * the new parameters to take effect until the next mode + * is set. + * + * Most of its members are analogous to the function pointers in + * &drm_encoder_helper_funcs and they can optionally be used to + * initialize the latter. Connector-like methods (e.g. @get_modes and + * @set_property) will typically be wrapped around and only be called + * if the encoder is the currently selected one for the connector. + */ +struct drm_encoder_slave_funcs { + void (*set_config)(struct drm_encoder *encoder, + void *params); + + void (*destroy)(struct drm_encoder *encoder); + void (*dpms)(struct drm_encoder *encoder, int mode); + void (*save)(struct drm_encoder *encoder); + void (*restore)(struct drm_encoder *encoder); + bool (*mode_fixup)(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); + int (*mode_valid)(struct drm_encoder *encoder, + struct drm_display_mode *mode); + void (*mode_set)(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode); + + enum drm_connector_status (*detect)(struct drm_encoder *encoder, + struct drm_connector *connector); + int (*get_modes)(struct drm_encoder *encoder, + struct drm_connector *connector); + int (*create_resources)(struct drm_encoder *encoder, + struct drm_connector *connector); + int (*set_property)(struct drm_encoder *encoder, + struct drm_connector *connector, + struct drm_property *property, + uint64_t val); + +}; + +/** + * struct drm_encoder_slave - Slave encoder struct + * @base: DRM encoder object. + * @slave_funcs: Slave encoder callbacks. + * @slave_priv: Slave encoder private data. + * @bus_priv: Bus specific data. + * + * A &drm_encoder_slave has two sets of callbacks, @slave_funcs and the + * ones in @base. 
The former are never actually called by the common + * CRTC code, it's just a convenience for splitting the encoder + * functions in an upper, GPU-specific layer and a (hopefully) + * GPU-agnostic lower layer: It's the GPU driver responsibility to + * call the slave methods when appropriate. + * + * drm_i2c_encoder_init() provides a way to get an implementation of + * this. + */ +struct drm_encoder_slave { + struct drm_encoder base; + + struct drm_encoder_slave_funcs *slave_funcs; + void *slave_priv; + void *bus_priv; +}; +#define to_encoder_slave(x) container_of((x), struct drm_encoder_slave, base) + +int drm_i2c_encoder_init(struct drm_device *dev, + struct drm_encoder_slave *encoder, + struct i2c_adapter *adap, + const struct i2c_board_info *info); + + +/** + * struct drm_i2c_encoder_driver + * + * Describes a device driver for an encoder connected to the GPU + * through an I2C bus. In addition to the entry points in @i2c_driver + * an @encoder_init function should be provided. It will be called to + * give the driver an opportunity to allocate any per-encoder data + * structures and to initialize the @slave_funcs and (optionally) + * @slave_priv members of @encoder. + */ +struct drm_i2c_encoder_driver { + struct i2c_driver i2c_driver; + + int (*encoder_init)(struct i2c_client *client, + struct drm_device *dev, + struct drm_encoder_slave *encoder); + +}; +#define to_drm_i2c_encoder_driver(x) container_of((x), \ + struct drm_i2c_encoder_driver, \ + i2c_driver) + +/** + * drm_i2c_encoder_get_client - Get the I2C client corresponding to an encoder + */ +static inline struct i2c_client *drm_i2c_encoder_get_client(struct drm_encoder *encoder) +{ + return (struct i2c_client *)to_encoder_slave(encoder)->bus_priv; +} + +/** + * drm_i2c_encoder_register - Register an I2C encoder driver + * @owner: Module containing the driver. + * @driver: Driver to be registered. + */ +static inline int drm_i2c_encoder_register(struct module *owner, + struct drm_i2c_encoder_driver *driver) +{ + return i2c_register_driver(owner, &driver->i2c_driver); +} + +/** + * drm_i2c_encoder_unregister - Unregister an I2C encoder driver + * @driver: Driver to be unregistered. + */ +static inline void drm_i2c_encoder_unregister(struct drm_i2c_encoder_driver *driver) +{ + return i2c_del_driver(&driver->i2c_driver); +} + +void drm_i2c_encoder_destroy(struct drm_encoder *encoder); + +#endif -- cgit v1.2.3 From 74bd3c26b90f39b9dcc05c471333da8998572b5d Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 2 Aug 2009 04:19:18 +0200 Subject: drm: Define DRM_MODE_CONNECTOR_TV The existing TV connector types are often unsuitable either because there is no way to probe them until they're actually plugged in or because they can change during run time (e.g. 7-pin DIN connectors that behave as S-Video, Component, Composite or SCART depending on the adaptor plugged in). 
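A hypothetical driver fragment (the tv_out structure, funcs table and norm list are assumptions, not part of the patch) showing the intended use: register a multi-format TV-out port with the new catch-all connector type and report the detected format through the existing subconnector property instead of guessing S-Video/Composite/Component up front:

static void tv_out_connector_create(struct drm_device *dev,
				    struct tv_out_connector *tv_out)
{
	drm_connector_init(dev, &tv_out->base, &tv_connector_funcs,
			   DRM_MODE_CONNECTOR_TV);
	drm_mode_create_tv_properties(dev, ARRAY_SIZE(tv_norm_names),
				      tv_norm_names);
	/* updated later, once the attached adaptor has been detected */
	drm_connector_attach_property(&tv_out->base,
				      dev->mode_config.tv_subconnector_property,
				      DRM_MODE_SUBCONNECTOR_Unknown);
}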
Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 1 + drivers/gpu/drm/drm_sysfs.c | 3 +++ include/drm/drm_mode.h | 1 + 3 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 9c758305472c..c7ab80b45e3f 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -146,6 +146,7 @@ static struct drm_conn_prop_enum_list drm_connector_enum_list[] = { DRM_MODE_CONNECTOR_DisplayPort, "DisplayPort", 0 }, { DRM_MODE_CONNECTOR_HDMIA, "HDMI Type A", 0 }, { DRM_MODE_CONNECTOR_HDMIB, "HDMI Type B", 0 }, + { DRM_MODE_CONNECTOR_TV, "TV", 0 }, }; static struct drm_prop_enum_list drm_encoder_enum_list[] = diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 85ec31b3ff00..adc179459c25 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -247,6 +247,7 @@ static ssize_t subconnector_show(struct device *device, case DRM_MODE_CONNECTOR_Composite: case DRM_MODE_CONNECTOR_SVIDEO: case DRM_MODE_CONNECTOR_Component: + case DRM_MODE_CONNECTOR_TV: prop = dev->mode_config.tv_subconnector_property; is_tv = 1; break; @@ -287,6 +288,7 @@ static ssize_t select_subconnector_show(struct device *device, case DRM_MODE_CONNECTOR_Composite: case DRM_MODE_CONNECTOR_SVIDEO: case DRM_MODE_CONNECTOR_Component: + case DRM_MODE_CONNECTOR_TV: prop = dev->mode_config.tv_select_subconnector_property; is_tv = 1; break; @@ -385,6 +387,7 @@ int drm_sysfs_connector_add(struct drm_connector *connector) case DRM_MODE_CONNECTOR_Composite: case DRM_MODE_CONNECTOR_SVIDEO: case DRM_MODE_CONNECTOR_Component: + case DRM_MODE_CONNECTOR_TV: for (i = 0; i < ARRAY_SIZE(connector_attrs_opt1); i++) { ret = device_create_file(&connector->kdev, &connector_attrs_opt1[i]); if (ret) diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index ae304cc73c90..c51e9f528c8f 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -155,6 +155,7 @@ struct drm_mode_get_encoder { #define DRM_MODE_CONNECTOR_DisplayPort 10 #define DRM_MODE_CONNECTOR_HDMIA 11 #define DRM_MODE_CONNECTOR_HDMIB 12 +#define DRM_MODE_CONNECTOR_TV 13 struct drm_mode_get_connector { -- cgit v1.2.3 From aeaa1ad3ff32be833680e484d99ec29d892da1ff Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 2 Aug 2009 04:19:19 +0200 Subject: drm: Define DRM_MODE_SUBCONNECTOR_SCART Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 2 ++ include/drm/drm_mode.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index c7ab80b45e3f..ed53c5c37ac4 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -108,6 +108,7 @@ static struct drm_prop_enum_list drm_tv_select_enum_list[] = { DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_SVIDEO, "SVIDEO" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */ + { DRM_MODE_SUBCONNECTOR_SCART, "SCART" }, /* TV-out */ }; DRM_ENUM_NAME_FN(drm_get_tv_select_name, drm_tv_select_enum_list) @@ -118,6 +119,7 @@ static struct drm_prop_enum_list drm_tv_subconnector_enum_list[] = { DRM_MODE_SUBCONNECTOR_Composite, "Composite" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_SVIDEO, "SVIDEO" }, /* TV-out */ { DRM_MODE_SUBCONNECTOR_Component, "Component" }, /* TV-out */ + { DRM_MODE_SUBCONNECTOR_SCART, "SCART" }, /* TV-out */ }; DRM_ENUM_NAME_FN(drm_get_tv_subconnector_name, diff --git 
a/include/drm/drm_mode.h b/include/drm/drm_mode.h index c51e9f528c8f..616aeb42b773 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -141,6 +141,7 @@ struct drm_mode_get_encoder { #define DRM_MODE_SUBCONNECTOR_Composite 5 #define DRM_MODE_SUBCONNECTOR_SVIDEO 6 #define DRM_MODE_SUBCONNECTOR_Component 8 +#define DRM_MODE_SUBCONNECTOR_SCART 9 #define DRM_MODE_CONNECTOR_Unknown 0 #define DRM_MODE_CONNECTOR_VGA 1 -- cgit v1.2.3 From b6b7902e54c7e8abbc213d8bdc290350c00ccfe5 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sun, 2 Aug 2009 04:19:20 +0200 Subject: drm: Define some new standard TV properties. Namely "brightness", "contrast" and "flicker reduction". Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 18 ++++++++++++++++++ include/drm/drm_crtc.h | 3 +++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index ed53c5c37ac4..a8c831134fc3 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -718,6 +718,24 @@ int drm_mode_create_tv_properties(struct drm_device *dev, int num_modes, drm_property_add_enum(dev->mode_config.tv_mode_property, i, i, modes[i]); + dev->mode_config.tv_brightness_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "brightness", 2); + dev->mode_config.tv_brightness_property->values[0] = 0; + dev->mode_config.tv_brightness_property->values[1] = 100; + + dev->mode_config.tv_contrast_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "contrast", 2); + dev->mode_config.tv_contrast_property->values[0] = 0; + dev->mode_config.tv_contrast_property->values[1] = 100; + + dev->mode_config.tv_flicker_reduction_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "flicker reduction", 2); + dev->mode_config.tv_flicker_reduction_property->values[0] = 0; + dev->mode_config.tv_flicker_reduction_property->values[1] = 100; + return 0; } EXPORT_SYMBOL(drm_mode_create_tv_properties); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 125994d8ac0b..5f2cc0ca4c7d 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -572,6 +572,9 @@ struct drm_mode_config { struct drm_property *tv_right_margin_property; struct drm_property *tv_top_margin_property; struct drm_property *tv_bottom_margin_property; + struct drm_property *tv_brightness_property; + struct drm_property *tv_contrast_property; + struct drm_property *tv_flicker_reduction_property; /* Optional properties */ struct drm_property *scaling_mode_property; -- cgit v1.2.3 From 42935ecaf4e784d0815afa9a7e5fe7e141157ca3 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 29 Jul 2009 20:08:07 -0400 Subject: mac80211: redefine usage of the mac80211 workqueue The mac80211 workqueue exists to enable mac80211 and drivers to queue their own work on a single threaded workqueue. mac80211 takes care to flush the workqueue during suspend but we never really had requirements on drivers for how they should use the workqueue in consideration for suspend. We extend mac80211 to document how the mac80211 workqueue should be used, how it should not be used and finally move raw access to the workqueue to mac80211 only. Drivers and mac80211 use helpers to queue work onto the mac80211 workqueue: * ieee80211_queue_work() * ieee80211_queue_delayed_work() These helpers will now warn if mac80211 already completed its suspend cycle and someone is trying to queue work. 
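A hypothetical driver fragment (the mydrv names are made up) showing the intended pattern: queue through the helpers rather than touching hw->workqueue directly, and cancel everything in the stop() callback so nothing can be queued once mac80211 has completed its suspend cycle:

static void mydrv_schedule_poll(struct mydrv_priv *priv)
{
	ieee80211_queue_work(priv->hw, &priv->rx_work);
	ieee80211_queue_delayed_work(priv->hw, &priv->poll_work,
				     msecs_to_jiffies(100));
}

static void mydrv_stop(struct ieee80211_hw *hw)
{
	struct mydrv_priv *priv = hw->priv;

	cancel_work_sync(&priv->rx_work);
	cancel_delayed_work_sync(&priv->poll_work);
}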
mac80211 flushes the mac80211 workqueue prior to suspend a few times, but we haven't taken the care to ensure drivers won't add more work after suspend. To help with this we add a warning when someone tries to add work and mac80211 already completed the suspend cycle. Drivers should ensure they cancel any work or delayed work in the mac80211 stop() callback. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- drivers/net/wireless/at76c50x-usb.c | 10 +++--- drivers/net/wireless/ath/ar9170/led.c | 11 ++++--- drivers/net/wireless/ath/ar9170/main.c | 26 ++++++++------- drivers/net/wireless/ath/ath9k/main.c | 15 +++++---- drivers/net/wireless/ath/ath9k/virtual.c | 17 +++++----- drivers/net/wireless/ath/ath9k/xmit.c | 2 +- drivers/net/wireless/b43/main.c | 8 ++--- drivers/net/wireless/b43/phy_common.c | 2 +- drivers/net/wireless/b43/pio.c | 2 +- drivers/net/wireless/b43legacy/main.c | 8 ++--- drivers/net/wireless/p54/led.c | 5 ++- drivers/net/wireless/p54/main.c | 2 +- drivers/net/wireless/p54/p54spi.c | 4 +-- drivers/net/wireless/p54/txrx.c | 2 +- drivers/net/wireless/rt2x00/rt2x00dev.c | 2 +- drivers/net/wireless/rt2x00/rt2x00link.c | 8 ++--- drivers/net/wireless/rt2x00/rt2x00mac.c | 2 +- drivers/net/wireless/rtl818x/rtl8187_dev.c | 2 +- drivers/net/wireless/rtl818x/rtl8187_leds.c | 10 +++--- drivers/net/wireless/zd1211rw/zd_mac.c | 2 +- include/net/mac80211.h | 50 ++++++++++++++++++++++++----- net/mac80211/ibss.c | 6 ++-- net/mac80211/ieee80211_i.h | 6 ++++ net/mac80211/iface.c | 4 +-- net/mac80211/main.c | 8 ++--- net/mac80211/mesh.c | 10 +++--- net/mac80211/mesh_hwmp.c | 4 +-- net/mac80211/mlme.c | 48 +++++++++++++-------------- net/mac80211/pm.c | 4 +-- net/mac80211/scan.c | 8 ++--- net/mac80211/tx.c | 2 +- net/mac80211/util.c | 41 +++++++++++++++++++++++ 32 files changed, 208 insertions(+), 123 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index aff09a1cf64f..7218dbabad3e 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -1875,8 +1875,8 @@ static void at76_dwork_hw_scan(struct work_struct *work) /* FIXME: add maximum time for scan to complete */ if (ret != CMD_STATUS_COMPLETE) { - queue_delayed_work(priv->hw->workqueue, &priv->dwork_hw_scan, - SCAN_POLL_INTERVAL); + ieee80211_queue_delayed_work(priv->hw, &priv->dwork_hw_scan, + SCAN_POLL_INTERVAL); mutex_unlock(&priv->mtx); return; } @@ -1937,8 +1937,8 @@ static int at76_hw_scan(struct ieee80211_hw *hw, goto exit; } - queue_delayed_work(priv->hw->workqueue, &priv->dwork_hw_scan, - SCAN_POLL_INTERVAL); + ieee80211_queue_delayed_work(priv->hw, &priv->dwork_hw_scan, + SCAN_POLL_INTERVAL); exit: mutex_unlock(&priv->mtx); @@ -2027,7 +2027,7 @@ static void at76_configure_filter(struct ieee80211_hw *hw, } else return; - queue_work(hw->workqueue, &priv->work_set_promisc); + ieee80211_queue_work(hw, &priv->work_set_promisc); } static int at76_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, diff --git a/drivers/net/wireless/ath/ar9170/led.c b/drivers/net/wireless/ath/ar9170/led.c index 63fda6cd2101..86c4e79f6bc8 100644 --- a/drivers/net/wireless/ath/ar9170/led.c +++ b/drivers/net/wireless/ath/ar9170/led.c @@ -90,9 +90,12 @@ static void ar9170_update_leds(struct work_struct *work) ar9170_set_leds_state(ar, led_val); mutex_unlock(&ar->mutex); - if (rerun) - queue_delayed_work(ar->hw->workqueue, &ar->led_work, - msecs_to_jiffies(blink_delay)); + if (!rerun) + return; + + ieee80211_queue_delayed_work(ar->hw, + 
&ar->led_work, + msecs_to_jiffies(blink_delay)); } static void ar9170_led_brightness_set(struct led_classdev *led, @@ -110,7 +113,7 @@ static void ar9170_led_brightness_set(struct led_classdev *led, } if (likely(IS_ACCEPTING_CMD(ar) && arl->toggled)) - queue_delayed_work(ar->hw->workqueue, &ar->led_work, HZ/10); + ieee80211_queue_delayed_work(ar->hw, &ar->led_work, HZ/10); } static int ar9170_register_led(struct ar9170 *ar, int i, char *name, diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index 099ed3c3ba28..4fc389ae74b4 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -595,10 +595,12 @@ static void ar9170_tx_janitor(struct work_struct *work) ar9170_tx_fake_ampdu_status(ar); - if (resched) - queue_delayed_work(ar->hw->workqueue, - &ar->tx_janitor, - msecs_to_jiffies(AR9170_JANITOR_DELAY)); + if (!resched) + return; + + ieee80211_queue_delayed_work(ar->hw, + &ar->tx_janitor, + msecs_to_jiffies(AR9170_JANITOR_DELAY)); } void ar9170_handle_command_response(struct ar9170 *ar, void *buf, u32 len) @@ -648,7 +650,7 @@ void ar9170_handle_command_response(struct ar9170 *ar, void *buf, u32 len) * pre-TBTT event */ if (ar->vif && ar->vif->type == NL80211_IFTYPE_AP) - queue_work(ar->hw->workqueue, &ar->beacon_work); + ieee80211_queue_work(ar->hw, &ar->beacon_work); break; case 0xc2: @@ -1825,10 +1827,12 @@ static void ar9170_tx(struct ar9170 *ar) } } - if (schedule_garbagecollector) - queue_delayed_work(ar->hw->workqueue, - &ar->tx_janitor, - msecs_to_jiffies(AR9170_JANITOR_DELAY)); + if (!schedule_garbagecollector) + return; + + ieee80211_queue_delayed_work(ar->hw, + &ar->tx_janitor, + msecs_to_jiffies(AR9170_JANITOR_DELAY)); } static bool ar9170_tx_ampdu_queue(struct ar9170 *ar, struct sk_buff *skb) @@ -2157,7 +2161,7 @@ static void ar9170_op_configure_filter(struct ieee80211_hw *hw, } if (likely(IS_STARTED(ar))) - queue_work(ar->hw->workqueue, &ar->filter_config_work); + ieee80211_queue_work(ar->hw, &ar->filter_config_work); } static void ar9170_op_bss_info_changed(struct ieee80211_hw *hw, @@ -2415,7 +2419,7 @@ static void ar9170_sta_notify(struct ieee80211_hw *hw, } if (IS_STARTED(ar) && ar->filter_changed) - queue_work(ar->hw->workqueue, &ar->filter_config_work); + ieee80211_queue_work(ar->hw, &ar->filter_config_work); } static int ar9170_get_stats(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index cf44623b5cd2..292ac2b41891 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -973,10 +973,11 @@ static void ath_led_blink_work(struct work_struct *work) ath9k_hw_set_gpio(sc->sc_ah, ATH_LED_PIN, (sc->sc_flags & SC_OP_LED_ON) ? 1 : 0); - queue_delayed_work(sc->hw->workqueue, &sc->ath_led_blink_work, - (sc->sc_flags & SC_OP_LED_ON) ? - msecs_to_jiffies(sc->led_off_duration) : - msecs_to_jiffies(sc->led_on_duration)); + ieee80211_queue_delayed_work(sc->hw, + &sc->ath_led_blink_work, + (sc->sc_flags & SC_OP_LED_ON) ? + msecs_to_jiffies(sc->led_off_duration) : + msecs_to_jiffies(sc->led_on_duration)); sc->led_on_duration = sc->led_on_cnt ? 
max((ATH_LED_ON_DURATION_IDLE - sc->led_on_cnt), 25) : @@ -1013,8 +1014,8 @@ static void ath_led_brightness(struct led_classdev *led_cdev, case LED_FULL: if (led->led_type == ATH_LED_ASSOC) { sc->sc_flags |= SC_OP_LED_ASSOCIATED; - queue_delayed_work(sc->hw->workqueue, - &sc->ath_led_blink_work, 0); + ieee80211_queue_delayed_work(sc->hw, + &sc->ath_led_blink_work, 0); } else if (led->led_type == ATH_LED_RADIO) { ath9k_hw_set_gpio(sc->sc_ah, ATH_LED_PIN, 0); sc->sc_flags |= SC_OP_LED_ON; @@ -1972,7 +1973,7 @@ static int ath9k_start(struct ieee80211_hw *hw) ieee80211_wake_queues(hw); - queue_delayed_work(sc->hw->workqueue, &sc->tx_complete_work, 0); + ieee80211_queue_delayed_work(sc->hw, &sc->tx_complete_work, 0); mutex_unlock: mutex_unlock(&sc->mutex); diff --git a/drivers/net/wireless/ath/ath9k/virtual.c b/drivers/net/wireless/ath/ath9k/virtual.c index e1d419e02b4a..19b88f8177fd 100644 --- a/drivers/net/wireless/ath/ath9k/virtual.c +++ b/drivers/net/wireless/ath/ath9k/virtual.c @@ -351,7 +351,7 @@ void ath9k_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) * Drop from tasklet to work to allow mutex for channel * change. */ - queue_work(aphy->sc->hw->workqueue, + ieee80211_queue_work(aphy->sc->hw, &aphy->sc->chan_work); } } @@ -367,7 +367,7 @@ static void ath9k_mark_paused(struct ath_wiphy *aphy) struct ath_softc *sc = aphy->sc; aphy->state = ATH_WIPHY_PAUSED; if (!__ath9k_wiphy_pausing(sc)) - queue_work(sc->hw->workqueue, &sc->chan_work); + ieee80211_queue_work(sc->hw, &sc->chan_work); } static void ath9k_pause_iter(void *data, u8 *mac, struct ieee80211_vif *vif) @@ -521,7 +521,7 @@ int ath9k_wiphy_select(struct ath_wiphy *aphy) spin_unlock_bh(&sc->wiphy_lock); ath_radio_disable(sc); ath_radio_enable(sc); - queue_work(aphy->sc->hw->workqueue, + ieee80211_queue_work(aphy->sc->hw, &aphy->sc->chan_work); return -EBUSY; /* previous select still in progress */ } @@ -541,7 +541,7 @@ int ath9k_wiphy_select(struct ath_wiphy *aphy) if (now) { /* Ready to request channel change immediately */ - queue_work(aphy->sc->hw->workqueue, &aphy->sc->chan_work); + ieee80211_queue_work(aphy->sc->hw, &aphy->sc->chan_work); } /* @@ -648,8 +648,9 @@ try_again: "change\n"); } - queue_delayed_work(sc->hw->workqueue, &sc->wiphy_work, - sc->wiphy_scheduler_int); + ieee80211_queue_delayed_work(sc->hw, + &sc->wiphy_work, + sc->wiphy_scheduler_int); } void ath9k_wiphy_set_scheduler(struct ath_softc *sc, unsigned int msec_int) @@ -657,8 +658,8 @@ void ath9k_wiphy_set_scheduler(struct ath_softc *sc, unsigned int msec_int) cancel_delayed_work_sync(&sc->wiphy_work); sc->wiphy_scheduler_int = msecs_to_jiffies(msec_int); if (sc->wiphy_scheduler_int) - queue_delayed_work(sc->hw->workqueue, &sc->wiphy_work, - sc->wiphy_scheduler_int); + ieee80211_queue_delayed_work(sc->hw, &sc->wiphy_work, + sc->wiphy_scheduler_int); } /* caller must hold wiphy_lock */ diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index b7806e2ca0e1..87762da0383b 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -2063,7 +2063,7 @@ static void ath_tx_complete_poll_work(struct work_struct *work) ath_reset(sc, false); } - queue_delayed_work(sc->hw->workqueue, &sc->tx_complete_work, + ieee80211_queue_delayed_work(sc->hw, &sc->tx_complete_work, msecs_to_jiffies(ATH_TX_COMPLETE_POLL_INT)); } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 3f4360ad0e4e..f985938962e3 100644 --- a/drivers/net/wireless/b43/main.c +++ 
b/drivers/net/wireless/b43/main.c @@ -1654,7 +1654,7 @@ static void b43_update_templates(struct b43_wl *wl) wl->current_beacon = beacon; wl->beacon0_uploaded = 0; wl->beacon1_uploaded = 0; - queue_work(wl->hw->workqueue, &wl->beacon_update_trigger); + ieee80211_queue_work(wl->hw, &wl->beacon_update_trigger); } static void b43_set_beacon_int(struct b43_wldev *dev, u16 beacon_int) @@ -2914,7 +2914,7 @@ out_requeue: delay = msecs_to_jiffies(50); else delay = round_jiffies_relative(HZ * 15); - queue_delayed_work(wl->hw->workqueue, &dev->periodic_work, delay); + ieee80211_queue_delayed_work(wl->hw, &dev->periodic_work, delay); out: mutex_unlock(&wl->mutex); } @@ -2925,7 +2925,7 @@ static void b43_periodic_tasks_setup(struct b43_wldev *dev) dev->periodic_state = 0; INIT_DELAYED_WORK(work, b43_periodic_work_handler); - queue_delayed_work(dev->wl->hw->workqueue, work, 0); + ieee80211_queue_delayed_work(dev->wl->hw, work, 0); } /* Check if communication with the device works correctly. */ @@ -4871,7 +4871,7 @@ void b43_controller_restart(struct b43_wldev *dev, const char *reason) if (b43_status(dev) < B43_STAT_INITIALIZED) return; b43info(dev->wl, "Controller RESET (%s) ...\n", reason); - queue_work(dev->wl->hw->workqueue, &dev->restart_work); + ieee80211_queue_work(dev->wl->hw, &dev->restart_work); } #ifdef CONFIG_PM diff --git a/drivers/net/wireless/b43/phy_common.c b/drivers/net/wireless/b43/phy_common.c index 6d241622210e..f537bfef690a 100644 --- a/drivers/net/wireless/b43/phy_common.c +++ b/drivers/net/wireless/b43/phy_common.c @@ -352,7 +352,7 @@ void b43_phy_txpower_check(struct b43_wldev *dev, unsigned int flags) /* We must adjust the transmission power in hardware. * Schedule b43_phy_txpower_adjust_work(). */ - queue_work(dev->wl->hw->workqueue, &dev->wl->txpower_adjust_work); + ieee80211_queue_work(dev->wl->hw, &dev->wl->txpower_adjust_work); } int b43_phy_shm_tssi_read(struct b43_wldev *dev, u16 shm_offset) diff --git a/drivers/net/wireless/b43/pio.c b/drivers/net/wireless/b43/pio.c index 69138e8c1db6..73c047d8de40 100644 --- a/drivers/net/wireless/b43/pio.c +++ b/drivers/net/wireless/b43/pio.c @@ -783,7 +783,7 @@ void b43_pio_rx(struct b43_pio_rxqueue *q) { /* Due to latency issues we must run the RX path in * a workqueue to be able to schedule between packets. 
*/ - queue_work(q->dev->wl->hw->workqueue, &q->rx_work); + ieee80211_queue_work(q->dev->wl->hw, &q->rx_work); } static void b43_pio_tx_suspend_queue(struct b43_pio_txqueue *q) diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index c4973c1942bf..b1435594921a 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -1252,7 +1252,7 @@ static void b43legacy_update_templates(struct b43legacy_wl *wl) wl->current_beacon = beacon; wl->beacon0_uploaded = 0; wl->beacon1_uploaded = 0; - queue_work(wl->hw->workqueue, &wl->beacon_update_trigger); + ieee80211_queue_work(wl->hw, &wl->beacon_update_trigger); } static void b43legacy_set_beacon_int(struct b43legacy_wldev *dev, @@ -2300,7 +2300,7 @@ out_requeue: delay = msecs_to_jiffies(50); else delay = round_jiffies_relative(HZ * 15); - queue_delayed_work(wl->hw->workqueue, &dev->periodic_work, delay); + ieee80211_queue_delayed_work(wl->hw, &dev->periodic_work, delay); out: mutex_unlock(&wl->mutex); } @@ -2311,7 +2311,7 @@ static void b43legacy_periodic_tasks_setup(struct b43legacy_wldev *dev) dev->periodic_state = 0; INIT_DELAYED_WORK(work, b43legacy_periodic_work_handler); - queue_delayed_work(dev->wl->hw->workqueue, work, 0); + ieee80211_queue_delayed_work(dev->wl->hw, work, 0); } /* Validate access to the chip (SHM) */ @@ -3885,7 +3885,7 @@ void b43legacy_controller_restart(struct b43legacy_wldev *dev, if (b43legacy_status(dev) < B43legacy_STAT_INITIALIZED) return; b43legacyinfo(dev->wl, "Controller RESET (%s) ...\n", reason); - queue_work(dev->wl->hw->workqueue, &dev->restart_work); + ieee80211_queue_work(dev->wl->hw, &dev->restart_work); } #ifdef CONFIG_PM diff --git a/drivers/net/wireless/p54/led.c b/drivers/net/wireless/p54/led.c index c00115b206d4..9575ac033630 100644 --- a/drivers/net/wireless/p54/led.c +++ b/drivers/net/wireless/p54/led.c @@ -61,7 +61,7 @@ static void p54_update_leds(struct work_struct *work) wiphy_name(priv->hw->wiphy), err); if (rerun) - queue_delayed_work(priv->hw->workqueue, &priv->led_work, + ieee80211_queue_delayed_work(priv->hw, &priv->led_work, msecs_to_jiffies(blink_delay)); } @@ -78,8 +78,7 @@ static void p54_led_brightness_set(struct led_classdev *led_dev, if ((brightness) && (led->registered)) { led->toggled++; - queue_delayed_work(priv->hw->workqueue, &priv->led_work, - HZ/10); + ieee80211_queue_delayed_work(priv->hw, &priv->led_work, HZ/10); } } diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 955f6d7ec16a..a0d0e726bc4e 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -180,7 +180,7 @@ static int p54_start(struct ieee80211_hw *dev) goto out; } - queue_delayed_work(dev->workqueue, &priv->work, 0); + ieee80211_queue_delayed_work(dev, &priv->work, 0); priv->softled_state = 0; err = p54_set_leds(priv); diff --git a/drivers/net/wireless/p54/p54spi.c b/drivers/net/wireless/p54/p54spi.c index eef532987d05..05458d9249ce 100644 --- a/drivers/net/wireless/p54/p54spi.c +++ b/drivers/net/wireless/p54/p54spi.c @@ -391,7 +391,7 @@ static irqreturn_t p54spi_interrupt(int irq, void *config) struct spi_device *spi = config; struct p54s_priv *priv = dev_get_drvdata(&spi->dev); - queue_work(priv->hw->workqueue, &priv->work); + ieee80211_queue_work(priv->hw, &priv->work); return IRQ_HANDLED; } @@ -479,7 +479,7 @@ static void p54spi_op_tx(struct ieee80211_hw *dev, struct sk_buff *skb) list_add_tail(&di->tx_list, &priv->tx_pending); spin_unlock_irqrestore(&priv->tx_lock, flags); - 
queue_work(priv->hw->workqueue, &priv->work); + ieee80211_queue_work(priv->hw, &priv->work); } static void p54spi_work(struct work_struct *work) diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index c32a0d2fa1f7..704685fab177 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -380,7 +380,7 @@ static int p54_rx_data(struct p54_common *priv, struct sk_buff *skb) ieee80211_rx_irqsafe(priv->hw, skb); - queue_delayed_work(priv->hw->workqueue, &priv->work, + ieee80211_queue_delayed_work(priv->hw, &priv->work, msecs_to_jiffies(P54_STATISTICS_UPDATE)); return -1; diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 658a63bfb761..b717afbf3f38 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -215,7 +215,7 @@ void rt2x00lib_beacondone(struct rt2x00_dev *rt2x00dev) rt2x00lib_beacondone_iter, rt2x00dev); - queue_work(rt2x00dev->hw->workqueue, &rt2x00dev->intf_work); + ieee80211_queue_work(rt2x00dev->hw, &rt2x00dev->intf_work); } EXPORT_SYMBOL_GPL(rt2x00lib_beacondone); diff --git a/drivers/net/wireless/rt2x00/rt2x00link.c b/drivers/net/wireless/rt2x00/rt2x00link.c index 79915687e744..917831689ccd 100644 --- a/drivers/net/wireless/rt2x00/rt2x00link.c +++ b/drivers/net/wireless/rt2x00/rt2x00link.c @@ -351,8 +351,8 @@ void rt2x00link_start_tuner(struct rt2x00_dev *rt2x00dev) rt2x00link_reset_tuner(rt2x00dev, false); - queue_delayed_work(rt2x00dev->hw->workqueue, - &link->work, LINK_TUNE_INTERVAL); + ieee80211_queue_delayed_work(rt2x00dev->hw, + &link->work, LINK_TUNE_INTERVAL); } void rt2x00link_stop_tuner(struct rt2x00_dev *rt2x00dev) @@ -461,8 +461,8 @@ static void rt2x00link_tuner(struct work_struct *work) * Increase tuner counter, and reschedule the next link tuner run. */ link->count++; - queue_delayed_work(rt2x00dev->hw->workqueue, - &link->work, LINK_TUNE_INTERVAL); + ieee80211_queue_delayed_work(rt2x00dev->hw, + &link->work, LINK_TUNE_INTERVAL); } void rt2x00link_register(struct rt2x00_dev *rt2x00dev) diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index e92c8f99d695..81febdfd6639 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -431,7 +431,7 @@ void rt2x00mac_configure_filter(struct ieee80211_hw *hw, if (!test_bit(DRIVER_REQUIRE_SCHEDULED, &rt2x00dev->flags)) rt2x00dev->ops->lib->config_filter(rt2x00dev, *total_flags); else - queue_work(rt2x00dev->hw->workqueue, &rt2x00dev->filter_work); + ieee80211_queue_work(rt2x00dev->hw, &rt2x00dev->filter_work); } EXPORT_SYMBOL_GPL(rt2x00mac_configure_filter); diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c index c9b9dbe584c6..53f57dc52226 100644 --- a/drivers/net/wireless/rtl818x/rtl8187_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c @@ -220,7 +220,7 @@ static void rtl8187_tx_cb(struct urb *urb) * reading a register in the device. We are in interrupt mode * here, thus queue the skb and finish on a work queue. 
*/ skb_queue_tail(&priv->b_tx_status.queue, skb); - queue_delayed_work(hw->workqueue, &priv->work, 0); + ieee80211_queue_delayed_work(hw, &priv->work, 0); } } diff --git a/drivers/net/wireless/rtl818x/rtl8187_leds.c b/drivers/net/wireless/rtl818x/rtl8187_leds.c index cf9f899fe0e6..a6cfb7e77994 100644 --- a/drivers/net/wireless/rtl818x/rtl8187_leds.c +++ b/drivers/net/wireless/rtl818x/rtl8187_leds.c @@ -108,11 +108,11 @@ static void rtl8187_led_brightness_set(struct led_classdev *led_dev, struct rtl8187_priv *priv = hw->priv; if (brightness == LED_OFF) { - queue_delayed_work(hw->workqueue, &priv->led_off, 0); + ieee80211_queue_delayed_work(hw, &priv->led_off, 0); /* The LED is off for 1/20 sec so that it just blinks. */ - queue_delayed_work(hw->workqueue, &priv->led_on, HZ / 20); + ieee80211_queue_delayed_work(hw, &priv->led_on, HZ / 20); } else - queue_delayed_work(hw->workqueue, &priv->led_on, 0); + ieee80211_queue_delayed_work(hw, &priv->led_on, 0); } static int rtl8187_register_led(struct ieee80211_hw *dev, @@ -193,7 +193,7 @@ void rtl8187_leds_init(struct ieee80211_hw *dev, u16 custid) err = rtl8187_register_led(dev, &priv->led_rx, name, ieee80211_get_rx_led_name(dev), ledpin); if (!err) { - queue_delayed_work(dev->workqueue, &priv->led_on, 0); + ieee80211_queue_delayed_work(dev, &priv->led_on, 0); return; } /* registration of RX LED failed - unregister TX */ @@ -209,7 +209,7 @@ void rtl8187_leds_exit(struct ieee80211_hw *dev) struct rtl8187_priv *priv = dev->priv; /* turn the LED off before exiting */ - queue_delayed_work(dev->workqueue, &priv->led_off, 0); + ieee80211_queue_delayed_work(dev, &priv->led_off, 0); cancel_delayed_work_sync(&priv->led_off); cancel_delayed_work_sync(&priv->led_on); rtl8187_unregister_led(&priv->led_rx); diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 9600b72495da..54abdd0c0045 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -698,7 +698,7 @@ int zd_mac_rx(struct ieee80211_hw *hw, const u8 *buffer, unsigned int length) && !mac->pass_ctrl) return 0; - fc = *(__le16 *)buffer; + fc = get_unaligned((__le16*)buffer); need_padding = ieee80211_is_data_qos(fc) ^ ieee80211_has_a4(fc); skb = dev_alloc_skb(length + (need_padding ? 2 : 0)); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index d4e09a06b4a2..5ed93f4406a8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -72,6 +72,21 @@ * not do so then mac80211 may add this under certain circumstances. */ +/** + * DOC: mac80211 workqueue + * + * mac80211 provides its own workqueue for drivers and internal mac80211 use. + * The workqueue is a single threaded workqueue and can only be accessed by + * helpers for sanity checking. Drivers must ensure all work added onto the + * mac80211 workqueue should be cancelled on the driver stop() callback. + * + * mac80211 will flushed the workqueue upon interface removal and during + * suspend. + * + * All work performed on the mac80211 workqueue must not acquire the RTNL lock. + * + */ + /** * enum ieee80211_max_queues - maximum number of queues * @@ -913,12 +928,6 @@ enum ieee80211_hw_flags { * * @conf: &struct ieee80211_conf, device configuration, don't use. * - * @workqueue: single threaded workqueue available for driver use, - * allocated by mac80211 on registration and flushed when an - * interface is removed. - * NOTICE: All work performed on this workqueue must not - * acquire the RTNL lock. 
- * * @priv: pointer to private area that was allocated for driver use * along with this structure. * @@ -954,7 +963,6 @@ enum ieee80211_hw_flags { struct ieee80211_hw { struct ieee80211_conf conf; struct wiphy *wiphy; - struct workqueue_struct *workqueue; const char *rate_control_algorithm; void *priv; u32 flags; @@ -1301,7 +1309,8 @@ enum ieee80211_ampdu_mlme_action { * is disabled. This should turn off the hardware (at least * it must turn off frame reception.) * May be called right after add_interface if that rejects - * an interface. + * an interface. If you added any work onto the mac80211 workqueue + * you should ensure to cancel it on this callback. * Must be implemented. * * @add_interface: Called when a netdevice attached to the hardware is @@ -1927,6 +1936,31 @@ void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, struct ieee80211_vif *vif), void *data); +/** + * ieee80211_queue_work - add work onto the mac80211 workqueue + * + * Drivers and mac80211 use this to add work onto the mac80211 workqueue. + * This helper ensures drivers are not queueing work when they should not be. + * + * @hw: the hardware struct for the interface we are adding work for + * @work: the work we want to add onto the mac80211 workqueue + */ +void ieee80211_queue_work(struct ieee80211_hw *hw, struct work_struct *work); + +/** + * ieee80211_queue_delayed_work - add work onto the mac80211 workqueue + * + * Drivers and mac80211 use this to queue delayed work onto the mac80211 + * workqueue. + * + * @hw: the hardware struct for the interface we are adding work for + * @dwork: delayable work to queue onto the mac80211 workqueue + * @delay: number of jiffies to wait before queueing + */ +void ieee80211_queue_delayed_work(struct ieee80211_hw *hw, + struct delayed_work *dwork, + unsigned long delay); + /** * ieee80211_start_tx_ba_session - Start a tx Block Ack session. * @hw: pointer as obtained from ieee80211_alloc_hw(). diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 6e3cca65c460..920ec8792f4b 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -781,7 +781,7 @@ static void ieee80211_ibss_timer(unsigned long data) } set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request); - queue_work(local->hw.workqueue, &ifibss->work); + ieee80211_queue_work(&local->hw, &ifibss->work); } #ifdef CONFIG_PM @@ -853,7 +853,7 @@ ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) case IEEE80211_STYPE_PROBE_REQ: case IEEE80211_STYPE_AUTH: skb_queue_tail(&sdata->u.ibss.skb_queue, skb); - queue_work(local->hw.workqueue, &sdata->u.ibss.work); + ieee80211_queue_work(&local->hw, &sdata->u.ibss.work); return RX_QUEUED; } @@ -912,7 +912,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_idle(sdata->local); set_bit(IEEE80211_IBSS_REQ_RUN, &sdata->u.ibss.request); - queue_work(sdata->local->hw.workqueue, &sdata->u.ibss.work); + ieee80211_queue_work(&sdata->local->hw, &sdata->u.ibss.work); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index aec6853cb435..316825be2019 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -614,6 +614,12 @@ struct ieee80211_local { const struct ieee80211_ops *ops; + /* + * private workqueue to mac80211. 
mac80211 makes this accessible + * via ieee80211_queue_work() + */ + struct workqueue_struct *workqueue; + unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES]; /* also used to protect ampdu_ac_queue and amdpu_ac_stop_refcnt */ spinlock_t queue_stop_reason_lock; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a83087f4237d..8c1284d45e69 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -312,7 +312,7 @@ static int ieee80211_open(struct net_device *dev) * to fix this. */ if (sdata->vif.type == NL80211_IFTYPE_STATION) - queue_work(local->hw.workqueue, &sdata->u.mgd.work); + ieee80211_queue_work(&local->hw, &sdata->u.mgd.work); netif_tx_start_all_queues(dev); @@ -551,7 +551,7 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_led_radio(local, false); - flush_workqueue(local->hw.workqueue); + flush_workqueue(local->workqueue); tasklet_disable(&local->tx_pending_tasklet); tasklet_disable(&local->tasklet); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5e76dd1daf71..22e07385ff60 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -821,9 +821,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (hw->queues > IEEE80211_MAX_QUEUES) hw->queues = IEEE80211_MAX_QUEUES; - local->hw.workqueue = + local->workqueue = create_singlethread_workqueue(wiphy_name(local->hw.wiphy)); - if (!local->hw.workqueue) { + if (!local->workqueue) { result = -ENOMEM; goto fail_workqueue; } @@ -913,7 +913,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) sta_info_stop(local); fail_sta_info: debugfs_hw_del(local); - destroy_workqueue(local->hw.workqueue); + destroy_workqueue(local->workqueue); fail_workqueue: wiphy_unregister(local->hw.wiphy); fail_wiphy_register: @@ -955,7 +955,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) skb_queue_purge(&local->skb_queue); skb_queue_purge(&local->skb_queue_unreliable); - destroy_workqueue(local->hw.workqueue); + destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); ieee80211_wep_free(local); ieee80211_led_exit(local); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 9a3826978b1c..2f4f518ab45c 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -54,7 +54,7 @@ static void ieee80211_mesh_housekeeping_timer(unsigned long data) return; } - queue_work(local->hw.workqueue, &ifmsh->work); + ieee80211_queue_work(local->hw.workqueue, &ifmsh->work); } /** @@ -357,7 +357,7 @@ static void ieee80211_mesh_path_timer(unsigned long data) return; } - queue_work(local->hw.workqueue, &ifmsh->work); + ieee80211_queue_work(local->hw.workqueue, &ifmsh->work); } struct mesh_table *mesh_table_grow(struct mesh_table *tbl) @@ -471,7 +471,7 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local = sdata->local; ifmsh->housekeeping = true; - queue_work(local->hw.workqueue, &ifmsh->work); + ieee80211_queue_work(local->hw.workqueue, &ifmsh->work); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON | BSS_CHANGED_BEACON_ENABLED); } @@ -619,7 +619,7 @@ void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) if (ieee80211_vif_is_mesh(&sdata->vif)) - queue_work(local->hw.workqueue, &sdata->u.mesh.work); + ieee80211_queue_work(local->hw.workqueue, &sdata->u.mesh.work); rcu_read_unlock(); } @@ -692,7 +692,7 @@ ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: 
skb_queue_tail(&ifmsh->skb_queue, skb); - queue_work(local->hw.workqueue, &ifmsh->work); + ieee80211_queue_work(local->hw.workqueue, &ifmsh->work); return RX_QUEUED; } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index e93c37ef6a48..11ab71a68ff9 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -660,14 +660,14 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) spin_unlock(&ifmsh->mesh_preq_queue_lock); if (time_after(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata))) - queue_work(sdata->local->hw.workqueue, &ifmsh->work); + ieee80211_queue_work(sdata->local->hw.workqueue, &ifmsh->work); else if (time_before(jiffies, ifmsh->last_preq)) { /* avoid long wait if did not send preqs for a long time * and jiffies wrapped around */ ifmsh->last_preq = jiffies - min_preq_int_jiff(sdata) - 1; - queue_work(sdata->local->hw.workqueue, &ifmsh->work); + ieee80211_queue_work(sdata->local->hw.workqueue, &ifmsh->work); } else mod_timer(&ifmsh->mesh_path_timer, ifmsh->last_preq + min_preq_int_jiff(sdata)); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ee83125ed179..0779ba150b26 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -565,7 +565,7 @@ static void ieee80211_chswitch_timer(unsigned long data) return; } - queue_work(sdata->local->hw.workqueue, &ifmgd->chswitch_work); + ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); } void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, @@ -597,7 +597,7 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, sdata->local->csa_channel = new_ch; if (sw_elem->count <= 1) { - queue_work(sdata->local->hw.workqueue, &ifmgd->chswitch_work); + ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work); } else { ieee80211_stop_queues_by_reason(&sdata->local->hw, IEEE80211_QUEUE_STOP_REASON_CSA); @@ -763,7 +763,7 @@ void ieee80211_dynamic_ps_timer(unsigned long data) if (local->quiescing || local->suspended) return; - queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work); + ieee80211_queue_work(&local->hw, &local->dynamic_ps_enable_work); } /* MLME */ @@ -950,7 +950,7 @@ ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, * due to work needing to be done. Hence, queue the STAs work * again for that. */ - queue_work(local->hw.workqueue, &ifmgd->work); + ieee80211_queue_work(&local->hw, &ifmgd->work); return RX_MGMT_CFG80211_AUTH_TO; } @@ -995,7 +995,7 @@ ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, * due to work needing to be done. Hence, queue the STAs work * again for that. */ - queue_work(local->hw.workqueue, &ifmgd->work); + ieee80211_queue_work(&local->hw, &ifmgd->work); return RX_MGMT_CFG80211_AUTH_TO; } @@ -1124,7 +1124,7 @@ ieee80211_associate(struct ieee80211_sub_if_data *sdata, * due to work needing to be done. Hence, queue the STAs work * again for that. 
*/ - queue_work(local->hw.workqueue, &ifmgd->work); + ieee80211_queue_work(&local->hw, &ifmgd->work); return RX_MGMT_CFG80211_ASSOC_TO; } @@ -1232,8 +1232,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - queue_work(sdata->local->hw.workqueue, - &sdata->u.mgd.beacon_loss_work); + ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_loss_work); } EXPORT_SYMBOL(ieee80211_beacon_loss); @@ -1888,7 +1887,7 @@ ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, case IEEE80211_STYPE_DISASSOC: case IEEE80211_STYPE_ACTION: skb_queue_tail(&sdata->u.mgd.skb_queue, skb); - queue_work(local->hw.workqueue, &sdata->u.mgd.work); + ieee80211_queue_work(&local->hw, &sdata->u.mgd.work); return RX_QUEUED; } @@ -2026,7 +2025,7 @@ static void ieee80211_sta_timer(unsigned long data) return; } - queue_work(local->hw.workqueue, &ifmgd->work); + ieee80211_queue_work(&local->hw, &ifmgd->work); } static void ieee80211_sta_work(struct work_struct *work) @@ -2051,13 +2050,11 @@ static void ieee80211_sta_work(struct work_struct *work) return; /* - * Nothing should have been stuffed into the workqueue during - * the suspend->resume cycle. If this WARN is seen then there - * is a bug with either the driver suspend or something in - * mac80211 stuffing into the workqueue which we haven't yet - * cleared during mac80211's suspend cycle. + * ieee80211_queue_work() should have picked up most cases, + * here we'll pick the the rest. */ - if (WARN_ON(local->suspended)) + if (WARN(local->suspended, "STA MLME work scheduled while " + "going to suspend\n")) return; ifmgd = &sdata->u.mgd; @@ -2113,9 +2110,9 @@ static void ieee80211_sta_work(struct work_struct *work) mutex_unlock(&ifmgd->mtx); if (test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request)) - queue_delayed_work(local->hw.workqueue, - &local->scan_work, - round_jiffies_relative(0)); + ieee80211_queue_delayed_work(&local->hw, + &local->scan_work, + round_jiffies_relative(0)); return; } @@ -2196,8 +2193,7 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data) if (local->quiescing) return; - queue_work(sdata->local->hw.workqueue, - &sdata->u.mgd.beacon_loss_work); + ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_loss_work); } static void ieee80211_sta_conn_mon_timer(unsigned long data) @@ -2210,7 +2206,7 @@ static void ieee80211_sta_conn_mon_timer(unsigned long data) if (local->quiescing) return; - queue_work(local->hw.workqueue, &ifmgd->monitor_work); + ieee80211_queue_work(&local->hw, &ifmgd->monitor_work); } static void ieee80211_sta_monitor_work(struct work_struct *work) @@ -2229,10 +2225,10 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) IEEE80211_STA_CONNECTION_POLL); /* let's probe the connection once */ - queue_work(sdata->local->hw.workqueue, + ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.monitor_work); /* and do all the other regular work too */ - queue_work(sdata->local->hw.workqueue, + ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.work); } } @@ -2393,7 +2389,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, list_add(&wk->list, &sdata->u.mgd.work_list); mutex_unlock(&ifmgd->mtx); - queue_work(sdata->local->hw.workqueue, &sdata->u.mgd.work); + ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.work); return 0; } @@ -2467,7 +2463,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, else ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT; - 
queue_work(sdata->local->hw.workqueue, &sdata->u.mgd.work); + ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.work); err = 0; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 3320f7daaf25..a5d2f1fb4417 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -26,7 +26,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) /* make quiescing visible to timers everywhere */ mb(); - flush_workqueue(local->hw.workqueue); + flush_workqueue(local->workqueue); /* Don't try to run timers while suspended. */ del_timer_sync(&local->sta_cleanup); @@ -117,7 +117,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) * shouldn't be doing (or cancel everything in the * stop callback) that but better safe than sorry. */ - flush_workqueue(local->hw.workqueue); + flush_workqueue(local->workqueue); local->suspended = true; /* need suspended to be visible before quiescing is false */ diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 45731000eb8d..244f53f3c8b4 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -385,8 +385,9 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) spin_unlock_bh(&local->filter_lock); /* TODO: start scan as soon as all nullfunc frames are ACKed */ - queue_delayed_work(local->hw.workqueue, &local->scan_work, - IEEE80211_CHANNEL_TIME); + ieee80211_queue_delayed_work(&local->hw, + &local->scan_work, + IEEE80211_CHANNEL_TIME); return 0; } @@ -715,8 +716,7 @@ void ieee80211_scan_work(struct work_struct *work) } } while (next_delay == 0); - queue_delayed_work(local->hw.workqueue, &local->scan_work, - next_delay); + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay); } int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4e1b2ba122cd..7cffaa046b33 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1400,7 +1400,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, if (local->hw.conf.flags & IEEE80211_CONF_PS) { ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_QUEUE_STOP_REASON_PS); - queue_work(local->hw.workqueue, + ieee80211_queue_work(&local->hw, &local->dynamic_ps_disable_work); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 8502936e5314..e55d57f559ec 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -511,6 +511,46 @@ void ieee80211_iterate_active_interfaces_atomic( } EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic); +/* + * Nothing should have been stuffed into the workqueue during + * the suspend->resume cycle. If this WARN is seen then there + * is a bug with either the driver suspend or something in + * mac80211 stuffing into the workqueue which we haven't yet + * cleared during mac80211's suspend cycle. 
+ */ +static bool ieee80211_can_queue_work(struct ieee80211_local *local) +{ + if (WARN(local->suspended, "queueing ieee80211 work while " + "going to suspend\n")) + return false; + + return true; +} + +void ieee80211_queue_work(struct ieee80211_hw *hw, struct work_struct *work) +{ + struct ieee80211_local *local = hw_to_local(hw); + + if (!ieee80211_can_queue_work(local)) + return; + + queue_work(local->workqueue, work); +} +EXPORT_SYMBOL(ieee80211_queue_work); + +void ieee80211_queue_delayed_work(struct ieee80211_hw *hw, + struct delayed_work *dwork, + unsigned long delay) +{ + struct ieee80211_local *local = hw_to_local(hw); + + if (!ieee80211_can_queue_work(local)) + return; + + queue_delayed_work(local->workqueue, dwork, delay); +} +EXPORT_SYMBOL(ieee80211_queue_delayed_work); + void ieee802_11_parse_elems(u8 *start, size_t len, struct ieee802_11_elems *elems) { @@ -1114,3 +1154,4 @@ int ieee80211_reconfig(struct ieee80211_local *local) #endif return 0; } + -- cgit v1.2.3 From 1487cd5e76337555737cbc55d7d83f41460d198f Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 30 Jul 2009 19:41:20 +0300 Subject: usbnet: allow "minidriver" to prevent urb unlinking on usbnet_stop rndis_wlan devices freeze after running usbnet_stop several times. It appears that firmware freezes in state where it does not respond to any RNDIS commands and device have to be physically unplugged/replugged. This patch lets minidrivers to disable unlink_urbs on usbnet_stop through new info flag. Signed-off-by: Jussi Kivilinna Cc: David Brownell Signed-off-by: John W. Linville --- drivers/net/usb/usbnet.c | 32 ++++++++++++++++++-------------- drivers/net/wireless/rndis_wlan.c | 9 ++++++--- include/linux/usb/usbnet.h | 1 + 3 files changed, 25 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 25e435c49040..af1fe4696509 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -601,21 +601,25 @@ int usbnet_stop (struct net_device *net) info->description); } - // ensure there are no more active urbs - add_wait_queue (&unlink_wakeup, &wait); - dev->wait = &unlink_wakeup; - temp = unlink_urbs (dev, &dev->txq) + unlink_urbs (dev, &dev->rxq); - - // maybe wait for deletions to finish. - while (!skb_queue_empty(&dev->rxq) - && !skb_queue_empty(&dev->txq) - && !skb_queue_empty(&dev->done)) { - msleep(UNLINK_TIMEOUT_MS); - if (netif_msg_ifdown (dev)) - devdbg (dev, "waited for %d urb completions", temp); + if (!(info->flags & FLAG_AVOID_UNLINK_URBS)) { + /* ensure there are no more active urbs */ + add_wait_queue(&unlink_wakeup, &wait); + dev->wait = &unlink_wakeup; + temp = unlink_urbs(dev, &dev->txq) + + unlink_urbs(dev, &dev->rxq); + + /* maybe wait for deletions to finish. 
*/ + while (!skb_queue_empty(&dev->rxq) + && !skb_queue_empty(&dev->txq) + && !skb_queue_empty(&dev->done)) { + msleep(UNLINK_TIMEOUT_MS); + if (netif_msg_ifdown(dev)) + devdbg(dev, "waited for %d urb completions", + temp); + } + dev->wait = NULL; + remove_wait_queue(&unlink_wakeup, &wait); } - dev->wait = NULL; - remove_wait_queue (&unlink_wakeup, &wait); usb_kill_urb(dev->interrupt); diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 09c0702ae645..76c5ec6bbbc5 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -2513,7 +2513,8 @@ static int rndis_wlan_stop(struct usbnet *usbdev) static const struct driver_info bcm4320b_info = { .description = "Wireless RNDIS device, BCM4320b based", - .flags = FLAG_WLAN | FLAG_FRAMING_RN | FLAG_NO_SETINT, + .flags = FLAG_WLAN | FLAG_FRAMING_RN | FLAG_NO_SETINT | + FLAG_AVOID_UNLINK_URBS, .bind = rndis_wlan_bind, .unbind = rndis_wlan_unbind, .status = rndis_status, @@ -2527,7 +2528,8 @@ static const struct driver_info bcm4320b_info = { static const struct driver_info bcm4320a_info = { .description = "Wireless RNDIS device, BCM4320a based", - .flags = FLAG_WLAN | FLAG_FRAMING_RN | FLAG_NO_SETINT, + .flags = FLAG_WLAN | FLAG_FRAMING_RN | FLAG_NO_SETINT | + FLAG_AVOID_UNLINK_URBS, .bind = rndis_wlan_bind, .unbind = rndis_wlan_unbind, .status = rndis_status, @@ -2541,7 +2543,8 @@ static const struct driver_info bcm4320a_info = { static const struct driver_info rndis_wlan_info = { .description = "Wireless RNDIS device", - .flags = FLAG_WLAN | FLAG_FRAMING_RN | FLAG_NO_SETINT, + .flags = FLAG_WLAN | FLAG_FRAMING_RN | FLAG_NO_SETINT | + FLAG_AVOID_UNLINK_URBS, .bind = rndis_wlan_bind, .unbind = rndis_wlan_unbind, .status = rndis_status, diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 7c17b2efba86..c642f78dd9cf 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -86,6 +86,7 @@ struct driver_info { #define FLAG_FRAMING_AX 0x0040 /* AX88772/178 packets */ #define FLAG_WLAN 0x0080 /* use "wlan%d" names */ +#define FLAG_AVOID_UNLINK_URBS 0x0100 /* don't unlink urbs at usbnet_stop() */ /* init device ... can sleep, or cause probe() failure */ -- cgit v1.2.3 From 2a4901bcbe9c122bd56e1f6c337fcb4ad75fafb7 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 30 Jul 2009 19:41:52 +0300 Subject: rndis_host: allow rndis_wlan to see all indications Allow rndis_wlan to see all indications. Currently rndis_host lets rndis_wlan to know about link state changes only, but there is whole set of other 802.11-specific indications that rndis_wlan should handle properly. So rename link_change() to indication() and convert rndis_wlan to use it. Signed-off-by: Jussi Kivilinna Cc: David Brownell Signed-off-by: John W. Linville --- drivers/net/usb/rndis_host.c | 50 +++++++++++++++++++++++---------------- drivers/net/wireless/rndis_wlan.c | 31 +++++++++++++++++++----- include/linux/usb/usbnet.h | 5 ++-- 3 files changed, 56 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 2232232b7989..d032bba9bc4c 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -64,6 +64,32 @@ void rndis_status(struct usbnet *dev, struct urb *urb) } EXPORT_SYMBOL_GPL(rndis_status); +/* + * RNDIS indicate messages. 
+ */ +static void rndis_msg_indicate(struct usbnet *dev, struct rndis_indicate *msg, + int buflen) +{ + struct cdc_state *info = (void *)&dev->data; + struct device *udev = &info->control->dev; + + if (dev->driver_info->indication) { + dev->driver_info->indication(dev, msg, buflen); + } else { + switch (msg->status) { + case RNDIS_STATUS_MEDIA_CONNECT: + dev_info(udev, "rndis media connect\n"); + break; + case RNDIS_STATUS_MEDIA_DISCONNECT: + dev_info(udev, "rndis media disconnect\n"); + break; + default: + dev_info(udev, "rndis indication: 0x%08x\n", + le32_to_cpu(msg->status)); + } + } +} + /* * RPC done RNDIS-style. Caller guarantees: * - message is properly byteswapped @@ -143,27 +169,9 @@ int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf, int buflen) request_id, xid); /* then likely retry */ } else switch (buf->msg_type) { - case RNDIS_MSG_INDICATE: { /* fault/event */ - struct rndis_indicate *msg = (void *)buf; - int state = 0; - - switch (msg->status) { - case RNDIS_STATUS_MEDIA_CONNECT: - state = 1; - case RNDIS_STATUS_MEDIA_DISCONNECT: - dev_info(&info->control->dev, - "rndis media %sconnect\n", - !state?"dis":""); - if (dev->driver_info->link_change) - dev->driver_info->link_change( - dev, state); - break; - default: - dev_info(&info->control->dev, - "rndis indication: 0x%08x\n", - le32_to_cpu(msg->status)); - } - } + case RNDIS_MSG_INDICATE: /* fault/event */ + rndis_msg_indicate(dev, (void *)buf, buflen); + break; case RNDIS_MSG_KEEPALIVE: { /* ping */ struct rndis_keepalive_c *msg = (void *)buf; diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index f6dcbb168b78..6b6452b0e8c4 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -2211,13 +2211,32 @@ static void rndis_wlan_set_multicast_list(struct net_device *dev) queue_work(priv->workqueue, &priv->work); } -static void rndis_wlan_link_change(struct usbnet *usbdev, int state) +static void rndis_wlan_indication(struct usbnet *usbdev, void *ind, int buflen) { struct rndis_wlan_private *priv = get_rndis_wlan_priv(usbdev); + struct rndis_indicate *msg = ind; /* queue work to avoid recursive calls into rndis_command */ - set_bit(state ? 
WORK_LINK_UP : WORK_LINK_DOWN, &priv->work_pending); - queue_work(priv->workqueue, &priv->work); + switch (msg->status) { + case RNDIS_STATUS_MEDIA_CONNECT: + devinfo(usbdev, "media connect"); + + set_bit(WORK_LINK_UP, &priv->work_pending); + queue_work(priv->workqueue, &priv->work); + break; + + case RNDIS_STATUS_MEDIA_DISCONNECT: + devinfo(usbdev, "media disconnect"); + + set_bit(WORK_LINK_DOWN, &priv->work_pending); + queue_work(priv->workqueue, &priv->work); + break; + + default: + devinfo(usbdev, "indication: 0x%08x", + le32_to_cpu(msg->status)); + break; + } } @@ -2666,7 +2685,7 @@ static const struct driver_info bcm4320b_info = { .reset = rndis_wlan_reset, .stop = rndis_wlan_stop, .early_init = bcm4320b_early_init, - .link_change = rndis_wlan_link_change, + .indication = rndis_wlan_indication, }; static const struct driver_info bcm4320a_info = { @@ -2681,7 +2700,7 @@ static const struct driver_info bcm4320a_info = { .reset = rndis_wlan_reset, .stop = rndis_wlan_stop, .early_init = bcm4320a_early_init, - .link_change = rndis_wlan_link_change, + .indication = rndis_wlan_indication, }; static const struct driver_info rndis_wlan_info = { @@ -2696,7 +2715,7 @@ static const struct driver_info rndis_wlan_info = { .reset = rndis_wlan_reset, .stop = rndis_wlan_stop, .early_init = bcm4320a_early_init, - .link_change = rndis_wlan_link_change, + .indication = rndis_wlan_indication, }; /*-------------------------------------------------------------------------*/ diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index c642f78dd9cf..de8b4b18961b 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -122,9 +122,8 @@ struct driver_info { * right after minidriver have initialized hardware. */ int (*early_init)(struct usbnet *dev); - /* called by minidriver when link state changes, state: 0=disconnect, - * 1=connect */ - void (*link_change)(struct usbnet *dev, int state); + /* called by minidriver when receiving indication */ + void (*indication)(struct usbnet *dev, void *ind, int indlen); /* for new devices, use the descriptor-reading code instead */ int in; /* rx endpoint */ -- cgit v1.2.3 From 030645aceb3d9f10b1c3d2231c50f5a8bb3a9667 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Thu, 30 Jul 2009 19:41:58 +0300 Subject: rndis_wlan: handle 802.11 indications from device Add handling for 802.11 specific rndis indications. Signed-off-by: Jussi Kivilinna Signed-off-by: John W. 
Linville --- drivers/net/wireless/rndis_wlan.c | 233 +++++++++++++++++++++++++++++++++++++- include/linux/usb/rndis_host.h | 13 ++- 2 files changed, 239 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 6b6452b0e8c4..7a50cfa18843 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -201,6 +201,24 @@ enum ndis_80211_priv_filter { NDIS_80211_PRIV_8021X_WEP }; +enum ndis_80211_status_type { + NDIS_80211_STATUSTYPE_AUTHENTICATION, + NDIS_80211_STATUSTYPE_MEDIASTREAMMODE, + NDIS_80211_STATUSTYPE_PMKID_CANDIDATELIST, + NDIS_80211_STATUSTYPE_RADIOSTATE, +}; + +enum ndis_80211_media_stream_mode { + NDIS_80211_MEDIA_STREAM_OFF, + NDIS_80211_MEDIA_STREAM_ON +}; + +enum ndis_80211_radio_status { + NDIS_80211_RADIO_STATUS_ON, + NDIS_80211_RADIO_STATUS_HARDWARE_OFF, + NDIS_80211_RADIO_STATUS_SOFTWARE_OFF, +}; + enum ndis_80211_addkey_bits { NDIS_80211_ADDKEY_8021X_AUTH = cpu_to_le32(1 << 28), NDIS_80211_ADDKEY_SET_INIT_RECV_SEQ = cpu_to_le32(1 << 29), @@ -213,6 +231,35 @@ enum ndis_80211_addwep_bits { NDIS_80211_ADDWEP_TRANSMIT_KEY = cpu_to_le32(1 << 31) }; +struct ndis_80211_auth_request { + __le32 length; + u8 bssid[6]; + u8 padding[2]; + __le32 flags; +} __attribute__((packed)); + +struct ndis_80211_pmkid_candidate { + u8 bssid[6]; + u8 padding[2]; + __le32 flags; +} __attribute__((packed)); + +struct ndis_80211_pmkid_cand_list { + __le32 version; + __le32 num_candidates; + struct ndis_80211_pmkid_candidate candidate_list[0]; +} __attribute__((packed)); + +struct ndis_80211_status_indication { + __le32 status_type; + union { + enum ndis_80211_media_stream_mode media_stream_mode; + enum ndis_80211_radio_status radio_status; + struct ndis_80211_auth_request auth_request[0]; + struct ndis_80211_pmkid_cand_list cand_list; + } u; +} __attribute__((packed)); + struct ndis_80211_ssid { __le32 length; u8 essid[NDIS_802_11_LENGTH_SSID]; @@ -2211,16 +2258,195 @@ static void rndis_wlan_set_multicast_list(struct net_device *dev) queue_work(priv->workqueue, &priv->work); } + +static void rndis_wlan_auth_indication(struct usbnet *usbdev, + struct ndis_80211_status_indication *indication, + int len) +{ + u8 *buf; + const char *type; + int flags, buflen; + bool pairwise_error, group_error; + struct ndis_80211_auth_request *auth_req; + + /* must have at least one array entry */ + if (len < offsetof(struct ndis_80211_status_indication, u) + + sizeof(struct ndis_80211_auth_request)) { + devinfo(usbdev, "authentication indication: " + "too short message (%i)", len); + return; + } + + buf = (void *)&indication->u.auth_request[0]; + buflen = len - offsetof(struct ndis_80211_status_indication, u); + + while (buflen >= sizeof(*auth_req)) { + auth_req = (void *)buf; + type = "unknown"; + flags = le32_to_cpu(auth_req->flags); + pairwise_error = false; + group_error = false; + + if (flags & 0x1) + type = "reauth request"; + if (flags & 0x2) + type = "key update request"; + if (flags & 0x6) { + pairwise_error = true; + type = "pairwise_error"; + } + if (flags & 0xe) { + group_error = true; + type = "group_error"; + } + + devinfo(usbdev, "authentication indication: %s (0x%08x)", type, + le32_to_cpu(auth_req->flags)); + + if (pairwise_error || group_error) { + union iwreq_data wrqu; + struct iw_michaelmicfailure micfailure; + + memset(&micfailure, 0, sizeof(micfailure)); + if (pairwise_error) + micfailure.flags |= IW_MICFAILURE_PAIRWISE; + if (group_error) + micfailure.flags |= IW_MICFAILURE_GROUP; + + 
memcpy(micfailure.src_addr.sa_data, auth_req->bssid, + ETH_ALEN); + + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = sizeof(micfailure); + wireless_send_event(usbdev->net, IWEVMICHAELMICFAILURE, + &wrqu, (u8 *)&micfailure); + } + + buflen -= le32_to_cpu(auth_req->length); + buf += le32_to_cpu(auth_req->length); + } +} + +static void rndis_wlan_pmkid_cand_list_indication(struct usbnet *usbdev, + struct ndis_80211_status_indication *indication, + int len) +{ + struct ndis_80211_pmkid_cand_list *cand_list; + int list_len, expected_len, i; + + if (len < offsetof(struct ndis_80211_status_indication, u) + + sizeof(struct ndis_80211_pmkid_cand_list)) { + devinfo(usbdev, "pmkid candidate list indication: " + "too short message (%i)", len); + return; + } + + list_len = le32_to_cpu(indication->u.cand_list.num_candidates) * + sizeof(struct ndis_80211_pmkid_candidate); + expected_len = sizeof(struct ndis_80211_pmkid_cand_list) + list_len + + offsetof(struct ndis_80211_status_indication, u); + + if (len < expected_len) { + devinfo(usbdev, "pmkid candidate list indication: " + "list larger than buffer (%i < %i)", + len, expected_len); + return; + } + + cand_list = &indication->u.cand_list; + + devinfo(usbdev, "pmkid candidate list indication: " + "version %i, candidates %i", + le32_to_cpu(cand_list->version), + le32_to_cpu(cand_list->num_candidates)); + + if (le32_to_cpu(cand_list->version) != 1) + return; + + for (i = 0; i < le32_to_cpu(cand_list->num_candidates); i++) { + struct iw_pmkid_cand pcand; + union iwreq_data wrqu; + struct ndis_80211_pmkid_candidate *cand = + &cand_list->candidate_list[i]; + + devdbg(usbdev, "cand[%i]: flags: 0x%08x, bssid: %pM", + i, le32_to_cpu(cand->flags), cand->bssid); + + memset(&pcand, 0, sizeof(pcand)); + if (le32_to_cpu(cand->flags) & 0x01) + pcand.flags |= IW_PMKID_CAND_PREAUTH; + pcand.index = i; + memcpy(pcand.bssid.sa_data, cand->bssid, ETH_ALEN); + + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = sizeof(pcand); + wireless_send_event(usbdev->net, IWEVPMKIDCAND, &wrqu, + (u8 *)&pcand); + } +} + +static void rndis_wlan_media_specific_indication(struct usbnet *usbdev, + struct rndis_indicate *msg, int buflen) +{ + struct ndis_80211_status_indication *indication; + int len, offset; + + offset = offsetof(struct rndis_indicate, status) + + le32_to_cpu(msg->offset); + len = le32_to_cpu(msg->length); + + if (len < 8) { + devinfo(usbdev, "media specific indication, " + "ignore too short message (%i < 8)", len); + return; + } + + if (offset + len > buflen) { + devinfo(usbdev, "media specific indication, " + "too large to fit to buffer (%i > %i)", + offset + len, buflen); + return; + } + + indication = (void *)((u8 *)msg + offset); + + switch (le32_to_cpu(indication->status_type)) { + case NDIS_80211_STATUSTYPE_RADIOSTATE: + devinfo(usbdev, "radio state indication: %i", + le32_to_cpu(indication->u.radio_status)); + return; + + case NDIS_80211_STATUSTYPE_MEDIASTREAMMODE: + devinfo(usbdev, "media stream mode indication: %i", + le32_to_cpu(indication->u.media_stream_mode)); + return; + + case NDIS_80211_STATUSTYPE_AUTHENTICATION: + rndis_wlan_auth_indication(usbdev, indication, len); + return; + + case NDIS_80211_STATUSTYPE_PMKID_CANDIDATELIST: + rndis_wlan_pmkid_cand_list_indication(usbdev, indication, len); + return; + + default: + devinfo(usbdev, "media specific indication: " + "unknown status type 0x%08x", + le32_to_cpu(indication->status_type)); + } +} + + static void rndis_wlan_indication(struct usbnet *usbdev, void *ind, int buflen) { struct 
rndis_wlan_private *priv = get_rndis_wlan_priv(usbdev); struct rndis_indicate *msg = ind; - /* queue work to avoid recursive calls into rndis_command */ switch (msg->status) { case RNDIS_STATUS_MEDIA_CONNECT: devinfo(usbdev, "media connect"); + /* queue work to avoid recursive calls into rndis_command */ set_bit(WORK_LINK_UP, &priv->work_pending); queue_work(priv->workqueue, &priv->work); break; @@ -2228,10 +2454,15 @@ static void rndis_wlan_indication(struct usbnet *usbdev, void *ind, int buflen) case RNDIS_STATUS_MEDIA_DISCONNECT: devinfo(usbdev, "media disconnect"); + /* queue work to avoid recursive calls into rndis_command */ set_bit(WORK_LINK_DOWN, &priv->work_pending); queue_work(priv->workqueue, &priv->work); break; + case RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION: + rndis_wlan_media_specific_indication(usbdev, msg, buflen); + break; + default: devinfo(usbdev, "indication: 0x%08x", le32_to_cpu(msg->status)); diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h index 37836b937d97..1ef1ebc2b04f 100644 --- a/include/linux/usb/rndis_host.h +++ b/include/linux/usb/rndis_host.h @@ -70,12 +70,13 @@ struct rndis_msg_hdr { #define RNDIS_MSG_KEEPALIVE_C (RNDIS_MSG_KEEPALIVE|RNDIS_MSG_COMPLETION) /* codes for "status" field of completion messages */ -#define RNDIS_STATUS_SUCCESS cpu_to_le32(0x00000000) -#define RNDIS_STATUS_FAILURE cpu_to_le32(0xc0000001) -#define RNDIS_STATUS_INVALID_DATA cpu_to_le32(0xc0010015) -#define RNDIS_STATUS_NOT_SUPPORTED cpu_to_le32(0xc00000bb) -#define RNDIS_STATUS_MEDIA_CONNECT cpu_to_le32(0x4001000b) -#define RNDIS_STATUS_MEDIA_DISCONNECT cpu_to_le32(0x4001000c) +#define RNDIS_STATUS_SUCCESS cpu_to_le32(0x00000000) +#define RNDIS_STATUS_FAILURE cpu_to_le32(0xc0000001) +#define RNDIS_STATUS_INVALID_DATA cpu_to_le32(0xc0010015) +#define RNDIS_STATUS_NOT_SUPPORTED cpu_to_le32(0xc00000bb) +#define RNDIS_STATUS_MEDIA_CONNECT cpu_to_le32(0x4001000b) +#define RNDIS_STATUS_MEDIA_DISCONNECT cpu_to_le32(0x4001000c) +#define RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION cpu_to_le32(0x40010012) /* codes for OID_GEN_PHYSICAL_MEDIUM */ #define RNDIS_PHYSICAL_MEDIUM_UNSPECIFIED cpu_to_le32(0x00000000) -- cgit v1.2.3 From 8b19e6ca3bac7e04e93fb73f561d670e77c5fae6 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 30 Jul 2009 17:38:09 -0700 Subject: cfg80211: enable country IE support to all cfg80211 drivers Since the bss is always set now once we are connected, if the bss has its own information element we refer to it and pass that instead of relying on mac80211's parsing. Now all cfg80211 drivers get country IE support, automatically and we reduce the call overhead that we had on mac80211 which called this upon every beacon and instead now call this only upon a successfull connection by a STA on cfg80211. Acked-by: Johannes Berg Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 14 -------------- net/mac80211/mlme.c | 6 +----- net/wireless/reg.c | 6 +----- net/wireless/reg.h | 15 +++++++++++++++ net/wireless/sme.c | 16 ++++++++++++++++ 5 files changed, 33 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e1b92358242b..fa729979de88 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1513,20 +1513,6 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb); */ extern int regulatory_hint(struct wiphy *wiphy, const char *alpha2); -/** - * regulatory_hint_11d - hints a country IE as a regulatory domain - * @wiphy: the wireless device giving the hint (used only for reporting - * conflicts) - * @country_ie: pointer to the country IE - * @country_ie_len: length of the country IE - * - * We will intersect the rd with the what CRDA tells us should apply - * for the alpha2 this country IE belongs to, this prevents APs from - * sending us incorrect or outdated information against a country. - */ -extern void regulatory_hint_11d(struct wiphy *wiphy, - u8 *country_ie, - u8 country_ie_len); /** * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain * @wiphy: the wireless device we want to process the regulatory domain on diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2d5edfda867a..c9e4091cd2bb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1845,12 +1845,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, bssid, ap_ht_cap_flags); } + /* Note: country IE parsing is done for us by cfg80211 */ if (elems.country_elem) { - /* Note we are only reviewing this on beacons - * for the BSSID we are associated to */ - regulatory_hint_11d(local->hw.wiphy, - elems.country_elem, elems.country_elem_len); - /* TODO: IBSS also needs this */ if (elems.pwr_constr_elem) ieee80211_handle_pwr_constr(sdata, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6ab56f098de1..b3ac0aace0e5 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1822,10 +1822,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, env = ENVIRON_OUTDOOR; /* - * We will run this for *every* beacon processed for the BSSID, so - * we optimize an early check to exit out early if we don't have to - * do anything - * + * We will run this only upon a successful connection on cfg80211. * We leave conflict resolution to the workqueue, where can hold * cfg80211_mutex. */ @@ -1878,7 +1875,6 @@ free_rd_out: out: mutex_unlock(®_mutex); } -EXPORT_SYMBOL(regulatory_hint_11d); static bool freq_is_chan_12_13_14(u16 freq) { diff --git a/net/wireless/reg.h b/net/wireless/reg.h index e37829a49dc4..662a9dad76d5 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -36,4 +36,19 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy, struct ieee80211_channel *beacon_chan, gfp_t gfp); +/** + * regulatory_hint_11d - hints a country IE as a regulatory domain + * @wiphy: the wireless device giving the hint (used only for reporting + * conflicts) + * @country_ie: pointer to the country IE + * @country_ie_len: length of the country IE + * + * We will intersect the rd with the what CRDA tells us should apply + * for the alpha2 this country IE belongs to, this prevents APs from + * sending us incorrect or outdated information against a country. 
+ */ void regulatory_hint_11d(struct wiphy *wiphy, + u8 *country_ie, + u8 country_ie_len); + #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 3728d2b88b25..af91192eedf5 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -13,6 +13,7 @@ #include #include #include "nl80211.h" +#include "reg.h" struct cfg80211_conn { struct cfg80211_connect_params params; @@ -320,6 +321,7 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, struct cfg80211_bss *bss) { struct wireless_dev *wdev = dev->ieee80211_ptr; + u8 *country_ie; #ifdef CONFIG_WIRELESS_EXT union iwreq_data wrqu; #endif @@ -401,6 +403,20 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, wdev->sme_state = CFG80211_SME_CONNECTED; cfg80211_upload_connect_keys(wdev); + + country_ie = (u8 *) ieee80211_bss_get_ie(bss, WLAN_EID_COUNTRY); + + if (!country_ie) + return; + + /* + * ieee80211_bss_get_ie() ensures we can access: + * - country_ie + 2, the start of the country ie data, and + * - and country_ie[1] which is the IE length + */ + regulatory_hint_11d(wdev->wiphy, + country_ie + 2, + country_ie[1]); } void cfg80211_connect_result(struct net_device *dev, const u8 *bssid, -- cgit v1.2.3 From 3ad201496badddd8e1cda87ee6d29e8b3b8e1279 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Sun, 2 Aug 2009 02:36:49 +0300 Subject: rfkill: add the GPS radio type Although GPS is a technology w/o a transmitting radio and thus not a primary candidate for an rfkill switch, rfkill gives a unified interface point for devices with wireless technology. The input key is not supplied as it is to be deprecated. Cc: johannes@sipsolutions.net Signed-off-by: Tomas Winkler Acked-by: Marcel Holtmann Signed-off-by: John W. Linville --- include/linux/rfkill.h | 1 + net/rfkill/core.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 10202903141a..21ca51bf4dd2 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -47,6 +47,7 @@ enum rfkill_type { RFKILL_TYPE_UWB, RFKILL_TYPE_WIMAX, RFKILL_TYPE_WWAN, + RFKILL_TYPE_GPS, NUM_RFKILL_TYPES, }; diff --git a/net/rfkill/core.c b/net/rfkill/core.c index 044de1c6af3d..dbeaf2983822 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -589,11 +589,13 @@ static const char *rfkill_get_type_str(enum rfkill_type type) return "wimax"; case RFKILL_TYPE_WWAN: return "wwan"; + case RFKILL_TYPE_GPS: + return "gps"; default: BUG(); } - BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_WWAN + 1); + BUILD_BUG_ON(NUM_RFKILL_TYPES != RFKILL_TYPE_GPS + 1); } static ssize_t rfkill_type_show(struct device *dev, -- cgit v1.2.3 From e3b90ca28412fb9dcc8c5ca38e179e78fec07eee Mon Sep 17 00:00:00 2001 From: Igor Perminov Date: Tue, 4 Aug 2009 16:48:51 +0400 Subject: mac80211: FIF_PSPOLL filter flag When an interface is configured in the AP mode, the mac80211 implementation doesn't inform the driver to receive PS Poll frames. It leads to an inability to communicate with power-saving stations reliably. The FIF_CONTROL flag isn't passed by mac80211 to ieee80211_ops.configure_filter when an interface is in the AP mode. And it's ok, because we don't want to receive ACK frames and other control ones, but only PS Poll ones. This patch introduces the FIF_PSPOLL filter flag in addition to FIF_CONTROL, which means for the driver "pass PS Poll frames". This flag is passed to the driver: A) When an interface is configured in the AP mode. 
B) In all cases, when the FIF_CONTROL flag was passed earlier (in addition to it). Signed-off-by: Igor Perminov Signed-off-by: John W. Linville --- include/net/mac80211.h | 8 ++++++-- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/iface.c | 18 ++++++++++++++++-- net/mac80211/main.c | 3 +++ 4 files changed, 26 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5ed93f4406a8..e2fb5767e1fa 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1244,10 +1244,13 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * mac80211 needs to do and the amount of CPU wakeups, so you should * honour this flag if possible. * - * @FIF_CONTROL: pass control frames, if PROMISC_IN_BSS is not set then - * only those addressed to this station + * @FIF_CONTROL: pass control frames (except for PS Poll), if PROMISC_IN_BSS + * is not set then only those addressed to this station. * * @FIF_OTHER_BSS: pass frames destined to other BSSes + * + * @FIF_PSPOLL: pass PS Poll frames, if PROMISC_IN_BSS is not set then only + * those addressed to this station. */ enum ieee80211_filter_flags { FIF_PROMISC_IN_BSS = 1<<0, @@ -1257,6 +1260,7 @@ enum ieee80211_filter_flags { FIF_BCN_PRBRESP_PROMISC = 1<<4, FIF_CONTROL = 1<<5, FIF_OTHER_BSS = 1<<6, + FIF_PSPOLL = 1<<7, }; /** diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8d790e40f3e9..630a438180fd 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -628,7 +628,7 @@ struct ieee80211_local { int open_count; int monitors, cooked_mntrs; /* number of interfaces with corresponding FIF_ flags */ - int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss; + int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll; unsigned int filter_flags; /* FIF_* */ struct iw_statistics wstats; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8c1284d45e69..e8fb03b91a44 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -220,8 +220,10 @@ static int ieee80211_open(struct net_device *dev) local->fif_fcsfail++; if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL) local->fif_plcpfail++; - if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) + if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) { local->fif_control++; + local->fif_pspoll++; + } if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) local->fif_other_bss++; @@ -244,7 +246,14 @@ static int ieee80211_open(struct net_device *dev) spin_unlock_bh(&local->filter_lock); ieee80211_start_mesh(sdata); + } else if (sdata->vif.type == NL80211_IFTYPE_AP) { + local->fif_pspoll++; + + spin_lock_bh(&local->filter_lock); + ieee80211_configure_filter(local); + spin_unlock_bh(&local->filter_lock); } + changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); ieee80211_enable_keys(sdata); @@ -388,6 +397,9 @@ static int ieee80211_stop(struct net_device *dev) if (sdata->flags & IEEE80211_SDATA_PROMISC) atomic_dec(&local->iff_promiscs); + if (sdata->vif.type == NL80211_IFTYPE_AP) + local->fif_pspoll--; + netif_addr_lock_bh(dev); spin_lock_bh(&local->filter_lock); __dev_addr_unsync(&local->mc_list, &local->mc_count, @@ -439,8 +451,10 @@ static int ieee80211_stop(struct net_device *dev) local->fif_fcsfail--; if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL) local->fif_plcpfail--; - if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) + if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) { + local->fif_pspoll--; local->fif_control--; + } if (sdata->u.mntr_flags & 
MONITOR_FLAG_OTHER_BSS) local->fif_other_bss--; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 22e07385ff60..0c4f8e122ed6 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -77,6 +77,9 @@ void ieee80211_configure_filter(struct ieee80211_local *local) if (local->fif_other_bss) new_flags |= FIF_OTHER_BSS; + if (local->fif_pspoll) + new_flags |= FIF_PSPOLL; + changed_flags = local->filter_flags ^ new_flags; /* be a bit nasty */ -- cgit v1.2.3 From 18eac1cc100fa2afd5f39085aae6b694e417734b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Aug 2009 10:58:29 -0700 Subject: tty-ldisc: make refcount be atomic_t 'users' count This is pure preparation for changing the ldisc reference counting to be a true refcount that defines the lifetime of the ldisc. But this is a purely syntactic change for now to make the next steps easier. This patch should make no semantic changes at all. But I wanted to make the ldisc refcount be an atomic (I will be touching it without locks soon enough), and I wanted to rename it so that there isn't quite as much confusion between 'ldo->refcount' (ldisc operations refcount) and 'ld->refcount' (ldisc refcount itself) in the same file. So it's now an atomic 'ld->users' count. It still starts at zero, despite having a reference from 'tty->ldisc', but that will change once we turn it into a _real_ refcount. Signed-off-by: Linus Torvalds Tested-by: OGAWA Hirofumi Tested-by: Sergey Senozhatsky Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/char/tty_ldisc.c | 18 ++++++++---------- include/linux/tty_ldisc.h | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index acd76b767d4c..fd175e60aad5 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -142,7 +142,7 @@ static struct tty_ldisc *tty_ldisc_try_get(int disc) /* lock it */ ldops->refcount++; ld->ops = ldops; - ld->refcount = 0; + atomic_set(&ld->users, 0); err = 0; } } @@ -206,7 +206,7 @@ static void tty_ldisc_put(struct tty_ldisc *ld) ldo->refcount--; module_put(ldo->owner); spin_unlock_irqrestore(&tty_ldisc_lock, flags); - WARN_ON(ld->refcount); + WARN_ON(atomic_read(&ld->users)); kfree(ld); } @@ -297,7 +297,7 @@ static int tty_ldisc_try(struct tty_struct *tty) spin_lock_irqsave(&tty_ldisc_lock, flags); ld = tty->ldisc; if (test_bit(TTY_LDISC, &tty->flags)) { - ld->refcount++; + atomic_inc(&ld->users); ret = 1; } spin_unlock_irqrestore(&tty_ldisc_lock, flags); @@ -324,7 +324,7 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { /* wait_event is a macro */ wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); - WARN_ON(tty->ldisc->refcount == 0); + WARN_ON(atomic_read(&tty->ldisc->users) == 0); return tty->ldisc; } EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); @@ -365,11 +365,9 @@ void tty_ldisc_deref(struct tty_ldisc *ld) BUG_ON(ld == NULL); spin_lock_irqsave(&tty_ldisc_lock, flags); - if (ld->refcount == 0) + if (atomic_read(&ld->users) == 0) printk(KERN_ERR "tty_ldisc_deref: no references.\n"); - else - ld->refcount--; - if (ld->refcount == 0) + else if (atomic_dec_and_test(&ld->users)) wake_up(&tty_ldisc_wait); spin_unlock_irqrestore(&tty_ldisc_lock, flags); } @@ -536,10 +534,10 @@ static int tty_ldisc_wait_idle(struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tty_ldisc_lock, flags); - while (tty->ldisc->refcount) { + while (atomic_read(&tty->ldisc->users)) { spin_unlock_irqrestore(&tty_ldisc_lock, flags); if (wait_event_timeout(tty_ldisc_wait, - 
tty->ldisc->refcount == 0, 5 * HZ) == 0) + atomic_read(&tty->ldisc->users) == 0, 5 * HZ) == 0) return -EBUSY; spin_lock_irqsave(&tty_ldisc_lock, flags); } diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 40f38d896777..0c4ee9b88f85 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -144,7 +144,7 @@ struct tty_ldisc_ops { struct tty_ldisc { struct tty_ldisc_ops *ops; - int refcount; + atomic_t users; }; #define TTY_LDISC_MAGIC 0x5403 -- cgit v1.2.3 From 6502fbfaf81b09b3f474bb7b3796257e9450273e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 4 Aug 2009 11:24:24 -0400 Subject: drm/radeon: Add support for RS880 chips These are new AMD IGP chips Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_cp.c | 22 +++++++++++++++------- drivers/gpu/drm/radeon/radeon_drv.h | 1 + include/drm/drm_pciids.h | 5 +++++ 3 files changed, 21 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 146f3570af8e..20f17908b036 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -384,8 +384,9 @@ static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv) DRM_INFO("Loading RV670 PFP Microcode\n"); for (i = 0; i < PFP_UCODE_SIZE; i++) RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV670_pfp_microcode[i]); - } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { - DRM_INFO("Loading RS780 CP Microcode\n"); + } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) { + DRM_INFO("Loading RS780/RS880 CP Microcode\n"); for (i = 0; i < PM4_UCODE_SIZE; i++) { RADEON_WRITE(R600_CP_ME_RAM_DATA, RS780_cp_microcode[i][0]); @@ -396,7 +397,7 @@ static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv) } RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); - DRM_INFO("Loading RS780 PFP Microcode\n"); + DRM_INFO("Loading RS780/RS880 PFP Microcode\n"); for (i = 0; i < PFP_UCODE_SIZE; i++) RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RS780_pfp_microcode[i]); } @@ -783,6 +784,7 @@ static void r600_gfx_init(struct drm_device *dev, break; case CHIP_RV610: case CHIP_RS780: + case CHIP_RS880: case CHIP_RV620: dev_priv->r600_max_pipes = 1; dev_priv->r600_max_tile_pipes = 1; @@ -917,7 +919,8 @@ static void r600_gfx_init(struct drm_device *dev, ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE); else RADEON_WRITE(R600_DB_DEBUG, 0); @@ -935,7 +938,8 @@ static void r600_gfx_init(struct drm_device *dev, sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES); if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) { sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) | R600_FETCH_FIFO_HIWATER(0xa) | R600_DONE_FIFO_HIWATER(0xe0) | @@ -978,7 +982,8 @@ static void r600_gfx_init(struct drm_device *dev, R600_NUM_ES_STACK_ENTRIES(0)); } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == 
CHIP_RV610) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) { /* no vertex cache */ sq_config &= ~R600_VC_ENABLE; @@ -1035,7 +1040,8 @@ static void r600_gfx_init(struct drm_device *dev, if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY)); else RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC)); @@ -1078,6 +1084,7 @@ static void r600_gfx_init(struct drm_device *dev, break; case CHIP_RV610: case CHIP_RS780: + case CHIP_RS880: case CHIP_RV620: gs_prim_buffer_depth = 32; break; @@ -1123,6 +1130,7 @@ static void r600_gfx_init(struct drm_device *dev, switch (dev_priv->flags & RADEON_FAMILY_MASK) { case CHIP_RV610: case CHIP_RS780: + case CHIP_RS880: case CHIP_RV620: tc_cntl = R600_TC_L2_SIZE(8); break; diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 127d0456f628..3933f8216a34 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -143,6 +143,7 @@ enum radeon_family { CHIP_RV635, CHIP_RV670, CHIP_RS780, + CHIP_RS880, CHIP_RV770, CHIP_RV730, CHIP_RV710, diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 9d4c00491547..853508499d20 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -370,6 +370,11 @@ {0x1002, 0x9614, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9615, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9616, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9710, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9711, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9712, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9713, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9714, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0, 0, 0} #define r128_PCI_IDS \ -- cgit v1.2.3 From 5116d8f6b977970ebefc1932c0f313163a6ec91f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 26 Jul 2009 17:10:01 +0300 Subject: KVM: fix ack not being delivered when msi present kvm_notify_acked_irq does not check irq type, so that it sometimes interprets an msi vector as an irq. As a result, ack notifiers are not called, which typically hangs the guest. The fix is to track and check irq type. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + virt/kvm/irq_comm.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 16713dc672e4..3060bdc35ffe 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -110,6 +110,7 @@ struct kvm_memory_slot { struct kvm_kernel_irq_routing_entry { u32 gsi; + u32 type; int (*set)(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int level); union { diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a8bd466d00cc..ddc17f0e2f35 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -160,7 +160,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) unsigned gsi = pin; list_for_each_entry(e, &kvm->irq_routing, link) - if (e->irqchip.irqchip == irqchip && + if (e->type == KVM_IRQ_ROUTING_IRQCHIP && + e->irqchip.irqchip == irqchip && e->irqchip.pin == pin) { gsi = e->gsi; break; @@ -259,6 +260,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, int delta; e->gsi = ue->gsi; + e->type = ue->type; switch (ue->type) { case KVM_IRQ_ROUTING_IRQCHIP: delta = 0; -- cgit v1.2.3 From 36cbd3dcc10384f813ec0814255f576c84f2bcd4 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 5 Aug 2009 10:42:58 -0700 Subject: net: mark read-only arrays as const String literals are constant, and usually, we can also tag the array of pointers const too, moving it to the .rodata section. Signed-off-by: Jan Engelhardt Signed-off-by: David S. Miller --- include/net/ip_vs.h | 3 ++- include/net/irda/ircomm_event.h | 2 +- include/net/irda/ircomm_tty_attach.h | 4 ++-- include/net/irda/irlap_event.h | 2 +- include/net/irda/irlmp_event.h | 4 ++-- include/net/sctp/constants.h | 4 +++- net/8021q/vlanproc.c | 2 +- net/atm/lec.c | 9 +++++---- net/atm/proc.c | 9 +++++---- net/bluetooth/af_bluetooth.c | 4 ++-- net/bridge/br_stp.c | 2 +- net/core/dev.c | 2 +- net/core/net-sysfs.c | 2 +- net/core/sock.c | 6 +++--- net/dccp/ccids/ccid3.c | 4 ++-- net/dccp/feat.c | 7 ++++--- net/dccp/proto.c | 4 ++-- net/ipv4/fib_trie.c | 2 +- net/ipv6/proc.c | 4 ++-- net/irda/ircomm/ircomm_event.c | 4 ++-- net/irda/ircomm/ircomm_tty_attach.c | 4 ++-- net/irda/iriap.c | 4 ++-- net/irda/irlan/irlan_common.c | 4 ++-- net/irda/irlap.c | 2 +- net/irda/irlap_event.c | 4 ++-- net/irda/irlmp_event.c | 6 +++--- net/llc/llc_proc.c | 2 +- net/netfilter/ipvs/ip_vs_proto.c | 3 ++- net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +- net/rds/ib_stats.c | 2 +- net/rds/iw_stats.c | 2 +- net/rds/rds.h | 3 ++- net/rds/stats.c | 4 ++-- net/rxrpc/ar-ack.c | 2 +- net/sctp/debug.c | 14 +++++++------- 36 files changed, 74 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1c8ee1b13651..98978e73f666 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -738,7 +738,8 @@ extern void ip_vs_protocol_cleanup(void); extern void ip_vs_protocol_timeout_change(int flags); extern int *ip_vs_create_timeout_table(int *table, int size); extern int -ip_vs_set_state_timeout(int *table, int num, char **names, char *name, int to); +ip_vs_set_state_timeout(int *table, int num, const char *const *names, + const char *name, int to); extern void ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg); diff --git a/include/net/irda/ircomm_event.h b/include/net/irda/ircomm_event.h index 
c290447872d1..bc0c6f31f1c6 100644 --- a/include/net/irda/ircomm_event.h +++ b/include/net/irda/ircomm_event.h @@ -74,7 +74,7 @@ struct ircomm_info { struct qos_info *qos; }; -extern char *ircomm_state[]; +extern const char *const ircomm_state[]; struct ircomm_cb; /* Forward decl. */ diff --git a/include/net/irda/ircomm_tty_attach.h b/include/net/irda/ircomm_tty_attach.h index f91a5695aa44..0a63bbb972d7 100644 --- a/include/net/irda/ircomm_tty_attach.h +++ b/include/net/irda/ircomm_tty_attach.h @@ -66,8 +66,8 @@ struct ircomm_tty_info { __u8 dlsap_sel; }; -extern char *ircomm_state[]; -extern char *ircomm_tty_state[]; +extern const char *const ircomm_state[]; +extern const char *const ircomm_tty_state[]; int ircomm_tty_do_event(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event, struct sk_buff *skb, struct ircomm_tty_info *info); diff --git a/include/net/irda/irlap_event.h b/include/net/irda/irlap_event.h index 2ae2e119ef4b..4c90824c50fb 100644 --- a/include/net/irda/irlap_event.h +++ b/include/net/irda/irlap_event.h @@ -120,7 +120,7 @@ typedef enum { /* FIXME check the two first reason codes */ LAP_PRIMARY_CONFLICT, } LAP_REASON; -extern const char *irlap_state[]; +extern const char *const irlap_state[]; void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event, struct sk_buff *skb, struct irlap_info *info); diff --git a/include/net/irda/irlmp_event.h b/include/net/irda/irlmp_event.h index e03ae4ae3963..9e4ec17a7449 100644 --- a/include/net/irda/irlmp_event.h +++ b/include/net/irda/irlmp_event.h @@ -79,8 +79,8 @@ typedef enum { LM_LAP_IDLE_TIMEOUT, } IRLMP_EVENT; -extern const char *irlmp_state[]; -extern const char *irlsap_state[]; +extern const char *const irlmp_state[]; +extern const char *const irlsap_state[]; void irlmp_watchdog_timer_expired(void *data); void irlmp_discovery_timer_expired(void *data); diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index b05b0557211f..8bc25f7b04ce 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -241,7 +241,9 @@ const char *sctp_tname(const sctp_subtype_t); /* timeouts */ const char *sctp_pname(const sctp_subtype_t); /* primitives */ /* This is a table of printable names of sctp_state_t's. */ -extern const char *sctp_state_tbl[], *sctp_evttype_tbl[], *sctp_status_tbl[]; +extern const char *const sctp_state_tbl[]; +extern const char *const sctp_evttype_tbl[]; +extern const char *const sctp_status_tbl[]; /* Maximum chunk length considering padding requirements. 
*/ enum { SCTP_MAX_CHUNK_LEN = ((1<<16) - sizeof(__u32)) }; diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index b55a091a33df..6262c335f3c2 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -107,7 +107,7 @@ static const struct file_operations vlandev_fops = { */ /* Strings */ -static const char *vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = { +static const char *const vlan_name_type_str[VLAN_NAME_TYPE_HIGHEST] = { [VLAN_NAME_TYPE_RAW_PLUS_VID] = "VLAN_NAME_TYPE_RAW_PLUS_VID", [VLAN_NAME_TYPE_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_PLUS_VID_NO_PAD", [VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD] = "VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD", diff --git a/net/atm/lec.c b/net/atm/lec.c index c463868c993b..8e723c2654cb 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -935,9 +935,9 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) } #ifdef CONFIG_PROC_FS -static char *lec_arp_get_status_string(unsigned char status) +static const char *lec_arp_get_status_string(unsigned char status) { - static char *lec_arp_status_string[] = { + static const char *const lec_arp_status_string[] = { "ESI_UNKNOWN ", "ESI_ARP_PENDING ", "ESI_VC_PENDING ", @@ -1121,7 +1121,8 @@ static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos) static int lec_seq_show(struct seq_file *seq, void *v) { - static char lec_banner[] = "Itf MAC ATM destination" + static const char lec_banner[] = + "Itf MAC ATM destination" " Status Flags " "VPI/VCI Recv VPI/VCI\n"; @@ -1505,7 +1506,7 @@ lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) } #if DEBUG_ARP_TABLE -static char *get_status_string(unsigned char st) +static const char *get_status_string(unsigned char st) { switch (st) { case ESI_UNKNOWN: diff --git a/net/atm/proc.c b/net/atm/proc.c index 38de5ff61ecd..ab8419a324b6 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -151,8 +151,9 @@ static void *vcc_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void pvc_info(struct seq_file *seq, struct atm_vcc *vcc) { - static const char *class_name[] = { "off","UBR","CBR","VBR","ABR" }; - static const char *aal_name[] = { + static const char *const class_name[] = + {"off","UBR","CBR","VBR","ABR"}; + static const char *const aal_name[] = { "---", "1", "2", "3/4", /* 0- 3 */ "???", "5", "???", "???", /* 4- 7 */ "???", "???", "???", "???", /* 8-11 */ @@ -178,7 +179,7 @@ static void pvc_info(struct seq_file *seq, struct atm_vcc *vcc) static const char *vcc_state(struct atm_vcc *vcc) { - static const char *map[] = { ATM_VS2TXT_MAP }; + static const char *const map[] = { ATM_VS2TXT_MAP }; return map[ATM_VF2VS(vcc->flags)]; } @@ -335,7 +336,7 @@ static const struct file_operations vcc_seq_fops = { static int svc_seq_show(struct seq_file *seq, void *v) { - static char atm_svc_banner[] = + static const char atm_svc_banner[] = "Itf VPI VCI State Remote\n"; if (v == SEQ_START_TOKEN) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 0250e0600150..8cfb5a849841 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -49,7 +49,7 @@ static struct net_proto_family *bt_proto[BT_MAX_PROTO]; static DEFINE_RWLOCK(bt_proto_lock); static struct lock_class_key bt_lock_key[BT_MAX_PROTO]; -static const char *bt_key_strings[BT_MAX_PROTO] = { +static const char *const bt_key_strings[BT_MAX_PROTO] = { "sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP", "sk_lock-AF_BLUETOOTH-BTPROTO_HCI", "sk_lock-AF_BLUETOOTH-BTPROTO_SCO", @@ -61,7 +61,7 @@ static const char *bt_key_strings[BT_MAX_PROTO] = { }; static struct lock_class_key 
bt_slock_key[BT_MAX_PROTO]; -static const char *bt_slock_key_strings[BT_MAX_PROTO] = { +static const char *const bt_slock_key_strings[BT_MAX_PROTO] = { "slock-AF_BLUETOOTH-BTPROTO_L2CAP", "slock-AF_BLUETOOTH-BTPROTO_HCI", "slock-AF_BLUETOOTH-BTPROTO_SCO", diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 0660515f3992..fd3f8d6c0998 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -21,7 +21,7 @@ */ #define MESSAGE_AGE_INCR ((HZ < 256) ? 1 : (HZ/256)) -static const char *br_port_state_names[] = { +static const char *const br_port_state_names[] = { [BR_STATE_DISABLED] = "disabled", [BR_STATE_LISTENING] = "listening", [BR_STATE_LEARNING] = "learning", diff --git a/net/core/dev.c b/net/core/dev.c index 71347668c506..f01a9c41f112 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -272,7 +272,7 @@ static const unsigned short netdev_lock_type[] = ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY, ARPHRD_VOID, ARPHRD_NONE}; -static const char *netdev_lock_name[] = +static const char *const netdev_lock_name[] = {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25", "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET", "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM", diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 3994680c08b9..ad91e9e5f475 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -141,7 +141,7 @@ static ssize_t show_dormant(struct device *dev, return -EINVAL; } -static const char *operstates[] = { +static const char *const operstates[] = { "unknown", "notpresent", /* currently unused */ "down", diff --git a/net/core/sock.c b/net/core/sock.c index bbb25be7ddfe..a324a80c163e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -142,7 +142,7 @@ static struct lock_class_key af_family_slock_keys[AF_MAX]; * strings build-time, so that runtime initialization of socket * locks is fast): */ -static const char *af_family_key_strings[AF_MAX+1] = { +static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_UNSPEC", "sk_lock-AF_UNIX" , "sk_lock-AF_INET" , "sk_lock-AF_AX25" , "sk_lock-AF_IPX" , "sk_lock-AF_APPLETALK", "sk_lock-AF_NETROM", "sk_lock-AF_BRIDGE" , "sk_lock-AF_ATMPVC" , @@ -158,7 +158,7 @@ static const char *af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_IEEE802154", "sk_lock-AF_MAX" }; -static const char *af_family_slock_key_strings[AF_MAX+1] = { +static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , "slock-AF_AX25" , "slock-AF_IPX" , "slock-AF_APPLETALK", "slock-AF_NETROM", "slock-AF_BRIDGE" , "slock-AF_ATMPVC" , @@ -174,7 +174,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_IEEE802154", "slock-AF_MAX" }; -static const char *af_family_clock_key_strings[AF_MAX+1] = { +static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , "clock-AF_AX25" , "clock-AF_IPX" , "clock-AF_APPLETALK", "clock-AF_NETROM", "clock-AF_BRIDGE" , "clock-AF_ATMPVC" , diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index a27b7f4c19c5..f596ce149c3c 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -52,7 +52,7 @@ static int ccid3_debug; #ifdef CONFIG_IP_DCCP_CCID3_DEBUG static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) { - static char *ccid3_state_names[] = { + static const char *const ccid3_state_names[] = { [TFRC_SSTATE_NO_SENT] = "NO_SENT", [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", [TFRC_SSTATE_FBACK] = 
"FBACK", @@ -646,7 +646,7 @@ enum ccid3_fback_type { #ifdef CONFIG_IP_DCCP_CCID3_DEBUG static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) { - static char *ccid3_rx_state_names[] = { + static const char *const ccid3_rx_state_names[] = { [TFRC_RSTATE_NO_DATA] = "NO_DATA", [TFRC_RSTATE_DATA] = "DATA", [TFRC_RSTATE_TERM] = "TERM", diff --git a/net/dccp/feat.c b/net/dccp/feat.c index b04160a2eea5..972b8dc918d6 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -213,7 +213,7 @@ static int dccp_feat_default_value(u8 feat_num) */ static const char *dccp_feat_fname(const u8 feat) { - static const char *feature_names[] = { + static const char *const feature_names[] = { [DCCPF_RESERVED] = "Reserved", [DCCPF_CCID] = "CCID", [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos", @@ -236,8 +236,9 @@ static const char *dccp_feat_fname(const u8 feat) return feature_names[feat]; } -static const char *dccp_feat_sname[] = { "DEFAULT", "INITIALISING", "CHANGING", - "UNSTABLE", "STABLE" }; +static const char *const dccp_feat_sname[] = { + "DEFAULT", "INITIALISING", "CHANGING", "UNSTABLE", "STABLE", +}; #ifdef CONFIG_IP_DCCP_DEBUG static const char *dccp_feat_oname(const u8 opt) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 94ca8eaace7d..37b3b4293ef4 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -124,7 +124,7 @@ EXPORT_SYMBOL_GPL(dccp_done); const char *dccp_packet_name(const int type) { - static const char *dccp_packet_names[] = { + static const char *const dccp_packet_names[] = { [DCCP_PKT_REQUEST] = "REQUEST", [DCCP_PKT_RESPONSE] = "RESPONSE", [DCCP_PKT_DATA] = "DATA", @@ -147,7 +147,7 @@ EXPORT_SYMBOL_GPL(dccp_packet_name); const char *dccp_state_name(const int state) { - static char *dccp_state_names[] = { + static const char *const dccp_state_names[] = { [DCCP_OPEN] = "OPEN", [DCCP_REQUESTING] = "REQUESTING", [DCCP_PARTOPEN] = "PARTOPEN", diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d58b49115386..fe3c846b99a6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2421,7 +2421,7 @@ static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) } } -static const char *rtn_type_names[__RTN_MAX] = { +static const char *const rtn_type_names[__RTN_MAX] = { [RTN_UNSPEC] = "UNSPEC", [RTN_UNICAST] = "UNICAST", [RTN_LOCAL] = "LOCAL", diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 590ddefb7ffc..c9605c3ad91f 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -101,7 +101,7 @@ static struct snmp_mib snmp6_icmp6_list[] = { }; /* RFC 4293 v6 ICMPMsgStatsTable; named items for RFC 2466 compatibility */ -static char *icmp6type2name[256] = { +static const char *const icmp6type2name[256] = { [ICMPV6_DEST_UNREACH] = "DestUnreachs", [ICMPV6_PKT_TOOBIG] = "PktTooBigs", [ICMPV6_TIME_EXCEED] = "TimeExcds", @@ -144,7 +144,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) /* print by name -- deprecated items */ for (i = 0; i < ICMP6MSG_MIB_MAX; i++) { int icmptype; - char *p; + const char *p; icmptype = i & 0xff; p = icmp6type2name[icmptype]; diff --git a/net/irda/ircomm/ircomm_event.c b/net/irda/ircomm/ircomm_event.c index c35b3ef5c2f0..d78554fedbac 100644 --- a/net/irda/ircomm/ircomm_event.c +++ b/net/irda/ircomm/ircomm_event.c @@ -49,7 +49,7 @@ static int ircomm_state_waitr(struct ircomm_cb *self, IRCOMM_EVENT event, static int ircomm_state_conn(struct ircomm_cb *self, IRCOMM_EVENT event, struct sk_buff *skb, struct ircomm_info *info); -char *ircomm_state[] = { +const char *const ircomm_state[] = { "IRCOMM_IDLE", 
"IRCOMM_WAITI", "IRCOMM_WAITR", @@ -57,7 +57,7 @@ char *ircomm_state[] = { }; #ifdef CONFIG_IRDA_DEBUG -static char *ircomm_event[] = { +static const char *const ircomm_event[] = { "IRCOMM_CONNECT_REQUEST", "IRCOMM_CONNECT_RESPONSE", "IRCOMM_TTP_CONNECT_INDICATION", diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c index 9032a1d1190d..eafc010907c2 100644 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ b/net/irda/ircomm/ircomm_tty_attach.c @@ -80,7 +80,7 @@ static int ircomm_tty_state_ready(struct ircomm_tty_cb *self, struct sk_buff *skb, struct ircomm_tty_info *info); -char *ircomm_tty_state[] = { +const char *const ircomm_tty_state[] = { "IRCOMM_TTY_IDLE", "IRCOMM_TTY_SEARCH", "IRCOMM_TTY_QUERY_PARAMETERS", @@ -91,7 +91,7 @@ char *ircomm_tty_state[] = { }; #ifdef CONFIG_IRDA_DEBUG -static char *ircomm_tty_event[] = { +static const char *const ircomm_tty_event[] = { "IRCOMM_TTY_ATTACH_CABLE", "IRCOMM_TTY_DETACH_CABLE", "IRCOMM_TTY_DATA_REQUEST", diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 4a105dc32dcd..294e34d3517c 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -44,7 +44,7 @@ #ifdef CONFIG_IRDA_DEBUG /* FIXME: This one should go in irlmp.c */ -static const char *ias_charset_types[] = { +static const char *const ias_charset_types[] = { "CS_ASCII", "CS_ISO_8859_1", "CS_ISO_8859_2", @@ -966,7 +966,7 @@ static void iriap_watchdog_timer_expired(void *data) #ifdef CONFIG_PROC_FS -static const char *ias_value_types[] = { +static const char *const ias_value_types[] = { "IAS_MISSING", "IAS_INTEGER", "IAS_OCT_SEQ", diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 774d73a76852..62116829b817 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -69,14 +69,14 @@ static int eth; /* Use "eth" or "irlan" name for devices */ static int access = ACCESS_PEER; /* PEER, DIRECT or HOSTED */ #ifdef CONFIG_PROC_FS -static const char *irlan_access[] = { +static const char *const irlan_access[] = { "UNKNOWN", "DIRECT", "PEER", "HOSTED" }; -static const char *irlan_media[] = { +static const char *const irlan_media[] = { "UNKNOWN", "802.3", "802.5" diff --git a/net/irda/irlap.c b/net/irda/irlap.c index e4965b764b9b..356e65b1dc42 100644 --- a/net/irda/irlap.c +++ b/net/irda/irlap.c @@ -63,7 +63,7 @@ static void irlap_init_qos_capabilities(struct irlap_cb *self, struct qos_info *qos_user); #ifdef CONFIG_IRDA_DEBUG -static char *lap_reasons[] = { +static const char *const lap_reasons[] = { "ERROR, NOT USED", "LAP_DISC_INDICATION", "LAP_NO_RESPONSE", diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c index 16c4ef0f5c1a..c5c51959e3ce 100644 --- a/net/irda/irlap_event.c +++ b/net/irda/irlap_event.c @@ -78,7 +78,7 @@ static int irlap_state_reset_check(struct irlap_cb *, IRLAP_EVENT event, struct sk_buff *, struct irlap_info *); #ifdef CONFIG_IRDA_DEBUG -static const char *irlap_event[] = { +static const char *const irlap_event[] = { "DISCOVERY_REQUEST", "CONNECT_REQUEST", "CONNECT_RESPONSE", @@ -120,7 +120,7 @@ static const char *irlap_event[] = { }; #endif /* CONFIG_IRDA_DEBUG */ -const char *irlap_state[] = { +const char *const irlap_state[] = { "LAP_NDM", "LAP_QUERY", "LAP_REPLY", diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c index 78cce0cb073f..c1fb5db81042 100644 --- a/net/irda/irlmp_event.c +++ b/net/irda/irlmp_event.c @@ -33,13 +33,13 @@ #include #include -const char *irlmp_state[] = { +const char *const irlmp_state[] = { "LAP_STANDBY", "LAP_U_CONNECT", "LAP_ACTIVE", }; -const 
char *irlsap_state[] = { +const char *const irlsap_state[] = { "LSAP_DISCONNECTED", "LSAP_CONNECT", "LSAP_CONNECT_PEND", @@ -49,7 +49,7 @@ const char *irlsap_state[] = { }; #ifdef CONFIG_IRDA_DEBUG -static const char *irlmp_event[] = { +static const char *const irlmp_event[] = { "LM_CONNECT_REQUEST", "LM_CONNECT_CONFIRM", "LM_CONNECT_RESPONSE", diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index f97be471fe2e..be47ac427f6b 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -143,7 +143,7 @@ out: return 0; } -static char *llc_conn_state_names[] = { +static const char *const llc_conn_state_names[] = { [LLC_CONN_STATE_ADM] = "adm", [LLC_CONN_STATE_SETUP] = "setup", [LLC_CONN_STATE_NORMAL] = "normal", diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 85c8892e1e8b..3e7671674549 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -127,7 +127,8 @@ ip_vs_create_timeout_table(int *table, int size) * Set timeout value for state specified by name */ int -ip_vs_set_state_timeout(int *table, int num, char **names, char *name, int to) +ip_vs_set_state_timeout(int *table, int num, const char *const *names, + const char *name, int to) { int i; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 2278e141489e..91d28e073742 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -377,7 +377,7 @@ static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = { [IP_VS_TCP_S_LAST] = 2*HZ, }; -static char * tcp_state_name_table[IP_VS_TCP_S_LAST+1] = { +static const char *const tcp_state_name_table[IP_VS_TCP_S_LAST+1] = { [IP_VS_TCP_S_NONE] = "NONE", [IP_VS_TCP_S_ESTABLISHED] = "ESTABLISHED", [IP_VS_TCP_S_SYN_SENT] = "SYN_SENT", diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 33a05d3684d9..e7a6885e0167 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -472,7 +472,7 @@ static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_LAST] = 2*HZ, }; -static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = { +static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = { [IP_VS_UDP_S_NORMAL] = "UDP", [IP_VS_UDP_S_LAST] = "BUG!", }; diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c index 02e3e3d50d4a..8d8488306fe4 100644 --- a/net/rds/ib_stats.c +++ b/net/rds/ib_stats.c @@ -39,7 +39,7 @@ DEFINE_PER_CPU(struct rds_ib_statistics, rds_ib_stats) ____cacheline_aligned; -static char *rds_ib_stat_names[] = { +static const char *const rds_ib_stat_names[] = { "ib_connect_raced", "ib_listen_closed_stale", "ib_tx_cq_call", diff --git a/net/rds/iw_stats.c b/net/rds/iw_stats.c index ccc7e8f0bf0e..d33ea790484e 100644 --- a/net/rds/iw_stats.c +++ b/net/rds/iw_stats.c @@ -39,7 +39,7 @@ DEFINE_PER_CPU(struct rds_iw_statistics, rds_iw_stats) ____cacheline_aligned; -static char *rds_iw_stat_names[] = { +static const char *const rds_iw_stat_names[] = { "iw_connect_raced", "iw_listen_closed_stale", "iw_tx_cq_call", diff --git a/net/rds/rds.h b/net/rds/rds.h index dbe111236783..290566c69d28 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -652,7 +652,8 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); int __init rds_stats_init(void); void rds_stats_exit(void); void rds_stats_info_copy(struct rds_info_iterator *iter, - uint64_t *values, char **names, size_t nr); + uint64_t *values, const char *const *names, + size_t nr); /* sysctl.c */ int __init rds_sysctl_init(void); 
diff --git a/net/rds/stats.c b/net/rds/stats.c index 637146893cf3..91d8c58b8335 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -40,7 +40,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); /* :.,$s/unsigned long\>.*\ Date: Tue, 4 Aug 2009 07:28:28 +0000 Subject: net: implement a SO_PROTOCOL getsockoption Similar to SO_TYPE returning the socket type, SO_PROTOCOL allows to retrieve the protocol used with a given socket. I am not quite sure why we have that-many copies of socket.h, and why the values are not the same on all arches either, but for where hex numbers dominate, I use 0x1029 for SO_PROTOCOL as that seems to be the next free unused number across a bunch of operating systems, or so Google results make me want to believe. SO_PROTOCOL for others just uses the next free Linux number, 38. Signed-off-by: Jan Engelhardt Signed-off-by: David S. Miller --- arch/alpha/include/asm/socket.h | 1 + arch/arm/include/asm/socket.h | 2 ++ arch/avr32/include/asm/socket.h | 2 ++ arch/cris/include/asm/socket.h | 2 ++ arch/frv/include/asm/socket.h | 2 ++ arch/h8300/include/asm/socket.h | 2 ++ arch/ia64/include/asm/socket.h | 2 ++ arch/m32r/include/asm/socket.h | 2 ++ arch/m68k/include/asm/socket.h | 2 ++ arch/microblaze/include/asm/socket.h | 2 ++ arch/mips/include/asm/socket.h | 1 + arch/mn10300/include/asm/socket.h | 2 ++ arch/parisc/include/asm/socket.h | 1 + arch/powerpc/include/asm/socket.h | 2 ++ arch/s390/include/asm/socket.h | 2 ++ arch/sparc/include/asm/socket.h | 2 ++ arch/x86/include/asm/socket.h | 2 ++ arch/xtensa/include/asm/socket.h | 2 ++ include/asm-generic/socket.h | 2 ++ net/core/sock.c | 5 +++++ 20 files changed, 40 insertions(+) (limited to 'include') diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 3641ec1452f4..2f8b4d377749 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -32,6 +32,7 @@ #define SO_RCVTIMEO 0x1012 #define SO_SNDTIMEO 0x1013 #define SO_ACCEPTCONN 0x1014 +#define SO_PROTOCOL 0x1028 /* linux-specific, might as well be the same as on i386 */ #define SO_NO_CHECK 11 diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index 537de4e0ef50..7f47454ffbf3 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index 04c860619700..6af2866a4f00 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index d5cf74005408..f3859fb0990c 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -59,6 +59,8 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h index 57c3d4054e8b..8dab3486ffa4 100644 --- a/arch/frv/include/asm/socket.h +++ b/arch/frv/include/asm/socket.h @@ -57,5 +57,7 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index 602518a70a1a..ba770d09cd63 
100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index 745421225ec6..091cd9d47d0f 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -66,4 +66,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h index be7ed589af5c..d36f5928fb79 100644 --- a/arch/m32r/include/asm/socket.h +++ b/arch/m32r/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h index ca87f938b03f..060cb7ed024f 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/microblaze/include/asm/socket.h b/arch/microblaze/include/asm/socket.h index 825936860314..96bf8bfa935e 100644 --- a/arch/microblaze/include/asm/socket.h +++ b/arch/microblaze/include/asm/socket.h @@ -66,4 +66,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_MICROBLAZE_SOCKET_H */ diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index 2abca1780169..289ce5f5f2a3 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -42,6 +42,7 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. 
*/ #define SO_SNDTIMEO 0x1005 /* send timeout */ #define SO_RCVTIMEO 0x1006 /* receive timeout */ #define SO_ACCEPTCONN 0x1009 +#define SO_PROTOCOL 0x1028 /* protocol type */ /* linux-specific, might as well be the same as on i386 */ #define SO_NO_CHECK 11 diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h index fb5daf438ec9..19d7cf709b77 100644 --- a/arch/mn10300/include/asm/socket.h +++ b/arch/mn10300/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index 885472bf7b78..a658b09df624 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -24,6 +24,7 @@ #define SO_RCVTIMEO 0x1006 #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 +#define SO_PROTOCOL 0x1028 #define SO_PEERNAME 0x2000 #define SO_NO_CHECK 0x400b diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index 1e5cfad0e3f7..609049d7117e 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -64,4 +64,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index 02330c50241b..65baa9a83abc 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -65,4 +65,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index 982a12f959f4..9cbbfafd0538 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -29,6 +29,8 @@ #define SO_RCVBUFFORCE 0x100b #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 +#define SO_PROTOCOL 0x1028 + /* Linux specific, keep the same. 
*/ #define SO_NO_CHECK 0x000b diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h index ca8bf2cd0ba9..1077d2535a32 100644 --- a/arch/x86/include/asm/socket.h +++ b/arch/x86/include/asm/socket.h @@ -57,4 +57,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _ASM_X86_SOCKET_H */ diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index dd1a7a4a1cea..e47f172142f1 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -68,4 +68,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* _XTENSA_SOCKET_H */ diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h index 5d79e409241c..7e24d186616e 100644 --- a/include/asm-generic/socket.h +++ b/include/asm-generic/socket.h @@ -60,4 +60,6 @@ #define SO_TIMESTAMPING 37 #define SCM_TIMESTAMPING SO_TIMESTAMPING +#define SO_PROTOCOL 38 + #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/sock.c b/net/core/sock.c index a324a80c163e..ebce661234ac 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -482,6 +482,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, sk->sk_reuse = valbool; break; case SO_TYPE: + case SO_PROTOCOL: case SO_ERROR: ret = -ENOPROTOOPT; break; @@ -764,6 +765,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_type; break; + case SO_PROTOCOL: + v.val = sk->sk_protocol; + break; + case SO_ERROR: v.val = -sock_error(sk); if (v.val == 0) -- cgit v1.2.3 From 0d6038ee76f2e06b79d0465807f67e86bf4025de Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 4 Aug 2009 07:28:29 +0000 Subject: net: implement a SO_DOMAIN getsockoption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This sockopt goes in line with SO_TYPE and SO_PROTOCOL. It makes it possible for userspace programs to pass around file descriptors — I am referring to arguments-to-functions, but it may even work for the fd passing over UNIX sockets — without needing to also pass the auxiliary information (PF_INET6/IPPROTO_TCP). Signed-off-by: Jan Engelhardt Signed-off-by: David S. 
Miller --- arch/alpha/include/asm/socket.h | 1 + arch/arm/include/asm/socket.h | 1 + arch/avr32/include/asm/socket.h | 1 + arch/cris/include/asm/socket.h | 1 + arch/frv/include/asm/socket.h | 1 + arch/h8300/include/asm/socket.h | 1 + arch/ia64/include/asm/socket.h | 1 + arch/m32r/include/asm/socket.h | 1 + arch/m68k/include/asm/socket.h | 1 + arch/microblaze/include/asm/socket.h | 1 + arch/mips/include/asm/socket.h | 1 + arch/mn10300/include/asm/socket.h | 1 + arch/parisc/include/asm/socket.h | 1 + arch/powerpc/include/asm/socket.h | 1 + arch/s390/include/asm/socket.h | 1 + arch/sparc/include/asm/socket.h | 1 + arch/x86/include/asm/socket.h | 1 + arch/xtensa/include/asm/socket.h | 1 + include/asm-generic/socket.h | 1 + net/core/sock.c | 5 +++++ 20 files changed, 24 insertions(+) (limited to 'include') diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h index 2f8b4d377749..26773e3246e2 100644 --- a/arch/alpha/include/asm/socket.h +++ b/arch/alpha/include/asm/socket.h @@ -33,6 +33,7 @@ #define SO_SNDTIMEO 0x1013 #define SO_ACCEPTCONN 0x1014 #define SO_PROTOCOL 0x1028 +#define SO_DOMAIN 0x1029 /* linux-specific, might as well be the same as on i386 */ #define SO_NO_CHECK 11 diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h index 7f47454ffbf3..92ac61d294fd 100644 --- a/arch/arm/include/asm/socket.h +++ b/arch/arm/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h index 6af2866a4f00..fe863f9794d5 100644 --- a/arch/avr32/include/asm/socket.h +++ b/arch/avr32/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* __ASM_AVR32_SOCKET_H */ diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h index f3859fb0990c..45ec49bdb7b1 100644 --- a/arch/cris/include/asm/socket.h +++ b/arch/cris/include/asm/socket.h @@ -60,6 +60,7 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h index 8dab3486ffa4..2dea726095c2 100644 --- a/arch/frv/include/asm/socket.h +++ b/arch/frv/include/asm/socket.h @@ -58,6 +58,7 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h index ba770d09cd63..1547f01c8e22 100644 --- a/arch/h8300/include/asm/socket.h +++ b/arch/h8300/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h index 091cd9d47d0f..0b0d5ff062e5 100644 --- a/arch/ia64/include/asm/socket.h +++ b/arch/ia64/include/asm/socket.h @@ -67,5 +67,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_IA64_SOCKET_H */ diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h index d36f5928fb79..3390a864f224 100644 --- a/arch/m32r/include/asm/socket.h +++ b/arch/m32r/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_M32R_SOCKET_H */ diff --git a/arch/m68k/include/asm/socket.h 
b/arch/m68k/include/asm/socket.h index 060cb7ed024f..eee01cce921b 100644 --- a/arch/m68k/include/asm/socket.h +++ b/arch/m68k/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/microblaze/include/asm/socket.h b/arch/microblaze/include/asm/socket.h index 96bf8bfa935e..7361ae7cfcde 100644 --- a/arch/microblaze/include/asm/socket.h +++ b/arch/microblaze/include/asm/socket.h @@ -67,5 +67,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_MICROBLAZE_SOCKET_H */ diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h index 289ce5f5f2a3..ae05accd9fe4 100644 --- a/arch/mips/include/asm/socket.h +++ b/arch/mips/include/asm/socket.h @@ -43,6 +43,7 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_RCVTIMEO 0x1006 /* receive timeout */ #define SO_ACCEPTCONN 0x1009 #define SO_PROTOCOL 0x1028 /* protocol type */ +#define SO_DOMAIN 0x1029 /* domain/socket family */ /* linux-specific, might as well be the same as on i386 */ #define SO_NO_CHECK 11 diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h index 19d7cf709b77..4df75af29d76 100644 --- a/arch/mn10300/include/asm/socket.h +++ b/arch/mn10300/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h index a658b09df624..960b1e5d8e16 100644 --- a/arch/parisc/include/asm/socket.h +++ b/arch/parisc/include/asm/socket.h @@ -25,6 +25,7 @@ #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 #define SO_PROTOCOL 0x1028 +#define SO_DOMAIN 0x1029 #define SO_PEERNAME 0x2000 #define SO_NO_CHECK 0x400b diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h index 609049d7117e..3ab8b3e6feb0 100644 --- a/arch/powerpc/include/asm/socket.h +++ b/arch/powerpc/include/asm/socket.h @@ -65,5 +65,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_POWERPC_SOCKET_H */ diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h index 65baa9a83abc..e42df89a0b85 100644 --- a/arch/s390/include/asm/socket.h +++ b/arch/s390/include/asm/socket.h @@ -66,5 +66,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_SOCKET_H */ diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h index 9cbbfafd0538..3a5ae3d12088 100644 --- a/arch/sparc/include/asm/socket.h +++ b/arch/sparc/include/asm/socket.h @@ -30,6 +30,7 @@ #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 #define SO_PROTOCOL 0x1028 +#define SO_DOMAIN 0x1029 /* Linux specific, keep the same. 
*/ diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/asm/socket.h index 1077d2535a32..b2a8c74f2d06 100644 --- a/arch/x86/include/asm/socket.h +++ b/arch/x86/include/asm/socket.h @@ -58,5 +58,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _ASM_X86_SOCKET_H */ diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h index e47f172142f1..beb3a6bdb61d 100644 --- a/arch/xtensa/include/asm/socket.h +++ b/arch/xtensa/include/asm/socket.h @@ -69,5 +69,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* _XTENSA_SOCKET_H */ diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h index 7e24d186616e..538991cef6f0 100644 --- a/include/asm-generic/socket.h +++ b/include/asm-generic/socket.h @@ -61,5 +61,6 @@ #define SCM_TIMESTAMPING SO_TIMESTAMPING #define SO_PROTOCOL 38 +#define SO_DOMAIN 39 #endif /* __ASM_GENERIC_SOCKET_H */ diff --git a/net/core/sock.c b/net/core/sock.c index ebce661234ac..3ac34ea6ec05 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -483,6 +483,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; case SO_TYPE: case SO_PROTOCOL: + case SO_DOMAIN: case SO_ERROR: ret = -ENOPROTOOPT; break; @@ -769,6 +770,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sk->sk_protocol; break; + case SO_DOMAIN: + v.val = sk->sk_family; + break; + case SO_ERROR: v.val = -sock_error(sk); if (v.val == 0) -- cgit v1.2.3 From 7c73875e7dda627040b12c19b01db634fa7f0fd1 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 31 Jul 2009 12:53:58 -0400 Subject: Capabilities: move cap_file_mmap to commoncap.c Currently we duplicate the mmap_min_addr test in cap_file_mmap and in security_file_mmap if !CONFIG_SECURITY. This patch moves cap_file_mmap into commoncap.c and then calls that function directly from security_file_mmap ifndef CONFIG_SECURITY like all of the other capability checks are done. 
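To make the effect of the consolidated check concrete, a rough userspace sketch follows (illustrative only, not part of the patch; the exact errno depends on the kernel version and the active security module). With a non-zero /proc/sys/vm/mmap_min_addr and without CAP_SYS_RAWIO, a fixed mapping at the zero page is refused by the test that cap_file_mmap() performs:

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	void *p = mmap((void *)0, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);

	if (p == MAP_FAILED)
		perror("mmap at address 0");	/* typically EPERM or EACCES */
	else
		munmap(p, 4096);
	return 0;
}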
Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/security.h | 7 ++++--- security/capability.c | 9 --------- security/commoncap.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 145909165dbf..963a48fc3005 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -66,6 +66,9 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only); extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); @@ -2197,9 +2200,7 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { - if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) - return -EACCES; - return 0; + return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); } static inline int security_file_mprotect(struct vm_area_struct *vma, diff --git a/security/capability.c b/security/capability.c index f218dd361647..ec0573054024 100644 --- a/security/capability.c +++ b/security/capability.c @@ -330,15 +330,6 @@ static int cap_file_ioctl(struct file *file, unsigned int command, return 0; } -static int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) -{ - if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) - return -EACCES; - return 0; -} - static int cap_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { diff --git a/security/commoncap.c b/security/commoncap.c index aa97704564d4..3852e9432801 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -984,3 +984,33 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) cap_sys_admin = 1; return __vm_enough_memory(mm, pages, cap_sys_admin); } + +/* + * cap_file_mmap - check if able to map given addr + * @file: unused + * @reqprot: unused + * @prot: unused + * @flags: unused + * @addr: address attempting to be mapped + * @addr_only: unused + * + * If the process is attempting to map memory below mmap_min_addr they need + * CAP_SYS_RAWIO. The other parameters to this function are unused by the + * capability security module. Returns 0 if this mapping should be allowed + * -EPERM if not. 
+ */ +int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) +{ + int ret = 0; + + if (addr < mmap_min_addr) { + ret = cap_capable(current, current_cred(), CAP_SYS_RAWIO, + SECURITY_CAP_AUDIT); + /* set PF_SUPERPRIV if it turns out we allow the low mmap */ + if (ret == 0) + current->flags |= PF_SUPERPRIV; + } + return ret; +} -- cgit v1.2.3 From a2551df7ec568d87793d2eea4ca744e86318f205 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 31 Jul 2009 12:54:11 -0400 Subject: Security/SELinux: seperate lsm specific mmap_min_addr Currently SELinux enforcement of controls on the ability to map low memory is determined by the mmap_min_addr tunable. This patch causes SELinux to ignore the tunable and instead use a seperate Kconfig option specific to how much space the LSM should protect. The tunable will now only control the need for CAP_SYS_RAWIO and SELinux permissions will always protect the amount of low memory designated by CONFIG_LSM_MMAP_MIN_ADDR. This allows users who need to disable the mmap_min_addr controls (usual reason being they run WINE as a non-root user) to do so and still have SELinux controls preventing confined domains (like a web server) from being able to map some area of low memory. Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/mm.h | 15 --------------- include/linux/security.h | 17 +++++++++++++++++ kernel/sysctl.c | 7 ++++--- mm/Kconfig | 6 +++--- mm/mmap.c | 3 --- mm/nommu.c | 3 --- security/Kconfig | 16 ++++++++++++++++ security/Makefile | 2 +- security/commoncap.c | 2 +- security/min_addr.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ security/selinux/hooks.c | 2 +- 11 files changed, 92 insertions(+), 30 deletions(-) create mode 100644 security/min_addr.c (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ba3a7cb1eaa0..9a72cc78e6b8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -34,8 +34,6 @@ extern int sysctl_legacy_va_layout; #define sysctl_legacy_va_layout 0 #endif -extern unsigned long mmap_min_addr; - #include #include #include @@ -574,19 +572,6 @@ static inline void set_page_links(struct page *page, enum zone_type zone, set_page_section(page, pfn_to_section_nr(pfn)); } -/* - * If a hint addr is less than mmap_min_addr change hint to be as - * low as possible but still greater than mmap_min_addr - */ -static inline unsigned long round_hint_to_min(unsigned long hint) -{ - hint &= PAGE_MASK; - if (((void *)hint != NULL) && - (hint < mmap_min_addr)) - return PAGE_ALIGN(mmap_min_addr); - return hint; -} - /* * Some inline functions in vmstat.h depend on page_zone() */ diff --git a/include/linux/security.h b/include/linux/security.h index 963a48fc3005..7b431155e392 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -28,6 +28,7 @@ #include #include #include +#include /* PAGE_ALIGN */ #include #include #include @@ -95,6 +96,7 @@ extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb, int cap); extern unsigned long mmap_min_addr; +extern unsigned long dac_mmap_min_addr; /* * Values used in the task_security_ops calls */ @@ -147,6 +149,21 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) opts->num_mnt_opts = 0; } +/* + * If a hint addr is less than mmap_min_addr change hint to be as + * low as possible but still greater than mmap_min_addr + */ +static inline unsigned long 
round_hint_to_min(unsigned long hint) +{ + hint &= PAGE_MASK; + if (((void *)hint != NULL) && + (hint < mmap_min_addr)) + return PAGE_ALIGN(mmap_min_addr); + return hint; +} + +extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); /** * struct security_operations - main security structure * diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 98e02328c67d..58be76017fd0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1306,10 +1307,10 @@ static struct ctl_table vm_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "mmap_min_addr", - .data = &mmap_min_addr, - .maxlen = sizeof(unsigned long), + .data = &dac_mmap_min_addr, + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &mmap_min_addr_handler, }, #ifdef CONFIG_NUMA { diff --git a/mm/Kconfig b/mm/Kconfig index c948d4ca8bde..fe5f674d7a7d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -225,9 +225,9 @@ config DEFAULT_MMAP_MIN_ADDR For most ia64, ppc64 and x86 users with lots of address space a value of 65536 is reasonable and should cause no problems. On arm and other archs it should not be higher than 32768. - Programs which use vm86 functionality would either need additional - permissions from either the LSM or the capabilities module or have - this protection disabled. + Programs which use vm86 functionality or have some need to map + this low address space will need CAP_SYS_RAWIO or disable this + protection by setting the value to 0. This value can be changed after boot using the /proc/sys/vm/mmap_min_addr tunable. diff --git a/mm/mmap.c b/mm/mmap.c index 34579b23ebd5..8101de490c73 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -88,9 +88,6 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; struct percpu_counter vm_committed_as; -/* amount of vm to protect from userspace access */ -unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; - /* * Check that a process has enough memory to allocate a new virtual * mapping. 0 means there is enough memory for the allocation to diff --git a/mm/nommu.c b/mm/nommu.c index 53cab10fece4..28754c40be98 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -69,9 +69,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int heap_stack_gap = 0; -/* amount of vm to protect from userspace access */ -unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; - atomic_long_t mmap_pages_allocated; EXPORT_SYMBOL(mem_map); diff --git a/security/Kconfig b/security/Kconfig index d23c839038f0..9c60c346a91d 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -113,6 +113,22 @@ config SECURITY_ROOTPLUG If you are unsure how to answer this question, answer N. +config LSM_MMAP_MIN_ADDR + int "Low address space for LSM to from user allocation" + depends on SECURITY && SECURITY_SELINUX + default 65535 + help + This is the portion of low virtual memory which should be protected + from userspace allocation. Keeping a user from writing to low pages + can help reduce the impact of kernel NULL pointer bugs. + + For most ia64, ppc64 and x86 users with lots of address space + a value of 65536 is reasonable and should cause no problems. + On arm and other archs it should not be higher than 32768. 
+ Programs which use vm86 functionality or have some need to map + this low address space will need the permission specific to the + systems running LSM. + source security/selinux/Kconfig source security/smack/Kconfig source security/tomoyo/Kconfig diff --git a/security/Makefile b/security/Makefile index c67557cdaa85..b56e7f9ecbc2 100644 --- a/security/Makefile +++ b/security/Makefile @@ -8,7 +8,7 @@ subdir-$(CONFIG_SECURITY_SMACK) += smack subdir-$(CONFIG_SECURITY_TOMOYO) += tomoyo # always enable default capabilities -obj-y += commoncap.o +obj-y += commoncap.o min_addr.o # Object file lists obj-$(CONFIG_SECURITY) += security.o capability.o diff --git a/security/commoncap.c b/security/commoncap.c index 3852e9432801..fe30751a6cd9 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -1005,7 +1005,7 @@ int cap_file_mmap(struct file *file, unsigned long reqprot, { int ret = 0; - if (addr < mmap_min_addr) { + if (addr < dac_mmap_min_addr) { ret = cap_capable(current, current_cred(), CAP_SYS_RAWIO, SECURITY_CAP_AUDIT); /* set PF_SUPERPRIV if it turns out we allow the low mmap */ diff --git a/security/min_addr.c b/security/min_addr.c new file mode 100644 index 000000000000..14cc7b3b8d03 --- /dev/null +++ b/security/min_addr.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include + +/* amount of vm to protect from userspace access by both DAC and the LSM*/ +unsigned long mmap_min_addr; +/* amount of vm to protect from userspace using CAP_SYS_RAWIO (DAC) */ +unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; +/* amount of vm to protect from userspace using the LSM = CONFIG_LSM_MMAP_MIN_ADDR */ + +/* + * Update mmap_min_addr = max(dac_mmap_min_addr, CONFIG_LSM_MMAP_MIN_ADDR) + */ +static void update_mmap_min_addr(void) +{ +#ifdef CONFIG_LSM_MMAP_MIN_ADDR + if (dac_mmap_min_addr > CONFIG_LSM_MMAP_MIN_ADDR) + mmap_min_addr = dac_mmap_min_addr; + else + mmap_min_addr = CONFIG_LSM_MMAP_MIN_ADDR; +#else + mmap_min_addr = dac_mmap_min_addr; +#endif +} + +/* + * sysctl handler which just sets dac_mmap_min_addr = the new value and then + * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly + */ +int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + + update_mmap_min_addr(); + + return ret; +} + +int __init init_mmap_min_addr(void) +{ + update_mmap_min_addr(); + + return 0; +} +pure_initcall(init_mmap_min_addr); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 8a78f584f46e..5dee88362e71 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3040,7 +3040,7 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, * at bad behaviour/exploit that we always want to get the AVC, even * if DAC would have also denied the operation. */ - if (addr < mmap_min_addr) { + if (addr < CONFIG_LSM_MMAP_MIN_ADDR) { rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, MEMPROTECT__MMAP_ZERO, NULL); if (rc) -- cgit v1.2.3 From af6af30c0fcd77e621638e53ef8b176bca8bd3b4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Aug 2009 20:41:04 +0200 Subject: ftrace: Fix perf-tracepoint OOPS Not all tracepoints are created equal, in specific the ftrace tracepoints are created with TRACE_EVENT_FORMAT() which does not generate the needed bits to tie them into perf counters. 
For those events, don't create the 'id' file and fail ->profile_enable when their ID is specified through other means. Reported-by: Chris Mason Signed-off-by: Peter Zijlstra Cc: Steven Rostedt LKML-Reference: <1249497664.5890.4.camel@laptop> [ v2: fix build error in the !CONFIG_EVENT_PROFILE case ] Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 8 +++----- kernel/trace/trace_event_profile.c | 2 +- kernel/trace/trace_events.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5c093ffc655b..d7cd193c2277 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -119,11 +119,9 @@ struct ftrace_event_call { void *filter; void *mod; -#ifdef CONFIG_EVENT_PROFILE - atomic_t profile_count; - int (*profile_enable)(struct ftrace_event_call *); - void (*profile_disable)(struct ftrace_event_call *); -#endif + atomic_t profile_count; + int (*profile_enable)(struct ftrace_event_call *); + void (*profile_disable)(struct ftrace_event_call *); }; #define MAX_FILTER_PRED 32 diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 5b5895afecfe..11ba5bb4ed0a 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -14,7 +14,7 @@ int ftrace_profile_enable(int event_id) mutex_lock(&event_mutex); list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id) { + if (event->id == event_id && event->profile_enable) { ret = event->profile_enable(event); break; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 23d2972b22d6..e75276a49cf5 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -940,7 +940,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, entry = trace_create_file("enable", 0644, call->dir, call, enable); - if (call->id) + if (call->id && call->profile_enable) entry = trace_create_file("id", 0444, call->dir, call, id); -- cgit v1.2.3 From d82f1c35348cebe2fb2d4a4d31ce0ab0769e3d93 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Wed, 5 Aug 2009 01:24:41 -0700 Subject: Input: matrix_keypad - make matrix keymap size dynamic Remove assumption on the shift and size of rows/columns form matrix_keypad driver. 
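As a purely hypothetical board-file sketch (the board_* names below are invented for illustration), the reworked platform data drops max_keymap_size and the fixed-size GPIO arrays; the driver now sizes its keycode table from the number of row and column GPIOs:

#include <linux/kernel.h>
#include <linux/input.h>
#include <linux/input/matrix_keypad.h>

static const uint32_t board_keymap[] = {
	KEY(0, 0, KEY_1), KEY(0, 1, KEY_2), KEY(0, 2, KEY_3),
	KEY(1, 0, KEY_4), KEY(1, 1, KEY_5), KEY(1, 2, KEY_6),
	KEY(2, 0, KEY_7), KEY(2, 1, KEY_8), KEY(2, 2, KEY_9),
};

static const struct matrix_keymap_data board_keymap_data = {
	.keymap		= board_keymap,
	.keymap_size	= ARRAY_SIZE(board_keymap),
};

static const unsigned int board_row_gpios[] = { 10, 11, 12 };
static const unsigned int board_col_gpios[] = { 20, 21, 22 };

static struct matrix_keypad_platform_data board_keypad_pdata = {
	.keymap_data	= &board_keymap_data,
	.row_gpios	= board_row_gpios,
	.col_gpios	= board_col_gpios,
	.num_row_gpios	= ARRAY_SIZE(board_row_gpios),
	.num_col_gpios	= ARRAY_SIZE(board_col_gpios),
};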
Signed-off-by: Eric Miao Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/matrix_keypad.c | 18 +++++++++--------- include/linux/input/matrix_keypad.h | 13 +++++++------ 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index e9b2e7cb05be..541b981ff075 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -27,6 +27,7 @@ struct matrix_keypad { const struct matrix_keypad_platform_data *pdata; struct input_dev *input_dev; unsigned short *keycodes; + unsigned int row_shift; uint32_t last_key_state[MATRIX_MAX_COLS]; struct delayed_work work; @@ -136,7 +137,7 @@ static void matrix_keypad_scan(struct work_struct *work) if ((bits_changed & (1 << row)) == 0) continue; - code = (row << 4) + col; + code = MATRIX_SCAN_CODE(row, col, keypad->row_shift); input_event(input_dev, EV_MSC, MSC_SCAN, code); input_report_key(input_dev, keypad->keycodes[code], @@ -317,6 +318,7 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) struct matrix_keypad *keypad; struct input_dev *input_dev; unsigned short *keycodes; + unsigned int row_shift; int i; int err; @@ -332,14 +334,11 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) return -EINVAL; } - if (!keymap_data->max_keymap_size) { - dev_err(&pdev->dev, "invalid keymap data supplied\n"); - return -EINVAL; - } + row_shift = get_count_order(pdata->num_col_gpios); keypad = kzalloc(sizeof(struct matrix_keypad), GFP_KERNEL); - keycodes = kzalloc(keymap_data->max_keymap_size * - sizeof(keypad->keycodes), + keycodes = kzalloc((pdata->num_row_gpios << row_shift) * + sizeof(*keycodes), GFP_KERNEL); input_dev = input_allocate_device(); if (!keypad || !keycodes || !input_dev) { @@ -350,6 +349,7 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) keypad->input_dev = input_dev; keypad->pdata = pdata; keypad->keycodes = keycodes; + keypad->row_shift = row_shift; keypad->stopped = true; INIT_DELAYED_WORK(&keypad->work, matrix_keypad_scan); spin_lock_init(&keypad->lock); @@ -363,7 +363,7 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) input_dev->keycode = keycodes; input_dev->keycodesize = sizeof(*keycodes); - input_dev->keycodemax = keymap_data->max_keymap_size; + input_dev->keycodemax = pdata->num_row_gpios << keypad->row_shift; for (i = 0; i < keymap_data->keymap_size; i++) { unsigned int key = keymap_data->keymap[i]; @@ -371,7 +371,7 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) unsigned int col = KEY_COL(key); unsigned short code = KEY_VAL(key); - keycodes[(row << 4) + col] = code; + keycodes[MATRIX_SCAN_CODE(row, col, row_shift)] = code; __set_bit(code, input_dev->keybit); } __clear_bit(KEY_RESERVED, input_dev->keybit); diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index 7964516c6954..15d5903af2dd 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -15,12 +15,13 @@ #define KEY_COL(k) (((k) >> 16) & 0xff) #define KEY_VAL(k) ((k) & 0xffff) +#define MATRIX_SCAN_CODE(row, col, row_shift) (((row) << (row_shift)) + (col)) + /** * struct matrix_keymap_data - keymap for matrix keyboards * @keymap: pointer to array of uint32 values encoded with KEY() macro * representing keymap * @keymap_size: number of entries (initialized) in this keymap - * @max_keymap_size: maximum size of keymap supported by the device * * This 
structure is supposed to be used by platform code to supply * keymaps to drivers that implement matrix-like keypads/keyboards. @@ -28,14 +29,13 @@ struct matrix_keymap_data { const uint32_t *keymap; unsigned int keymap_size; - unsigned int max_keymap_size; }; /** * struct matrix_keypad_platform_data - platform-dependent keypad data * @keymap_data: pointer to &matrix_keymap_data - * @row_gpios: array of gpio numbers reporesenting rows - * @col_gpios: array of gpio numbers reporesenting colums + * @row_gpios: pointer to array of gpio numbers representing rows + * @col_gpios: pointer to array of gpio numbers reporesenting colums * @num_row_gpios: actual number of row gpios used by device * @num_col_gpios: actual number of col gpios used by device * @col_scan_delay_us: delay, measured in microseconds, that is @@ -48,8 +48,9 @@ struct matrix_keymap_data { struct matrix_keypad_platform_data { const struct matrix_keymap_data *keymap_data; - unsigned int row_gpios[MATRIX_MAX_ROWS]; - unsigned int col_gpios[MATRIX_MAX_COLS]; + const unsigned int *row_gpios; + const unsigned int *col_gpios; + unsigned int num_row_gpios; unsigned int num_col_gpios; -- cgit v1.2.3 From ace1366369841c9c3a9788f79baa4d73f1c53107 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 6 Aug 2009 15:35:20 +1000 Subject: crypto: cryptd - Add support to access underlaying shash cryptd_alloc_ahash() will allocate a cryptd-ed ahash for specified algorithm name. The new allocated one is guaranteed to be cryptd-ed ahash, so the shash underlying can be gotten via cryptd_ahash_child(). Signed-off-by: Huang Ying Signed-off-by: Herbert Xu --- crypto/cryptd.c | 35 +++++++++++++++++++++++++++++++++++ include/crypto/cryptd.h | 17 +++++++++++++++++ 2 files changed, 52 insertions(+) (limited to 'include') diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 2eb705822f2b..35335825a4ef 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -682,6 +682,41 @@ void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm) } EXPORT_SYMBOL_GPL(cryptd_free_ablkcipher); +struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, + u32 type, u32 mask) +{ + char cryptd_alg_name[CRYPTO_MAX_ALG_NAME]; + struct crypto_ahash *tfm; + + if (snprintf(cryptd_alg_name, CRYPTO_MAX_ALG_NAME, + "cryptd(%s)", alg_name) >= CRYPTO_MAX_ALG_NAME) + return ERR_PTR(-EINVAL); + tfm = crypto_alloc_ahash(cryptd_alg_name, type, mask); + if (IS_ERR(tfm)) + return ERR_CAST(tfm); + if (tfm->base.__crt_alg->cra_module != THIS_MODULE) { + crypto_free_ahash(tfm); + return ERR_PTR(-EINVAL); + } + + return __cryptd_ahash_cast(tfm); +} +EXPORT_SYMBOL_GPL(cryptd_alloc_ahash); + +struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm) +{ + struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base); + + return ctx->child; +} +EXPORT_SYMBOL_GPL(cryptd_ahash_child); + +void cryptd_free_ahash(struct cryptd_ahash *tfm) +{ + crypto_free_ahash(&tfm->base); +} +EXPORT_SYMBOL_GPL(cryptd_free_ahash); + static int __init cryptd_init(void) { int err; diff --git a/include/crypto/cryptd.h b/include/crypto/cryptd.h index 55fa7bbdbc71..2f65a6e8ea4d 100644 --- a/include/crypto/cryptd.h +++ b/include/crypto/cryptd.h @@ -7,6 +7,7 @@ #include #include +#include struct cryptd_ablkcipher { struct crypto_ablkcipher base; @@ -24,4 +25,20 @@ struct cryptd_ablkcipher *cryptd_alloc_ablkcipher(const char *alg_name, struct crypto_blkcipher *cryptd_ablkcipher_child(struct cryptd_ablkcipher *tfm); void cryptd_free_ablkcipher(struct cryptd_ablkcipher *tfm); +struct cryptd_ahash { + struct crypto_ahash base; 
+}; + +static inline struct cryptd_ahash *__cryptd_ahash_cast( + struct crypto_ahash *tfm) +{ + return (struct cryptd_ahash *)tfm; +} + +/* alg_name should be algorithm to be cryptd-ed */ +struct cryptd_ahash *cryptd_alloc_ahash(const char *alg_name, + u32 type, u32 mask); +struct crypto_shash *cryptd_ahash_child(struct cryptd_ahash *tfm); +void cryptd_free_ahash(struct cryptd_ahash *tfm); + #endif -- cgit v1.2.3 From bbff2e433e80fae72c8d00d482927d52ec19ba33 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Thu, 6 Aug 2009 11:36:25 +0300 Subject: slab: remove duplicate kmem_cache_init_late() declarations kmem_cache_init_late() has been declared in slab.h CC: Nick Piggin CC: Matt Mackall CC: Christoph Lameter Signed-off-by: Wu Fengguang Signed-off-by: Pekka Enberg --- include/linux/slob_def.h | 5 ----- include/linux/slub_def.h | 2 -- mm/slob.c | 5 +++++ 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h index bb5368df4be8..0ec00b39d006 100644 --- a/include/linux/slob_def.h +++ b/include/linux/slob_def.h @@ -34,9 +34,4 @@ static __always_inline void *__kmalloc(size_t size, gfp_t flags) return kmalloc(size, flags); } -static inline void kmem_cache_init_late(void) -{ - /* Nothing to do */ -} - #endif /* __LINUX_SLOB_DEF_H */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index c1c862b1d01a..76c831693616 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -304,6 +304,4 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) } #endif -void __init kmem_cache_init_late(void); - #endif /* _LINUX_SLUB_DEF_H */ diff --git a/mm/slob.c b/mm/slob.c index 9641da3d5e58..837ebd64cc34 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -692,3 +692,8 @@ void __init kmem_cache_init(void) { slob_ready = 1; } + +void __init kmem_cache_init_late(void) +{ + /* Nothing to do */ +} -- cgit v1.2.3 From bbd8a0d3a3b65d341437f8b99c828fa5cc29c739 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Thu, 6 Aug 2009 01:44:21 +0000 Subject: net: Avoid enqueuing skb for default qdiscs dev_queue_xmit enqueue's a skb and calls qdisc_run which dequeue's the skb and xmits it. In most cases, the skb that is enqueue'd is the same one that is dequeue'd (unless the queue gets stopped or multiple cpu's write to the same queue and ends in a race with qdisc_run). For default qdiscs, we can remove the redundant enqueue/dequeue and simply xmit the skb since the default qdisc is work-conserving. The patch uses a new flag - TCQ_F_CAN_BYPASS to identify the default fast queue. The controversial part of the patch is incrementing qlen when a skb is requeued - this is to avoid checks like the second line below: + } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && >> !q->gso_skb && + !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { Results of a 2 hour testing for multiple netperf sessions (1, 2, 4, 8, 12 sessions on a 4 cpu system-X). The BW numbers are aggregate Mb/s across iterations tested with this version on System-X boxes with Chelsio 10gbps cards: ---------------------------------- Size | ORG BW NEW BW | ---------------------------------- 128K | 156964 159381 | 256K | 158650 162042 | ---------------------------------- Changes from ver1: 1. Move sch_direct_xmit declaration from sch_generic.h to pkt_sched.h 2. Update qdisc basic statistics for direct xmit path. 3. Set qlen to zero in qdisc_reset. 4. Changed some function names to more meaningful ones. 
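Condensed into a sketch (simplified from the patch itself; the function name is just a placeholder, and the deactivated-queue handling plus some locking details are omitted), the new transmit decision reads roughly as:

static int dev_xmit_skb_sketch(struct sk_buff *skb, struct Qdisc *q,
			       struct net_device *dev,
			       struct netdev_queue *txq)
{
	spinlock_t *root_lock = qdisc_lock(q);
	int rc;

	spin_lock(root_lock);
	if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
	    !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) {
		/* empty, idle, work-conserving queue: xmit directly */
		__qdisc_update_bstats(q, skb->len);
		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
			__qdisc_run(q);	/* queue not empty: keep it running */
		else
			clear_bit(__QDISC_STATE_RUNNING, &q->state);
		rc = NET_XMIT_SUCCESS;
	} else {
		/* fall back to the usual enqueue + qdisc_run() path */
		rc = qdisc_enqueue_root(skb, q);
		qdisc_run(q);
	}
	spin_unlock(root_lock);

	return rc;
}

In the patch proper this logic lives in __dev_xmit_skb() and additionally handles the __QDISC_STATE_DEACTIVATED case, as the diff below shows.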
Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 3 ++ include/net/sch_generic.h | 15 +++++++- net/core/dev.c | 48 +++++++++++++++++------- net/sched/sch_generic.c | 93 +++++++++++++++++++++++++++++------------------ 4 files changed, 108 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 82a3191375f5..f911ec7598ef 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -87,6 +87,9 @@ extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, extern void qdisc_put_rtab(struct qdisc_rate_table *tab); extern void qdisc_put_stab(struct qdisc_size_table *tab); extern void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc); +extern int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, + struct net_device *dev, struct netdev_queue *txq, + spinlock_t *root_lock); extern void __qdisc_run(struct Qdisc *q); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 964ffa0d8815..84b3fc2aef0f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -45,6 +45,7 @@ struct Qdisc #define TCQ_F_BUILTIN 1 #define TCQ_F_THROTTLED 2 #define TCQ_F_INGRESS 4 +#define TCQ_F_CAN_BYPASS 8 #define TCQ_F_WARN_NONWC (1 << 16) int padded; struct Qdisc_ops *ops; @@ -182,6 +183,11 @@ struct qdisc_skb_cb { char data[]; }; +static inline int qdisc_qlen(struct Qdisc *q) +{ + return q->q.qlen; +} + static inline struct qdisc_skb_cb *qdisc_skb_cb(struct sk_buff *skb) { return (struct qdisc_skb_cb *)skb->cb; @@ -387,13 +393,18 @@ static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch) return qdisc_enqueue(skb, sch) & NET_XMIT_MASK; } +static inline void __qdisc_update_bstats(struct Qdisc *sch, unsigned int len) +{ + sch->bstats.bytes += len; + sch->bstats.packets++; +} + static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff_head *list) { __skb_queue_tail(list, skb); sch->qstats.backlog += qdisc_pkt_len(skb); - sch->bstats.bytes += qdisc_pkt_len(skb); - sch->bstats.packets++; + __qdisc_update_bstats(sch, qdisc_pkt_len(skb)); return NET_XMIT_SUCCESS; } diff --git a/net/core/dev.c b/net/core/dev.c index f01a9c41f112..a0bc087616a4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1786,6 +1786,40 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, return netdev_get_tx_queue(dev, queue_index); } +static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, + struct net_device *dev, + struct netdev_queue *txq) +{ + spinlock_t *root_lock = qdisc_lock(q); + int rc; + + spin_lock(root_lock); + if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { + kfree_skb(skb); + rc = NET_XMIT_DROP; + } else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) && + !test_and_set_bit(__QDISC_STATE_RUNNING, &q->state)) { + /* + * This is a work-conserving queue; there are no old skbs + * waiting to be sent out; and the qdisc is not running - + * xmit the skb directly. 
+ */ + __qdisc_update_bstats(q, skb->len); + if (sch_direct_xmit(skb, q, dev, txq, root_lock)) + __qdisc_run(q); + else + clear_bit(__QDISC_STATE_RUNNING, &q->state); + + rc = NET_XMIT_SUCCESS; + } else { + rc = qdisc_enqueue_root(skb, q); + qdisc_run(q); + } + spin_unlock(root_lock); + + return rc; +} + /** * dev_queue_xmit - transmit a buffer * @skb: buffer to transmit @@ -1859,19 +1893,7 @@ gso: skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); #endif if (q->enqueue) { - spinlock_t *root_lock = qdisc_lock(q); - - spin_lock(root_lock); - - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { - kfree_skb(skb); - rc = NET_XMIT_DROP; - } else { - rc = qdisc_enqueue_root(skb, q); - qdisc_run(q); - } - spin_unlock(root_lock); - + rc = __dev_xmit_skb(skb, q, dev, txq); goto out; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 27d03816ec3e..693df7ae33d8 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -37,15 +37,11 @@ * - updates to tree and tree walking are only done under the rtnl mutex. */ -static inline int qdisc_qlen(struct Qdisc *q) -{ - return q->q.qlen; -} - static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { q->gso_skb = skb; q->qstats.requeues++; + q->q.qlen++; /* it's still part of the queue */ __netif_schedule(q); return 0; @@ -61,9 +57,11 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) /* check the reason of requeuing without tx lock first */ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) { q->gso_skb = NULL; - else + q->q.qlen--; + } else skb = NULL; } else { skb = q->dequeue(q); @@ -103,44 +101,23 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, } /* - * NOTE: Called under qdisc_lock(q) with locally disabled BH. - * - * __QDISC_STATE_RUNNING guarantees only one CPU can process - * this qdisc at a time. qdisc_lock(q) serializes queue accesses for - * this queue. - * - * netif_tx_lock serializes accesses to device driver. - * - * qdisc_lock(q) and netif_tx_lock are mutually exclusive, - * if one is grabbed, another must be free. - * - * Note, that this procedure can be called by a watchdog timer + * Transmit one skb, and handle the return status as required. Holding the + * __QDISC_STATE_RUNNING bit guarantees that only one CPU can execute this + * function. * * Returns to the caller: * 0 - queue is empty or throttled. * >0 - queue is not empty. - * */ -static inline int qdisc_restart(struct Qdisc *q) +int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, + struct net_device *dev, struct netdev_queue *txq, + spinlock_t *root_lock) { - struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; - struct net_device *dev; - spinlock_t *root_lock; - struct sk_buff *skb; - - /* Dequeue packet */ - if (unlikely((skb = dequeue_skb(q)) == NULL)) - return 0; - - root_lock = qdisc_lock(q); /* And release qdisc */ spin_unlock(root_lock); - dev = qdisc_dev(q); - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); - HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) @@ -177,6 +154,44 @@ static inline int qdisc_restart(struct Qdisc *q) return ret; } +/* + * NOTE: Called under qdisc_lock(q) with locally disabled BH. + * + * __QDISC_STATE_RUNNING guarantees only one CPU can process + * this qdisc at a time. qdisc_lock(q) serializes queue accesses for + * this queue. 
+ * + * netif_tx_lock serializes accesses to device driver. + * + * qdisc_lock(q) and netif_tx_lock are mutually exclusive, + * if one is grabbed, another must be free. + * + * Note, that this procedure can be called by a watchdog timer + * + * Returns to the caller: + * 0 - queue is empty or throttled. + * >0 - queue is not empty. + * + */ +static inline int qdisc_restart(struct Qdisc *q) +{ + struct netdev_queue *txq; + struct net_device *dev; + spinlock_t *root_lock; + struct sk_buff *skb; + + /* Dequeue packet */ + skb = dequeue_skb(q); + if (unlikely(!skb)) + return 0; + + root_lock = qdisc_lock(q); + dev = qdisc_dev(q); + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + + return sch_direct_xmit(skb, q, dev, txq, root_lock); +} + void __qdisc_run(struct Qdisc *q) { unsigned long start_time = jiffies; @@ -547,8 +562,11 @@ void qdisc_reset(struct Qdisc *qdisc) if (ops->reset) ops->reset(qdisc); - kfree_skb(qdisc->gso_skb); - qdisc->gso_skb = NULL; + if (qdisc->gso_skb) { + kfree_skb(qdisc->gso_skb); + qdisc->gso_skb = NULL; + qdisc->q.qlen = 0; + } } EXPORT_SYMBOL(qdisc_reset); @@ -605,6 +623,9 @@ static void attach_one_default_qdisc(struct net_device *dev, printk(KERN_INFO "%s: activation failed\n", dev->name); return; } + + /* Can by-pass the queue discipline for default qdisc */ + qdisc->flags |= TCQ_F_CAN_BYPASS; } else { qdisc = &noqueue_qdisc; } -- cgit v1.2.3 From 54e346215e4fe2ca8c94c54e546cc61902060510 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Aug 2009 14:38:25 -0300 Subject: vfs: fix inode_init_always calling convention Currently inode_init_always calls into ->destroy_inode if the additional initialization fails. That's not only counter-intuitive because inode_init_always did not allocate the inode structure, but in case of XFS it's actively harmful as ->destroy_inode might delete the inode from a radix-tree that has never been added. This in turn might end up deleting the inode for the same inum that has been instanciated by another process and cause lots of cause subtile problems. Also in the case of re-initializing a reclaimable inode in XFS it would free an inode we still want to keep alive. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen --- fs/inode.c | 30 +++++++++++++++++------------- fs/xfs/xfs_iget.c | 17 +++++------------ include/linux/fs.h | 2 +- 3 files changed, 23 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index 901bad1e5f12..af2c05235cc8 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -120,12 +120,11 @@ static void wake_up_inode(struct inode *inode) * These are initializations that need to be done on every inode * allocation as the fields are not initialised by slab allocation. 
*/ -struct inode *inode_init_always(struct super_block *sb, struct inode *inode) +int inode_init_always(struct super_block *sb, struct inode *inode) { static const struct address_space_operations empty_aops; static struct inode_operations empty_iops; static const struct file_operations empty_fops; - struct address_space *const mapping = &inode->i_data; inode->i_sb = sb; @@ -152,7 +151,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) inode->dirtied_when = 0; if (security_inode_alloc(inode)) - goto out_free_inode; + goto out; /* allocate and initialize an i_integrity */ if (ima_inode_alloc(inode)) @@ -198,16 +197,12 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif - return inode; + return 0; out_free_security: security_inode_free(inode); -out_free_inode: - if (inode->i_sb->s_op->destroy_inode) - inode->i_sb->s_op->destroy_inode(inode); - else - kmem_cache_free(inode_cachep, (inode)); - return NULL; +out: + return -ENOMEM; } EXPORT_SYMBOL(inode_init_always); @@ -220,9 +215,18 @@ static struct inode *alloc_inode(struct super_block *sb) else inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); - if (inode) - return inode_init_always(sb, inode); - return NULL; + if (!inode) + return NULL; + + if (unlikely(inode_init_always(sb, inode))) { + if (inode->i_sb->s_op->destroy_inode) + inode->i_sb->s_op->destroy_inode(inode); + else + kmem_cache_free(inode_cachep, inode); + return NULL; + } + + return inode; } void destroy_inode(struct inode *inode) diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 5fcec6f020a7..719c85b155f4 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -64,6 +64,10 @@ xfs_inode_alloc( ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); if (!ip) return NULL; + if (inode_init_always(mp->m_super, VFS_I(ip))) { + kmem_zone_free(xfs_inode_zone, ip); + return NULL; + } ASSERT(atomic_read(&ip->i_iocount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0); @@ -105,17 +109,6 @@ xfs_inode_alloc( #ifdef XFS_DIR2_TRACE ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); #endif - /* - * Now initialise the VFS inode. We do this after the xfs_inode - * initialisation as internal failures will result in ->destroy_inode - * being called and that will pass down through the reclaim path and - * free the XFS inode. This path requires the XFS inode to already be - * initialised. Hence if this call fails, the xfs_inode has already - * been freed and we should not reference it at all in the error - * handling. - */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) - return NULL; /* prevent anyone from using this yet */ VFS_I(ip)->i_state = I_NEW|I_LOCK; @@ -167,7 +160,7 @@ xfs_iget_cache_hit( * errors cleanly, then tag it so it can be set up correctly * later. 
*/ - if (!inode_init_always(mp->m_super, VFS_I(ip))) { + if (inode_init_always(mp->m_super, VFS_I(ip))) { error = ENOMEM; goto out_error; } diff --git a/include/linux/fs.h b/include/linux/fs.h index a36ffa5a77a4..0c3b5e58a986 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2137,7 +2137,7 @@ extern loff_t default_llseek(struct file *file, loff_t offset, int origin); extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); -extern struct inode * inode_init_always(struct super_block *, struct inode *); +extern int inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); extern void inode_add_to_lists(struct super_block *, struct inode *); extern void iput(struct inode *); -- cgit v1.2.3 From 2e00c97e2c1d2ffc9e26252ca26b237678b0b772 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Aug 2009 14:38:29 -0300 Subject: vfs: add __destroy_inode When we want to tear down an inode that lost the add to the cache race in XFS we must not call into ->destroy_inode because that would delete the inode that won the race from the inode cache radix tree. This patch provides the __destroy_inode helper needed to fix this, the actual fix will be in th next patch. As XFS was the only reason destroy_inode was exported we shift the export to the new __destroy_inode. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen --- fs/inode.c | 10 +++++++--- include/linux/fs.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index af2c05235cc8..ae7b67e48661 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -229,7 +229,7 @@ static struct inode *alloc_inode(struct super_block *sb) return inode; } -void destroy_inode(struct inode *inode) +void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); ima_inode_free(inode); @@ -241,13 +241,17 @@ void destroy_inode(struct inode *inode) if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) posix_acl_release(inode->i_default_acl); #endif +} +EXPORT_SYMBOL(__destroy_inode); + +void destroy_inode(struct inode *inode) +{ + __destroy_inode(inode); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else kmem_cache_free(inode_cachep, (inode)); } -EXPORT_SYMBOL(destroy_inode); - /* * These are initializations that only need to be done diff --git a/include/linux/fs.h b/include/linux/fs.h index 0c3b5e58a986..67888a9e0655 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2164,6 +2164,7 @@ extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void destroy_inode(struct inode *); +extern void __destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct file *); -- cgit v1.2.3 From 4bfc44958e499af9a73f62201543b3a1f617cfeb Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 6 Aug 2009 15:07:33 -0700 Subject: mm: make set_mempolicy(MPOL_INTERLEAV) N_HIGH_MEMORY aware At first, init_task's mems_allowed is initialized as this. init_task->mems_allowed == node_state[N_POSSIBLE] And cpuset's top_cpuset mask is initialized as this top_cpuset->mems_allowed = node_state[N_HIGH_MEMORY] Before 2.6.29: policy's mems_allowed is initialized as this. 1. update tasks->mems_allowed by its cpuset->mems_allowed. 2. 
policy->mems_allowed = nodes_and(tasks->mems_allowed, user's mask) Updating task's mems_allowed in reference to top_cpuset's one. cpuset's mems_allowed is aware of N_HIGH_MEMORY, always. In 2.6.30: After commit 58568d2a8215cb6f55caf2332017d7bdff954e1c ("cpuset,mm: update tasks' mems_allowed in time"), policy's mems_allowed is initialized as this. 1. policy->mems_allowd = nodes_and(task->mems_allowed, user's mask) Here, if task is in top_cpuset, task->mems_allowed is not updated from init's one. Assume user excutes command as #numactrl --interleave=all ,.... policy->mems_allowd = nodes_and(N_POSSIBLE, ALL_SET_MASK) Then, policy's mems_allowd can includes a possible node, which has no pgdat. MPOL's INTERLEAVE just scans nodemask of task->mems_allowd and access this directly. NODE_DATA(nid)->zonelist even if NODE_DATA(nid)==NULL Then, what's we need is making policy->mems_allowed be aware of N_HIGH_MEMORY. This patch does that. But to do so, extra nodemask will be on statck. Because I know cpumask has a new interface of CPUMASK_ALLOC(), I added it to node. This patch stands on old behavior. But I feel this fix itself is just a Band-Aid. But to do fundametal fix, we have to take care of memory hotplug and it takes time. (task->mems_allowd should be N_HIGH_MEMORY, I think.) mpol_set_nodemask() should be aware of N_HIGH_MEMORY and policy's nodemask should be includes only online nodes. In old behavior, this is guaranteed by frequent reference to cpuset's code. Now, most of them are removed and mempolicy has to check it by itself. To do check, a few nodemask_t will be used for calculating nodemask. But, size of nodemask_t can be big and it's not good to allocate them on stack. Now, cpumask_t has CPUMASK_ALLOC/FREE an easy code for get scratch area. NODEMASK_ALLOC/FREE shoudl be there. [akpm@linux-foundation.org: cleanups & tweaks] Tested-by: KOSAKI Motohiro Signed-off-by: KAMEZAWA Hiroyuki Cc: Miao Xie Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Paul Menage Cc: Nick Piggin Cc: Yasunori Goto Cc: Pekka Enberg Cc: David Rientjes Cc: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nodemask.h | 28 ++++++++++++++++ mm/mempolicy.c | 84 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 86 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 829b94b156f2..b359c4a9ec9e 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -82,6 +82,12 @@ * to generate slightly worse code. So use a simple one-line #define * for node_isset(), instead of wrapping an inline inside a macro, the * way we do the other calls. + * + * NODEMASK_SCRATCH + * When doing above logical AND, OR, XOR, Remap operations the callers tend to + * need temporary nodemask_t's on the stack. But if NODES_SHIFT is large, + * nodemask_t's consume too much stack space. NODEMASK_SCRATCH is a helper + * for such situations. See below and CPUMASK_ALLOC also. 
*/ #include @@ -473,4 +479,26 @@ static inline int num_node_state(enum node_states state) #define for_each_node(node) for_each_node_state(node, N_POSSIBLE) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) +/* + * For nodemask scrach area.(See CPUMASK_ALLOC() in cpumask.h) + */ + +#if NODES_SHIFT > 8 /* nodemask_t > 64 bytes */ +#define NODEMASK_ALLOC(x, m) struct x *m = kmalloc(sizeof(*m), GFP_KERNEL) +#define NODEMASK_FREE(m) kfree(m) +#else +#define NODEMASK_ALLOC(x, m) struct x _m, *m = &_m +#define NODEMASK_FREE(m) +#endif + +/* A example struture for using NODEMASK_ALLOC, used in mempolicy. */ +struct nodemask_scratch { + nodemask_t mask1; + nodemask_t mask2; +}; + +#define NODEMASK_SCRATCH(x) NODEMASK_ALLOC(nodemask_scratch, x) +#define NODEMASK_SCRATCH_FREE(x) NODEMASK_FREE(x) + + #endif /* __LINUX_NODEMASK_H */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e08e2c4da63a..7dd9d9f80694 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -191,25 +191,27 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes) * Must be called holding task's alloc_lock to protect task's mems_allowed * and mempolicy. May also be called holding the mmap_semaphore for write. */ -static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes) +static int mpol_set_nodemask(struct mempolicy *pol, + const nodemask_t *nodes, struct nodemask_scratch *nsc) { - nodemask_t cpuset_context_nmask; int ret; /* if mode is MPOL_DEFAULT, pol is NULL. This is right. */ if (pol == NULL) return 0; + /* Check N_HIGH_MEMORY */ + nodes_and(nsc->mask1, + cpuset_current_mems_allowed, node_states[N_HIGH_MEMORY]); VM_BUG_ON(!nodes); if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes)) nodes = NULL; /* explicit local allocation */ else { if (pol->flags & MPOL_F_RELATIVE_NODES) - mpol_relative_nodemask(&cpuset_context_nmask, nodes, - &cpuset_current_mems_allowed); + mpol_relative_nodemask(&nsc->mask2, nodes,&nsc->mask1); else - nodes_and(cpuset_context_nmask, *nodes, - cpuset_current_mems_allowed); + nodes_and(nsc->mask2, *nodes, nsc->mask1); + if (mpol_store_user_nodemask(pol)) pol->w.user_nodemask = *nodes; else @@ -217,8 +219,10 @@ static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes) cpuset_current_mems_allowed; } - ret = mpol_ops[pol->mode].create(pol, - nodes ? &cpuset_context_nmask : NULL); + if (nodes) + ret = mpol_ops[pol->mode].create(pol, &nsc->mask2); + else + ret = mpol_ops[pol->mode].create(pol, NULL); return ret; } @@ -620,12 +624,17 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, { struct mempolicy *new, *old; struct mm_struct *mm = current->mm; + NODEMASK_SCRATCH(scratch); int ret; - new = mpol_new(mode, flags, nodes); - if (IS_ERR(new)) - return PTR_ERR(new); + if (!scratch) + return -ENOMEM; + new = mpol_new(mode, flags, nodes); + if (IS_ERR(new)) { + ret = PTR_ERR(new); + goto out; + } /* * prevent changing our mempolicy while show_numa_maps() * is using it. 
@@ -635,13 +644,13 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, if (mm) down_write(&mm->mmap_sem); task_lock(current); - ret = mpol_set_nodemask(new, nodes); + ret = mpol_set_nodemask(new, nodes, scratch); if (ret) { task_unlock(current); if (mm) up_write(&mm->mmap_sem); mpol_put(new); - return ret; + goto out; } old = current->mempolicy; current->mempolicy = new; @@ -654,7 +663,10 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, up_write(&mm->mmap_sem); mpol_put(old); - return 0; + ret = 0; +out: + NODEMASK_SCRATCH_FREE(scratch); + return ret; } /* @@ -1014,12 +1026,20 @@ static long do_mbind(unsigned long start, unsigned long len, if (err) return err; } - down_write(&mm->mmap_sem); - task_lock(current); - err = mpol_set_nodemask(new, nmask); - task_unlock(current); + { + NODEMASK_SCRATCH(scratch); + if (scratch) { + down_write(&mm->mmap_sem); + task_lock(current); + err = mpol_set_nodemask(new, nmask, scratch); + task_unlock(current); + if (err) + up_write(&mm->mmap_sem); + } else + err = -ENOMEM; + NODEMASK_SCRATCH_FREE(scratch); + } if (err) { - up_write(&mm->mmap_sem); mpol_put(new); return err; } @@ -1891,6 +1911,7 @@ restart: * Install non-NULL @mpol in inode's shared policy rb-tree. * On entry, the current task has a reference on a non-NULL @mpol. * This must be released on exit. + * This is called at get_inode() calls and we can use GFP_KERNEL. */ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) { @@ -1902,19 +1923,24 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) if (mpol) { struct vm_area_struct pvma; struct mempolicy *new; + NODEMASK_SCRATCH(scratch); + if (!scratch) + return; /* contextualize the tmpfs mount point mempolicy */ new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); if (IS_ERR(new)) { mpol_put(mpol); /* drop our ref on sb mpol */ + NODEMASK_SCRATCH_FREE(scratch); return; /* no valid nodemask intersection */ } task_lock(current); - ret = mpol_set_nodemask(new, &mpol->w.user_nodemask); + ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch); task_unlock(current); mpol_put(mpol); /* drop our ref on sb mpol */ if (ret) { + NODEMASK_SCRATCH_FREE(scratch); mpol_put(new); return; } @@ -1924,6 +1950,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) pvma.vm_end = TASK_SIZE; /* policy covers entire file */ mpol_set_shared_policy(sp, &pvma, new); /* adds ref */ mpol_put(new); /* drop initial ref */ + NODEMASK_SCRATCH_FREE(scratch); } } @@ -2140,13 +2167,18 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) err = 1; else { int ret; - - task_lock(current); - ret = mpol_set_nodemask(new, &nodes); - task_unlock(current); - if (ret) + NODEMASK_SCRATCH(scratch); + if (scratch) { + task_lock(current); + ret = mpol_set_nodemask(new, &nodes, scratch); + task_unlock(current); + } else + ret = -ENOMEM; + NODEMASK_SCRATCH_FREE(scratch); + if (ret) { err = 1; - else if (no_context) { + mpol_put(new); + } else if (no_context) { /* save for contextualization */ new->w.user_nodemask = nodes; } -- cgit v1.2.3 From daeb6b6fbe27049f465c48a7d0ee5555c3b84064 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 6 Aug 2009 15:09:30 -0700 Subject: bzip2/lzma/gzip: fix comments describing decompressor API Fix and improve comments in decompress/generic.h that describe the decompressor API. 
Also remove an unused definition, and rename INBUF_LEN in lib/decompress_inflate.c to conform to bzip2/lzma naming. Signed-off-by: Phillip Lougher Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/decompress/generic.h | 32 +++++++++++++++++++------------- lib/decompress_inflate.c | 8 ++++---- 2 files changed, 23 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h index 6dfb856327bb..0c7111a55a1a 100644 --- a/include/linux/decompress/generic.h +++ b/include/linux/decompress/generic.h @@ -1,31 +1,37 @@ #ifndef DECOMPRESS_GENERIC_H #define DECOMPRESS_GENERIC_H -/* Minimal chunksize to be read. - *Bzip2 prefers at least 4096 - *Lzma prefers 0x10000 */ -#define COMPR_IOBUF_SIZE 4096 - typedef int (*decompress_fn) (unsigned char *inbuf, int len, int(*fill)(void*, unsigned int), - int(*writebb)(void*, unsigned int), - unsigned char *output, + int(*flush)(void*, unsigned int), + unsigned char *outbuf, int *posp, void(*error)(char *x)); /* inbuf - input buffer *len - len of pre-read data in inbuf - *fill - function to fill inbuf if empty - *writebb - function to write out outbug + *fill - function to fill inbuf when empty + *flush - function to write out outbuf + *outbuf - output buffer *posp - if non-null, input position (number of bytes read) will be * returned here * - *If len != 0, the inbuf is initialized (with as much data), and fill - *should not be called - *If len = 0, the inbuf is allocated, but empty. Its size is IOBUF_SIZE - *fill should be called (repeatedly...) to read data, at most IOBUF_SIZE + *If len != 0, inbuf should contain all the necessary input data, and fill + *should be NULL + *If len = 0, inbuf can be NULL, in which case the decompressor will allocate + *the input buffer. If inbuf != NULL it must be at least XXX_IOBUF_SIZE bytes. + *fill will be called (repeatedly...) to read data, at most XXX_IOBUF_SIZE + *bytes should be read per call. Replace XXX with the appropriate decompressor + *name, i.e. LZMA_IOBUF_SIZE. + * + *If flush = NULL, outbuf must be large enough to buffer all the expected + *output. If flush != NULL, the output buffer will be allocated by the + *decompressor (outbuf = NULL), and the flush function will be called to + *flush the output buffer at the appropriate time (decompressor and stream + *dependent). 
*/ + /* Utility routine to detect the decompression method */ decompress_fn decompress_method(const unsigned char *inbuf, int len, const char **name); diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index e36b296fc9f8..bfe605ac534f 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -25,7 +25,7 @@ #include #include -#define INBUF_LEN (16*1024) +#define GZIP_IOBUF_SIZE (16*1024) /* Included from initramfs et al code */ STATIC int INIT gunzip(unsigned char *buf, int len, @@ -55,7 +55,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, if (buf) zbuf = buf; else { - zbuf = malloc(INBUF_LEN); + zbuf = malloc(GZIP_IOBUF_SIZE); len = 0; } if (!zbuf) { @@ -77,7 +77,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, } if (len == 0) - len = fill(zbuf, INBUF_LEN); + len = fill(zbuf, GZIP_IOBUF_SIZE); /* verify the gzip header */ if (len < 10 || @@ -113,7 +113,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, while (rc == Z_OK) { if (strm->avail_in == 0) { /* TODO: handle case where both pos and fill are set */ - len = fill(zbuf, INBUF_LEN); + len = fill(zbuf, GZIP_IOBUF_SIZE); if (len < 0) { rc = -1; error("read error"); -- cgit v1.2.3 From fa56d4cb4022c8b313c3b99236e1b87effc3655b Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 23 Jun 2009 11:29:11 +0000 Subject: ide: allow ide_dev_read_id() to be called from the IRQ context * Un-static __ide_wait_stat(). * Allow ide_dev_read_id() helper to be called from the IRQ context by adding irq_ctx flag and using mdelay()/__ide_wait_stat() when needed. * Switch ide_driveid_update() to set irq_ctx flag. This change is needed for the consecutive patch which fixes races in handling of user-space SET XFER commands but for improved bisectability and clarity it is better to do it in a separate patch. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. Miller --- drivers/ide/ide-iops.c | 6 +++--- drivers/ide/ide-probe.c | 31 +++++++++++++++++++++---------- include/linux/ide.h | 3 ++- 3 files changed, 26 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index 2892b242bbe1..b99873845d21 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -102,8 +102,8 @@ EXPORT_SYMBOL(ide_fixstring); * setting a timer to wake up at half second intervals thereafter, * until timeout is achieved, before timing out. */ -static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, - unsigned long timeout, u8 *rstat) +int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, + unsigned long timeout, u8 *rstat) { ide_hwif_t *hwif = drive->hwif; const struct ide_tp_ops *tp_ops = hwif->tp_ops; @@ -316,7 +316,7 @@ int ide_driveid_update(ide_drive_t *drive) return 0; SELECT_MASK(drive, 1); - rc = ide_dev_read_id(drive, ATA_CMD_ID_ATA, id); + rc = ide_dev_read_id(drive, ATA_CMD_ID_ATA, id, 1); SELECT_MASK(drive, 0); if (rc) diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 1bb106f6221a..8de442cbee94 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -238,6 +238,7 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) * @drive: drive to identify * @cmd: command to use * @id: buffer for IDENTIFY data + * @irq_ctx: flag set when called from the IRQ context * * Sends an ATA(PI) IDENTIFY request to a drive and waits for a response. 
* @@ -246,7 +247,7 @@ static void do_identify(ide_drive_t *drive, u8 cmd, u16 *id) * 2 device aborted the command (refused to identify itself) */ -int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) +int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id, int irq_ctx) { ide_hwif_t *hwif = drive->hwif; struct ide_io_ports *io_ports = &hwif->io_ports; @@ -263,7 +264,10 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) tp_ops->write_devctl(hwif, ATA_NIEN | ATA_DEVCTL_OBS); /* take a deep breath */ - msleep(50); + if (irq_ctx) + mdelay(50); + else + msleep(50); if (io_ports->ctl_addr && (hwif->host_flags & IDE_HFLAG_BROKEN_ALTSTATUS) == 0) { @@ -295,12 +299,19 @@ int ide_dev_read_id(ide_drive_t *drive, u8 cmd, u16 *id) timeout = ((cmd == ATA_CMD_ID_ATA) ? WAIT_WORSTCASE : WAIT_PIDENTIFY) / 2; - if (ide_busy_sleep(drive, timeout, use_altstatus)) - return 1; - /* wait for IRQ and ATA_DRQ */ - msleep(50); - s = tp_ops->read_status(hwif); + if (irq_ctx) { + rc = __ide_wait_stat(drive, ATA_DRQ, BAD_R_STAT, timeout, &s); + if (rc) + return 1; + } else { + rc = ide_busy_sleep(drive, timeout, use_altstatus); + if (rc) + return 1; + + msleep(50); + s = tp_ops->read_status(hwif); + } if (OK_STAT(s, ATA_DRQ, BAD_R_STAT)) { /* drive returned ID */ @@ -406,10 +417,10 @@ static int do_probe (ide_drive_t *drive, u8 cmd) if (OK_STAT(stat, ATA_DRDY, ATA_BUSY) || present || cmd == ATA_CMD_ID_ATAPI) { - rc = ide_dev_read_id(drive, cmd, id); + rc = ide_dev_read_id(drive, cmd, id, 0); if (rc) /* failed: try again */ - rc = ide_dev_read_id(drive, cmd, id); + rc = ide_dev_read_id(drive, cmd, id, 0); stat = tp_ops->read_status(hwif); @@ -424,7 +435,7 @@ static int do_probe (ide_drive_t *drive, u8 cmd) msleep(50); tp_ops->exec_command(hwif, ATA_CMD_DEV_RESET); (void)ide_busy_sleep(drive, WAIT_WORSTCASE, 0); - rc = ide_dev_read_id(drive, cmd, id); + rc = ide_dev_read_id(drive, cmd, id, 0); } /* ensure drive IRQ is clear */ diff --git a/include/linux/ide.h b/include/linux/ide.h index edc93a6d931d..cb6cd0459a5e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1081,6 +1081,7 @@ extern void ide_fixstring(u8 *, const int, const int); int ide_busy_sleep(ide_drive_t *, unsigned long, int); +int __ide_wait_stat(ide_drive_t *, u8, u8, unsigned long, u8 *); int ide_wait_stat(ide_startstop_t *, ide_drive_t *, u8, u8, unsigned long); ide_startstop_t ide_do_park_unpark(ide_drive_t *, struct request *); @@ -1169,7 +1170,7 @@ int ide_no_data_taskfile(ide_drive_t *, struct ide_cmd *); int ide_taskfile_ioctl(ide_drive_t *, unsigned long); -int ide_dev_read_id(ide_drive_t *, u8, u16 *); +int ide_dev_read_id(ide_drive_t *, u8, u16 *, int); extern int ide_driveid_update(ide_drive_t *); extern int ide_config_drive_speed(ide_drive_t *, u8); -- cgit v1.2.3 From 665d66e8fad60a5a162c4615f27f916ad1a6d567 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 23 Jun 2009 11:35:51 +0000 Subject: ide: fix races in handling of user-space SET XFER commands * Make cmd->tf_flags field 'u16' and add IDE_TFLAG_SET_XFER taskfile flag. * Update ide_finish_cmd() to set xfer / re-read id if the new flag is set. * Convert set_xfer_rate() (write handler for /proc/ide/hd?/current_speed) and ide_cmd_ioctl() (HDIO_DRIVE_CMD ioctl handler) to use the new flag. * Remove no longer needed disable_irq_nosync() + enable_irq() from ide_config_drive_speed(). Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: David S. 
Miller --- drivers/ide/ide-ioctls.c | 8 ++------ drivers/ide/ide-iops.c | 10 ---------- drivers/ide/ide-proc.c | 10 ++-------- drivers/ide/ide-taskfile.c | 9 ++++++++- include/linux/ide.h | 3 ++- 5 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-ioctls.c b/drivers/ide/ide-ioctls.c index e246d3d3fbcc..d3440b5010a5 100644 --- a/drivers/ide/ide-ioctls.c +++ b/drivers/ide/ide-ioctls.c @@ -167,6 +167,8 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) err = -EINVAL; goto abort; } + + cmd.tf_flags |= IDE_TFLAG_SET_XFER; } err = ide_raw_taskfile(drive, &cmd, buf, args[3]); @@ -174,12 +176,6 @@ static int ide_cmd_ioctl(ide_drive_t *drive, unsigned long arg) args[0] = tf->status; args[1] = tf->error; args[2] = tf->nsect; - - if (!err && xfer_rate) { - /* active-retuning-calls future */ - ide_set_xfer_rate(drive, xfer_rate); - ide_driveid_update(drive); - } abort: if (copy_to_user((void __user *)arg, &args, 4)) err = -EFAULT; diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index b99873845d21..b14fa9a87c49 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -363,14 +363,6 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) * this point (lost interrupt). */ - /* - * FIXME: we race against the running IRQ here if - * this is called from non IRQ context. If we use - * disable_irq() we hang on the error path. Work - * is needed. - */ - disable_irq_nosync(hwif->irq); - udelay(1); tp_ops->dev_select(drive); SELECT_MASK(drive, 1); @@ -394,8 +386,6 @@ int ide_config_drive_speed(ide_drive_t *drive, u8 speed) SELECT_MASK(drive, 0); - enable_irq(hwif->irq); - if (error) { (void) ide_dump_status(drive, "set_drive_speed_status", stat); return error; diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 3242698832a4..021de41655e6 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -195,7 +195,6 @@ ide_devset_get(xfer_rate, current_speed); static int set_xfer_rate (ide_drive_t *drive, int arg) { struct ide_cmd cmd; - int err; if (arg < XFER_PIO_0 || arg > XFER_UDMA_6) return -EINVAL; @@ -206,14 +205,9 @@ static int set_xfer_rate (ide_drive_t *drive, int arg) cmd.tf.nsect = (u8)arg; cmd.valid.out.tf = IDE_VALID_FEATURE | IDE_VALID_NSECT; cmd.valid.in.tf = IDE_VALID_NSECT; + cmd.tf_flags = IDE_TFLAG_SET_XFER; - err = ide_no_data_taskfile(drive, &cmd); - - if (!err) { - ide_set_xfer_rate(drive, (u8) arg); - ide_driveid_update(drive); - } - return err; + return ide_no_data_taskfile(drive, &cmd); } ide_devset_rw(current_speed, xfer_rate); diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 50336d51eebc..cc8633cbe133 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -324,10 +324,17 @@ static void ide_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd) void ide_finish_cmd(ide_drive_t *drive, struct ide_cmd *cmd, u8 stat) { struct request *rq = drive->hwif->rq; - u8 err = ide_read_error(drive); + u8 err = ide_read_error(drive), nsect = cmd->tf.nsect; + u8 set_xfer = !!(cmd->tf_flags & IDE_TFLAG_SET_XFER); ide_complete_cmd(drive, cmd, stat, err); rq->errors = err; + + if (err == 0 && set_xfer) { + ide_set_xfer_rate(drive, nsect); + ide_driveid_update(drive); + } + ide_complete_rq(drive, err ? 
-EIO : 0, blk_rq_bytes(rq)); } diff --git a/include/linux/ide.h b/include/linux/ide.h index cb6cd0459a5e..803c1ae31237 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -258,6 +258,7 @@ enum { IDE_TFLAG_DYN = (1 << 5), IDE_TFLAG_FS = (1 << 6), IDE_TFLAG_MULTI_PIO = (1 << 7), + IDE_TFLAG_SET_XFER = (1 << 8), }; enum { @@ -294,7 +295,7 @@ struct ide_cmd { } out, in; } valid; - u8 tf_flags; + u16 tf_flags; u8 ftf_flags; /* for TASKFILE ioctl */ int protocol; -- cgit v1.2.3 From 3a6593050fbd8bbcaed3a44d01c31d907315c86c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 21 Jul 2009 17:34:57 +0200 Subject: perf_counter, ftrace: Fix perf_counter integration Adds possible second part to the assign argument of TP_EVENT(). TP_perf_assign( __perf_count(foo); __perf_addr(bar); ) Which, when specified make the swcounter increment with @foo instead of the usual 1, and report @bar for PERF_SAMPLE_ADDR (data address associated with the event) when this triggers a counter overflow. Signed-off-by: Peter Zijlstra Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Jason Baron Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 110 ++++++++++++++++++++++++++++++++++++++----------- kernel/perf_counter.c | 6 +-- 2 files changed, 88 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 1867553c61e5..fec71f8dbc48 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -144,6 +144,9 @@ #undef TP_fast_assign #define TP_fast_assign(args...) args +#undef TP_perf_assign +#define TP_perf_assign(args...) + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ static int \ @@ -345,6 +348,88 @@ static inline int ftrace_get_offsets_##call( \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#ifdef CONFIG_EVENT_PROFILE + +/* + * Generate the functions needed for tracepoint perf_counter support. + * + * static void ftrace_profile_(proto) + * { + * extern void perf_tpcounter_event(int, u64, u64); + * u64 __addr = 0, __count = 1; + * + * <-- here we expand the TP_perf_assign() macro + * + * perf_tpcounter_event(event_.id, __addr, __count); + * } + * + * static int ftrace_profile_enable_(struct ftrace_event_call *event_call) + * { + * int ret = 0; + * + * if (!atomic_inc_return(&event_call->profile_count)) + * ret = register_trace_(ftrace_profile_); + * + * return ret; + * } + * + * static void ftrace_profile_disable_(struct ftrace_event_call *event_call) + * { + * if (atomic_add_negative(-1, &event->call->profile_count)) + * unregister_trace_(ftrace_profile_); + * } + * + */ + +#undef TP_fast_assign +#define TP_fast_assign(args...) + +#undef TP_perf_assign +#define TP_perf_assign(args...) 
args + +#undef __perf_addr +#define __perf_addr(a) __addr = (a) + +#undef __perf_count +#define __perf_count(c) __count = (c) + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ + \ +static void ftrace_profile_##call(proto) \ +{ \ + extern void perf_tpcounter_event(int, u64, u64); \ + u64 __addr = 0, __count = 1; \ + { assign; } \ + perf_tpcounter_event(event_##call.id, __addr, __count); \ +} \ + \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + \ + if (!atomic_inc_return(&event_call->profile_count)) \ + ret = register_trace_##call(ftrace_profile_##call); \ + \ + return ret; \ +} \ + \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ +{ \ + if (atomic_add_negative(-1, &event_call->profile_count)) \ + unregister_trace_##call(ftrace_profile_##call); \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TP_perf_assign +#define TP_perf_assign(args...) + +#endif + /* * Stage 4 of the trace events. * @@ -447,28 +532,6 @@ static inline int ftrace_get_offsets_##call( \ #define TP_FMT(fmt, args...) fmt "\n", ##args #ifdef CONFIG_EVENT_PROFILE -#define _TRACE_PROFILE(call, proto, args) \ -static void ftrace_profile_##call(proto) \ -{ \ - extern void perf_tpcounter_event(int); \ - perf_tpcounter_event(event_##call.id); \ -} \ - \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ -{ \ - int ret = 0; \ - \ - if (!atomic_inc_return(&event_call->profile_count)) \ - ret = register_trace_##call(ftrace_profile_##call); \ - \ - return ret; \ -} \ - \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ -{ \ - if (atomic_add_negative(-1, &event_call->profile_count)) \ - unregister_trace_##call(ftrace_profile_##call); \ -} #define _TRACE_PROFILE_INIT(call) \ .profile_count = ATOMIC_INIT(-1), \ @@ -476,7 +539,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ .profile_disable = ftrace_profile_disable_##call, #else -#define _TRACE_PROFILE(call, proto, args) #define _TRACE_PROFILE_INIT(call) #endif @@ -502,7 +564,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ \ static struct ftrace_event_call event_##call; \ \ @@ -586,6 +647,5 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef _TRACE_PROFILE #undef _TRACE_PROFILE_INIT diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 673c1aaf7332..52eb4b68d34f 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3703,17 +3703,17 @@ static const struct pmu perf_ops_task_clock = { }; #ifdef CONFIG_EVENT_PROFILE -void perf_tpcounter_event(int event_id) +void perf_tpcounter_event(int event_id, u64 addr, u64 count) { struct perf_sample_data data = { .regs = get_irq_regs(), - .addr = 0, + .addr = addr, }; if (!data.regs) data.regs = task_pt_regs(current); - do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data); + do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data); } EXPORT_SYMBOL_GPL(perf_tpcounter_event); -- cgit v1.2.3 From f413cdb80ce00ec1a4d0ab949b5d96c81cae7f75 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Aug 2009 01:25:54 +0200 Subject: perf_counter: Fix/complete ftrace 
event records sampling This patch implements the kernel side support for ftrace event record sampling. A new counter sampling attribute is added: PERF_SAMPLE_TP_RECORD which requests ftrace events record sampling. In this case if a PERF_TYPE_TRACEPOINT counter is active and a tracepoint fires, we emit the tracepoint binary record to the perfcounter event buffer, as a sample. Result, after setting PERF_SAMPLE_TP_RECORD attribute from perf record: perf record -f -F 1 -a -e workqueue:workqueue_execution perf report -D 0x21e18 [0x48]: event: 9 . . ... raw event: size 72 bytes . 0000: 09 00 00 00 01 00 48 00 d0 c7 00 81 ff ff ff ff ......H........ . 0010: 0a 00 00 00 0a 00 00 00 21 00 00 00 00 00 00 00 ........!...... . 0020: 2b 00 01 02 0a 00 00 00 0a 00 00 00 65 76 65 6e +...........eve . 0030: 74 73 2f 31 00 00 00 00 00 00 00 00 0a 00 00 00 ts/1........... . 0040: e0 b1 31 81 ff ff ff ff ....... . 0x21e18 [0x48]: PERF_EVENT_SAMPLE (IP, 1): 10: 0xffffffff8100c7d0 period: 33 The raw ftrace binary record starts at offset 0020. Translation: struct trace_entry { type = 0x2b = 43; flags = 1; preempt_count = 2; pid = 0xa = 10; tgid = 0xa = 10; } thread_comm = "events/1" thread_pid = 0xa = 10; func = 0xffffffff8131b1e0 = flush_to_ldisc() What will come next? - Userspace support ('perf trace'), 'flight data recorder' mode for perf trace, etc. - The unconditional copy from the profiling callback brings some costs however if someone wants no such sampling to occur, and needs to be fixed in the future. For that we need to have an instant access to the perf counter attribute. This is a matter of a flag to add in the struct ftrace_event. - Take care of the events recursivity! Don't ever try to record a lock event for example, it seems some locking is used in the profiling fast path and lead to a tracing recursivity. That will be fixed using raw spinlock or recursivity protection. - [...] - Profit! 
:-) Signed-off-by: Frederic Weisbecker Cc: Li Zefan Cc: Tom Zanussi Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Steven Rostedt Cc: Paul Mackerras Cc: Pekka Enberg Cc: Gabriel Munteanu Cc: Lai Jiangshan Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 4 +- include/linux/perf_counter.h | 9 ++- include/trace/ftrace.h | 130 ++++++++++++++++++++++++++++++++----------- kernel/perf_counter.c | 18 +++++- kernel/trace/trace.c | 1 + kernel/trace/trace.h | 4 -- tools/perf/builtin-record.c | 1 + 7 files changed, 126 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index d7cd193c2277..a81170de7f6b 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -89,7 +89,9 @@ enum print_line_t { TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ }; - +void tracing_generic_entry_update(struct trace_entry *entry, + unsigned long flags, + int pc); struct ring_buffer_event * trace_current_buffer_lock_reserve(int type, unsigned long len, unsigned long flags, int pc); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e604e6ef72dd..a67dd5c5b6d3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -121,8 +121,9 @@ enum perf_counter_sample_format { PERF_SAMPLE_CPU = 1U << 7, PERF_SAMPLE_PERIOD = 1U << 8, PERF_SAMPLE_STREAM_ID = 1U << 9, + PERF_SAMPLE_TP_RECORD = 1U << 10, - PERF_SAMPLE_MAX = 1U << 10, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ }; /* @@ -413,6 +414,11 @@ struct perf_callchain_entry { __u64 ip[PERF_MAX_STACK_DEPTH]; }; +struct perf_tracepoint_record { + int size; + char *record; +}; + struct task_struct; /** @@ -681,6 +687,7 @@ struct perf_sample_data { struct pt_regs *regs; u64 addr; u64 period; + void *private; }; extern int perf_counter_overflow(struct perf_counter *counter, int nmi, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index fec71f8dbc48..7fb16d90e7b1 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -353,15 +353,7 @@ static inline int ftrace_get_offsets_##call( \ /* * Generate the functions needed for tracepoint perf_counter support. * - * static void ftrace_profile_(proto) - * { - * extern void perf_tpcounter_event(int, u64, u64); - * u64 __addr = 0, __count = 1; - * - * <-- here we expand the TP_perf_assign() macro - * - * perf_tpcounter_event(event_.id, __addr, __count); - * } + * NOTE: The insertion profile callback (ftrace_profile_) is defined later * * static int ftrace_profile_enable_(struct ftrace_event_call *event_call) * { @@ -381,28 +373,10 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TP_fast_assign -#define TP_fast_assign(args...) - -#undef TP_perf_assign -#define TP_perf_assign(args...) 
args - -#undef __perf_addr -#define __perf_addr(a) __addr = (a) - -#undef __perf_count -#define __perf_count(c) __count = (c) - #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ \ -static void ftrace_profile_##call(proto) \ -{ \ - extern void perf_tpcounter_event(int, u64, u64); \ - u64 __addr = 0, __count = 1; \ - { assign; } \ - perf_tpcounter_event(event_##call.id, __addr, __count); \ -} \ +static void ftrace_profile_##call(proto); \ \ static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ { \ @@ -422,12 +396,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef TP_perf_assign -#define TP_perf_assign(args...) - #endif /* @@ -647,5 +615,99 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +/* + * Define the insertion callback to profile events + * + * The job is very similar to ftrace_raw_event_ except that we don't + * insert in the ring buffer but in a perf counter. + * + * static void ftrace_profile_(proto) + * { + * struct ftrace_data_offsets_ __maybe_unused __data_offsets; + * struct ftrace_event_call *event_call = &event_; + * extern void perf_tpcounter_event(int, u64, u64, void *, int); + * struct ftrace_raw_##call *entry; + * u64 __addr = 0, __count = 1; + * unsigned long irq_flags; + * int __entry_size; + * int __data_size; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * __data_size = ftrace_get_offsets_(&__data_offsets, args); + * __entry_size = __data_size + sizeof(*entry); + * + * do { + * char raw_data[__entry_size]; <- allocate our sample in the stack + * struct trace_entry *ent; + * + * entry = (struct ftrace_raw_ *)raw_data; + * ent = &entry->ent; + * tracing_generic_entry_update(ent, irq_flags, pc); + * ent->type = event_call->id; + * + * <- do some jobs with dynamic arrays + * + * <- affect our values + * + * perf_tpcounter_event(event_call->id, __addr, __count, entry, + * __entry_size); <- submit them to perf counter + * } while (0); + * + * } + */ + +#ifdef CONFIG_EVENT_PROFILE + +#undef __perf_addr +#define __perf_addr(a) __addr = (a) + +#undef __perf_count +#define __perf_count(c) __count = (c) + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +static void ftrace_profile_##call(proto) \ +{ \ + struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ + struct ftrace_event_call *event_call = &event_##call; \ + extern void perf_tpcounter_event(int, u64, u64, void *, int); \ + struct ftrace_raw_##call *entry; \ + u64 __addr = 0, __count = 1; \ + unsigned long irq_flags; \ + int __entry_size; \ + int __data_size; \ + int pc; \ + \ + local_save_flags(irq_flags); \ + pc = preempt_count(); \ + \ + __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ + __entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\ + \ + do { \ + char raw_data[__entry_size]; \ + struct trace_entry *ent; \ + \ + entry = (struct ftrace_raw_##call *)raw_data; \ + ent = &entry->ent; \ + tracing_generic_entry_update(ent, irq_flags, pc); \ + ent->type = event_call->id; \ + \ + tstruct \ + \ + { assign; } \ + \ + perf_tpcounter_event(event_call->id, __addr, __count, entry,\ + __entry_size); \ + } while (0); \ + \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#endif /* CONFIG_EVENT_PROFILE */ + #undef _TRACE_PROFILE_INIT diff --git 
a/kernel/perf_counter.c b/kernel/perf_counter.c index 52eb4b68d34f..868102172aa4 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2646,6 +2646,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, u64 counter; } group_entry; struct perf_callchain_entry *callchain = NULL; + struct perf_tracepoint_record *tp; int callchain_size = 0; u64 time; struct { @@ -2714,6 +2715,11 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, header.size += sizeof(u64); } + if (sample_type & PERF_SAMPLE_TP_RECORD) { + tp = data->private; + header.size += tp->size; + } + ret = perf_output_begin(&handle, counter, header.size, nmi, 1); if (ret) return; @@ -2777,6 +2783,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } } + if (sample_type & PERF_SAMPLE_TP_RECORD) + perf_output_copy(&handle, tp->record, tp->size); + perf_output_end(&handle); } @@ -3703,11 +3712,18 @@ static const struct pmu perf_ops_task_clock = { }; #ifdef CONFIG_EVENT_PROFILE -void perf_tpcounter_event(int event_id, u64 addr, u64 count) +void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, + int entry_size) { + struct perf_tracepoint_record tp = { + .size = entry_size, + .record = record, + }; + struct perf_sample_data data = { .regs = get_irq_regs(), .addr = addr, + .private = &tp, }; if (!data.regs) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8930e39b9d8c..c22b40f8f576 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -848,6 +848,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); } +EXPORT_SYMBOL_GPL(tracing_generic_entry_update); struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, int type, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3548ae5cc780..8b9f4f6e9559 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -438,10 +438,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); -void tracing_generic_entry_update(struct trace_entry *entry, - unsigned long flags, - int pc); - void default_wait_pipe(struct trace_iterator *iter); void poll_wait_pipe(struct trace_iterator *iter); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 6da09928130f..90c98082af10 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -412,6 +412,7 @@ static void create_counter(int counter, int cpu, pid_t pid) if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + attr->mmap = track; attr->comm = track; attr->inherit = (cpu < 0) && inherit; -- cgit v1.2.3 From 3a43ce68ae1758fa6a839386025ef45acb6baa22 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Aug 2009 04:26:37 +0200 Subject: perf_counter: Fix tracepoint sampling to be part of generic sampling Based on Peter's comments, make tracepoint sampling generic just like all the other sampling bits are. This is a rename with no code changes: - PERF_SAMPLE_TP_RECORD to PERF_SAMPLE_RAW - struct perf_tracepoint_record to perf_raw_record We want the system in place that transport tracepoints raw samples events into the perf ring buffer to be generalized and usable by any type of counter. 
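To make the renamed interface concrete, here is a minimal sketch (not part of this patch; condensed from the perf_tpcounter_event() hunk that follows, with all identifiers taken from those hunks) of how an event source hands an opaque record to the generic sampling path once PERF_SAMPLE_RAW is in place:

        struct perf_raw_record raw = {
                .size = entry_size,     /* length of the opaque payload, in bytes */
                .data = record,         /* pointer to the payload itself */
        };
        struct perf_sample_data data = {
                .regs = get_irq_regs(),
                .addr = addr,
                .raw  = &raw,           /* copied to the ring buffer only if PERF_SAMPLE_RAW was requested */
        };

        do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data);

Any counter type can attach raw data through the same perf_raw_record / perf_sample_data.raw pair; the tracepoint path is simply the first user.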
Reported-by; Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1249698400-5441-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 +++++----- kernel/perf_counter.c | 20 ++++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a67dd5c5b6d3..2aabe43c1d04 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -121,7 +121,7 @@ enum perf_counter_sample_format { PERF_SAMPLE_CPU = 1U << 7, PERF_SAMPLE_PERIOD = 1U << 8, PERF_SAMPLE_STREAM_ID = 1U << 9, - PERF_SAMPLE_TP_RECORD = 1U << 10, + PERF_SAMPLE_RAW = 1U << 10, PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ }; @@ -414,9 +414,9 @@ struct perf_callchain_entry { __u64 ip[PERF_MAX_STACK_DEPTH]; }; -struct perf_tracepoint_record { - int size; - char *record; +struct perf_raw_record { + u32 size; + void *data; }; struct task_struct; @@ -687,7 +687,7 @@ struct perf_sample_data { struct pt_regs *regs; u64 addr; u64 period; - void *private; + struct perf_raw_record *raw; }; extern int perf_counter_overflow(struct perf_counter *counter, int nmi, diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 117622cb73a3..002310540417 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2646,7 +2646,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, u64 counter; } group_entry; struct perf_callchain_entry *callchain = NULL; - struct perf_tracepoint_record *tp = NULL; + struct perf_raw_record *raw = NULL; int callchain_size = 0; u64 time; struct { @@ -2715,10 +2715,10 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, header.size += sizeof(u64); } - if (sample_type & PERF_SAMPLE_TP_RECORD) { - tp = data->private; - if (tp) - header.size += tp->size; + if (sample_type & PERF_SAMPLE_RAW) { + raw = data->raw; + if (raw) + header.size += raw->size; } ret = perf_output_begin(&handle, counter, header.size, nmi, 1); @@ -2784,8 +2784,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } } - if ((sample_type & PERF_SAMPLE_TP_RECORD) && tp) - perf_output_copy(&handle, tp->record, tp->size); + if ((sample_type & PERF_SAMPLE_RAW) && raw) + perf_output_copy(&handle, raw->data, raw->size); perf_output_end(&handle); } @@ -3740,15 +3740,15 @@ static const struct pmu perf_ops_task_clock = { void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, int entry_size) { - struct perf_tracepoint_record tp = { + struct perf_raw_record raw = { .size = entry_size, - .record = record, + .data = record, }; struct perf_sample_data data = { .regs = get_irq_regs(), .addr = addr, - .private = &tp, + .raw = &raw, }; if (!data.regs) -- cgit v1.2.3 From 8e5b59a2d728e6963b35dba8bb36e0b70267462e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 6 Aug 2009 16:02:50 -0700 Subject: sched: Add default defines for PREEMPT_ACTIVE The PREEMPT_ACTIVE setting doesn't actually need to be arch-specific, so set up a sane default for all arches to (hopefully) migrate to. > if we look at linux/hardirq.h, it makes this claim: > * - bit 28 is the PREEMPT_ACTIVE flag > if that's true, then why are we letting any arch set this define ? a > quick survey shows that half the arches (11) are using 0x10000000 (bit > 28) while the other half (10) are using 0x4000000 (bit 26). 
and then > there is the ia64 oddity which uses bit 30. the exact value here > shouldnt really matter across arches though should it ? actually alpha, arm and avr32 also use bit 30 (0x40000000), there are only five (or eight, depending on how you count) architectures (blackfin, h8300, m68k, s390 and sparc) using bit 26. Signed-off-by: Arnd Bergmann Signed-off-by: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 8246c697863d..0d885fd75111 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -64,6 +64,12 @@ #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) #define NMI_OFFSET (1UL << NMI_SHIFT) +#ifndef PREEMPT_ACTIVE +#define PREEMPT_ACTIVE_BITS 1 +#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) +#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) +#endif + #if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) #error PREEMPT_ACTIVE is too low! #endif -- cgit v1.2.3 From 62ab460cf5e450e1d207a98a9c6cf2e4a6a78fd1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:06:19 -0400 Subject: NFSv4: Add 'server capability' flags for NFSv4 recommended attributes If the NFSv4 server doesn't support a POSIX attribute, the generic NFS code needs to know that, so that it don't keep trying to poll for it. However, by the same count, if the NFSv4 server does support that attribute, then we should ensure that the inode metadata is appropriately labelled as being untrusted. For instance, if we don't know the correct value of the file's uid, we should certainly not be caching ACLs or ACCESS results. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 7 ++-- fs/nfs/inode.c | 92 +++++++++++++++++++++++++++++++++++++++-------- fs/nfs/nfs4proc.c | 22 ++++++++++++ include/linux/nfs_fs_sb.h | 9 +++++ 4 files changed, 114 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 8d25ccb2d51d..8f34fd8028fc 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -809,6 +809,9 @@ static int nfs_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; server->options = data->options; + server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID| + NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP| + NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); @@ -1274,7 +1277,7 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; - server->caps |= NFS_CAP_ATOMIC_OPEN; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; server->options = data->options; /* Get a client record */ @@ -1400,7 +1403,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, /* Initialise the client representation from the parent server */ nfs_server_copy_userdata(server, parent_server); - server->caps |= NFS_CAP_ATOMIC_OPEN; + server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR; /* Get a client representation. 
* Note: NFSv4 always uses TCP, */ diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bd7938eda6a8..fe5a8b45d867 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -286,6 +286,11 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) /* We can't support update_atime(), since the server will reset it */ inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_mode = fattr->mode; + if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0 + && nfs_server_capable(inode, NFS_CAP_MODE)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ @@ -330,20 +335,46 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfsi->attr_gencount = fattr->gencount; if (fattr->valid & NFS_ATTR_FATTR_ATIME) inode->i_atime = fattr->atime; + else if (nfs_server_capable(inode, NFS_CAP_ATIME)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_MTIME) inode->i_mtime = fattr->mtime; + else if (nfs_server_capable(inode, NFS_CAP_MTIME)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA; if (fattr->valid & NFS_ATTR_FATTR_CTIME) inode->i_ctime = fattr->ctime; + else if (nfs_server_capable(inode, NFS_CAP_CTIME)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_CHANGE) nfsi->change_attr = fattr->change_attr; + else if (nfs_server_capable(inode, NFS_CAP_CHANGE_ATTR)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA; if (fattr->valid & NFS_ATTR_FATTR_SIZE) inode->i_size = nfs_size_to_loff_t(fattr->size); + else + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_REVAL_PAGECACHE; if (fattr->valid & NFS_ATTR_FATTR_NLINK) inode->i_nlink = fattr->nlink; + else if (nfs_server_capable(inode, NFS_CAP_NLINK)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (fattr->valid & NFS_ATTR_FATTR_OWNER) inode->i_uid = fattr->uid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_GROUP) inode->i_gid = fattr->gid; + else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP)) + nfsi->cache_validity |= NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL; if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) inode->i_blocks = fattr->du.nfs2.blocks; if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { @@ -1145,6 +1176,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) loff_t cur_isize, new_isize; unsigned long invalid = 0; unsigned long now = jiffies; + unsigned long save_cache_validity; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __func__, inode->i_sb->s_id, inode->i_ino, @@ -1171,10 +1203,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ nfsi->read_cache_jiffies = fattr->time_start; - if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) || (fattr->valid & (NFS_ATTR_FATTR_MTIME|NFS_ATTR_FATTR_CTIME))) - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ATIME - | NFS_INO_REVAL_PAGECACHE); + save_cache_validity = nfsi->cache_validity; + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_FORCED + | NFS_INO_REVAL_PAGECACHE); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); @@ 
-1189,7 +1222,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); nfsi->change_attr = fattr->change_attr; } - } + } else if (server->caps & NFS_CAP_CHANGE_ATTR) + invalid |= save_cache_validity; if (fattr->valid & NFS_ATTR_FATTR_MTIME) { /* NFSv2/v3: Check if the mtime agrees */ @@ -1201,7 +1235,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_force_lookup_revalidate(inode); memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); } - } + } else if (server->caps & NFS_CAP_MTIME) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_DATA + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_REVAL_FORCED); + if (fattr->valid & NFS_ATTR_FATTR_CTIME) { /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { @@ -1215,7 +1254,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); } - } + } else if (server->caps & NFS_CAP_CTIME) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); /* Check if our cached file size is stale */ if (fattr->valid & NFS_ATTR_FATTR_SIZE) { @@ -1231,30 +1274,50 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) dprintk("NFS: isize change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); } - } + } else + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_PAGECACHE + | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_ATIME) memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + else if (server->caps & NFS_CAP_ATIME) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME + | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_MODE) { if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; } - } + } else if (server->caps & NFS_CAP_MODE) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + if (fattr->valid & NFS_ATTR_FATTR_OWNER) { if (inode->i_uid != fattr->uid) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_uid = fattr->uid; } - } + } else if (server->caps & NFS_CAP_OWNER) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); + if (fattr->valid & NFS_ATTR_FATTR_GROUP) { if (inode->i_gid != fattr->gid) { invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_gid = fattr->gid; } - } + } else if (server->caps & NFS_CAP_OWNER_GROUP) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_INVALID_ACCESS + | NFS_INO_INVALID_ACL + | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_NLINK) { if (inode->i_nlink != fattr->nlink) { @@ -1263,7 +1326,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) invalid |= NFS_INO_INVALID_DATA; inode->i_nlink = fattr->nlink; } - } + } else if (server->caps & NFS_CAP_NLINK) + invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR + | NFS_INO_REVAL_FORCED); if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) { /* @@ -1293,9 +1358,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) || 
S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; if (!nfs_have_delegation(inode, FMODE_READ) || - (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) + (save_cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; - nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED; return 0; out_changed: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d95f7f9e60c4..be6544aef41f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2003,12 +2003,34 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f status = nfs4_call_sync(server, &msg, &args, &res, 0); if (status == 0) { memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); + server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| + NFS_CAP_SYMLINKS|NFS_CAP_FILEID| + NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER| + NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME| + NFS_CAP_CTIME|NFS_CAP_MTIME); if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) server->caps |= NFS_CAP_ACLS; if (res.has_links != 0) server->caps |= NFS_CAP_HARDLINKS; if (res.has_symlinks != 0) server->caps |= NFS_CAP_SYMLINKS; + if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID) + server->caps |= NFS_CAP_FILEID; + if (res.attr_bitmask[1] & FATTR4_WORD1_MODE) + server->caps |= NFS_CAP_MODE; + if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS) + server->caps |= NFS_CAP_NLINK; + if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER) + server->caps |= NFS_CAP_OWNER; + if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP) + server->caps |= NFS_CAP_OWNER_GROUP; + if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS) + server->caps |= NFS_CAP_ATIME; + if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA) + server->caps |= NFS_CAP_CTIME; + if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY) + server->caps |= NFS_CAP_MTIME; + memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 19fe15d12042..320569eabe3b 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -167,6 +167,15 @@ struct nfs_server { #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) +#define NFS_CAP_CHANGE_ATTR (1U << 5) +#define NFS_CAP_FILEID (1U << 6) +#define NFS_CAP_MODE (1U << 7) +#define NFS_CAP_NLINK (1U << 8) +#define NFS_CAP_OWNER (1U << 9) +#define NFS_CAP_OWNER_GROUP (1U << 10) +#define NFS_CAP_ATIME (1U << 11) +#define NFS_CAP_CTIME (1U << 12) +#define NFS_CAP_MTIME (1U << 13) /* maximum number of slots to use */ -- cgit v1.2.3 From 169026a61e6f436dfc12c9d10d95455c4e9f945b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Aug 2009 15:09:32 -0400 Subject: SUNRPC: Clean up RPCBIND_MAXUADDRLEN definitions Clean up: Replace the single-integer definition of RPCBIND_MAXUADDRLEN with a definition that is based on previously defined address string sizes, and document the way this maximum is calculated. Also provide a separate macro for the size of the port number extension. 
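To see where the new maximums come from, a small worked example (illustrative only, using the macros this patch introduces below): a universal address is the presentation address with the port appended in "hibyte.lobyte" form, so an IPv4 service on 192.0.2.1 at port 2049 (0x0801) looks like this:

        char buf[RPCBIND_MAXUADDR4LEN]; /* INET_ADDRSTRLEN + sizeof(".255.255") */
        unsigned short port = 2049;     /* 0x0801: high byte 8, low byte 1 */

        /* yields "192.0.2.1.8.1" */
        snprintf(buf, sizeof(buf), "%s.%u.%u", "192.0.2.1", port >> 8, port & 0xff);

The ".255.255" suffix sized by RPCBIND_MAXUADDRPLEN is simply the longest possible port extension, so each per-family maximum is the address-string maximum plus that suffix.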
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/msg_prot.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 70df4f1d8847..77e624883393 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -189,7 +189,22 @@ typedef __be32 rpc_fraghdr; * Additionally, the two alternative forms specified in Section 2.2 of * [RFC2373] are also acceptable. */ -#define RPCBIND_MAXUADDRLEN (56u) + +#include + +/* Maximum size of the port number part of a universal address */ +#define RPCBIND_MAXUADDRPLEN sizeof(".255.255") + +/* Maximum size of an IPv4 universal address */ +#define RPCBIND_MAXUADDR4LEN \ + (INET_ADDRSTRLEN + RPCBIND_MAXUADDRPLEN) + +/* Maximum size of an IPv6 universal address */ +#define RPCBIND_MAXUADDR6LEN \ + (INET6_ADDRSTRLEN + RPCBIND_MAXUADDRPLEN) + +/* Assume INET6_ADDRSTRLEN will always be larger than INET_ADDRSTRLEN... */ +#define RPCBIND_MAXUADDRLEN RPCBIND_MAXUADDR6LEN #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ -- cgit v1.2.3 From a02d692611348f11ee1bc37431a883c3ff2de23e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Aug 2009 15:09:34 -0400 Subject: SUNRPC: Provide functions for managing universal addresses Introduce a set of functions in the kernel's RPC implementation for converting between a socket address and either a standard presentation address string or an RPC universal address. The universal address functions will be used to encode and decode RPCB_FOO and NFSv4 SETCLIENTID arguments. The other functions are part of a previous promise to deliver shared functions that can be used by upper-layer protocols to display and manipulate IP addresses. The kernel's current address printf formatters were designed specifically for kernel to user-space APIs that require a particular string format for socket addresses, thus are somewhat limited for the purposes of sunrpc.ko. The formatter for IPv6 addresses, %pI6, does not support short-handing or scope IDs. Also, these printf formatters are unique per address family, so a separate formatter string is required for printing AF_INET and AF_INET6 addresses. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 - include/linux/sunrpc/clnt.h | 38 +++++ net/sunrpc/Makefile | 2 +- net/sunrpc/addr.c | 364 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 403 insertions(+), 3 deletions(-) create mode 100644 net/sunrpc/addr.c (limited to 'include') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8d2b71d57d29..ff68397f9b19 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -370,8 +370,6 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) PAGE_SIZE - 1) >> PAGE_SHIFT; } -#define IPV6_SCOPE_DELIMITER '%' - /* * Set the port number in an address. Be agnostic about the address * family. 
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 37881f1a0bd7..2636e8ad551c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -9,6 +9,10 @@ #ifndef _LINUX_SUNRPC_CLNT_H #define _LINUX_SUNRPC_CLNT_H +#include +#include +#include + #include #include #include @@ -151,5 +155,39 @@ void rpc_force_rebind(struct rpc_clnt *); size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); +size_t rpc_ntop(const struct sockaddr *, char *, const size_t); +size_t rpc_pton(const char *, const size_t, + struct sockaddr *, const size_t); +char * rpc_sockaddr2uaddr(const struct sockaddr *); +size_t rpc_uaddr2sockaddr(const char *, const size_t, + struct sockaddr *, const size_t); + +static inline unsigned short rpc_get_port(const struct sockaddr *sap) +{ + switch (sap->sa_family) { + case AF_INET: + return ntohs(((struct sockaddr_in *)sap)->sin_port); + case AF_INET6: + return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + } + return 0; +} + +static inline void rpc_set_port(struct sockaddr *sap, + const unsigned short port) +{ + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + break; + } +} + +#define IPV6_SCOPE_DELIMITER '%' +#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index db73fd2a3f0e..9d2fca5ad14a 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -10,7 +10,7 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o auth_generic.o \ svc.o svcsock.o svcauth.o svcauth_unix.o \ - rpcb_clnt.o timer.o xdr.o \ + addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c new file mode 100644 index 000000000000..22e8fd89477f --- /dev/null +++ b/net/sunrpc/addr.c @@ -0,0 +1,364 @@ +/* + * Copyright 2009, Oracle. All rights reserved. + * + * Convert socket addresses to presentation addresses and universal + * addresses, and vice versa. + * + * Universal addresses are introduced by RFC 1833 and further refined by + * recent RFCs describing NFSv4. The universal address format is part + * of the external (network) interface provided by rpcbind version 3 + * and 4, and by NFSv4. Such an address is a string containing a + * presentation format IP address followed by a port number in + * "hibyte.lobyte" format. + * + * IPv6 addresses can also include a scope ID, typically denoted by + * a '%' followed by a device name or a non-negative integer. Refer to + * RFC 4291, Section 2.2 for details on IPv6 presentation formats. 
+ */ + +#include +#include + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + +static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, + char *buf, const int buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + const struct in6_addr *addr = &sin6->sin6_addr; + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded ANY address + */ + if (ipv6_addr_any(addr)) + return snprintf(buf, buflen, "::"); + + /* + * RFC 4291, Section 2.2.2 + * + * Shorthanded loopback address + */ + if (ipv6_addr_loopback(addr)) + return snprintf(buf, buflen, "::1"); + + /* + * RFC 4291, Section 2.2.3 + * + * Special presentation address format for mapped v4 + * addresses. + */ + if (ipv6_addr_v4mapped(addr)) + return snprintf(buf, buflen, "::ffff:%pI4", + &addr->s6_addr32[3]); + + /* + * RFC 4291, Section 2.2.1 + * + * To keep the result as short as possible, especially + * since we don't shorthand, we don't want leading zeros + * in each halfword, so avoid %pI6. + */ + return snprintf(buf, buflen, "%x:%x:%x:%x:%x:%x:%x:%x", + ntohs(addr->s6_addr16[0]), ntohs(addr->s6_addr16[1]), + ntohs(addr->s6_addr16[2]), ntohs(addr->s6_addr16[3]), + ntohs(addr->s6_addr16[4]), ntohs(addr->s6_addr16[5]), + ntohs(addr->s6_addr16[6]), ntohs(addr->s6_addr16[7])); +} + +static size_t rpc_ntop6(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + char scopebuf[IPV6_SCOPE_ID_LEN]; + size_t len; + int rc; + + len = rpc_ntop6_noscopeid(sap, buf, buflen); + if (unlikely(len == 0)) + return len; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + return len; + + rc = snprintf(scopebuf, sizeof(scopebuf), "%c%u", + IPV6_SCOPE_DELIMITER, sin6->sin6_scope_id); + if (unlikely((size_t)rc > sizeof(scopebuf))) + return 0; + + len += rc; + if (unlikely(len > buflen)) + return 0; + + strcat(buf, scopebuf); + return len; +} + +#else /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ + +static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap, + char *buf, const int buflen) +{ + return 0; +} + +static size_t rpc_ntop6(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + return 0; +} + +#endif /* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */ + +static int rpc_ntop4(const struct sockaddr *sap, + char *buf, const size_t buflen) +{ + const struct sockaddr_in *sin = (struct sockaddr_in *)sap; + + return snprintf(buf, buflen, "%pI4", &sin->sin_addr); +} + +/** + * rpc_ntop - construct a presentation address in @buf + * @sap: socket address + * @buf: construction area + * @buflen: size of @buf, in bytes + * + * Plants a %NUL-terminated string in @buf and returns the length + * of the string, excluding the %NUL. Otherwise zero is returned. 
+ */ +size_t rpc_ntop(const struct sockaddr *sap, char *buf, const size_t buflen) +{ + switch (sap->sa_family) { + case AF_INET: + return rpc_ntop4(sap, buf, buflen); + case AF_INET6: + return rpc_ntop6(sap, buf, buflen); + } + + return 0; +} +EXPORT_SYMBOL_GPL(rpc_ntop); + +static size_t rpc_pton4(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)sap; + u8 *addr = (u8 *)&sin->sin_addr.s_addr; + + if (buflen > INET_ADDRSTRLEN || salen < sizeof(struct sockaddr_in)) + return 0; + + memset(sap, 0, sizeof(struct sockaddr_in)); + + if (in4_pton(buf, buflen, addr, '\0', NULL) == 0) + return 0; + + sin->sin_family = AF_INET; + return sizeof(struct sockaddr_in);; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static int rpc_parse_scope_id(const char *buf, const size_t buflen, + const char *delim, struct sockaddr_in6 *sin6) +{ + char *p; + size_t len; + + if ((buf + buflen) == delim) + return 1; + + if (*delim != IPV6_SCOPE_DELIMITER) + return 0; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) && + !(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_SITELOCAL)) + return 0; + + len = (buf + buflen) - delim - 1; + p = kstrndup(delim + 1, len, GFP_KERNEL); + if (p) { + unsigned long scope_id = 0; + struct net_device *dev; + + dev = dev_get_by_name(&init_net, p); + if (dev != NULL) { + scope_id = dev->ifindex; + dev_put(dev); + } else { + if (strict_strtoul(p, 10, &scope_id) == 0) { + kfree(p); + return 0; + } + } + + kfree(p); + + sin6->sin6_scope_id = scope_id; + return 1; + } + + return 0; +} + +static size_t rpc_pton6(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; + u8 *addr = (u8 *)&sin6->sin6_addr.in6_u; + const char *delim; + + if (buflen > (INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN) || + salen < sizeof(struct sockaddr_in6)) + return 0; + + memset(sap, 0, sizeof(struct sockaddr_in6)); + + if (in6_pton(buf, buflen, addr, IPV6_SCOPE_DELIMITER, &delim) == 0) + return 0; + + if (!rpc_parse_scope_id(buf, buflen, delim, sin6)) + return 0; + + sin6->sin6_family = AF_INET6; + return sizeof(struct sockaddr_in6); +} +#else +static size_t rpc_pton6(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + return 0; +} +#endif + +/** + * rpc_pton - Construct a sockaddr in @sap + * @buf: C string containing presentation format IP address + * @buflen: length of presentation address in bytes + * @sap: buffer into which to plant socket address + * @salen: size of buffer in bytes + * + * Returns the size of the socket address if successful; otherwise + * zero is returned. + * + * Plants a socket address in @sap and returns the size of the + * socket address, if successful. Returns zero if an error + * occurred. + */ +size_t rpc_pton(const char *buf, const size_t buflen, + struct sockaddr *sap, const size_t salen) +{ + unsigned int i; + + for (i = 0; i < buflen; i++) + if (buf[i] == ':') + return rpc_pton6(buf, buflen, sap, salen); + return rpc_pton4(buf, buflen, sap, salen); +} +EXPORT_SYMBOL_GPL(rpc_pton); + +/** + * rpc_sockaddr2uaddr - Construct a universal address string from @sap. + * @sap: socket address + * + * Returns a %NUL-terminated string in dynamically allocated memory; + * otherwise NULL is returned if an error occurred. Caller must + * free the returned string. 
+ */ +char *rpc_sockaddr2uaddr(const struct sockaddr *sap) +{ + char portbuf[RPCBIND_MAXUADDRPLEN]; + char addrbuf[RPCBIND_MAXUADDRLEN]; + unsigned short port; + + switch (sap->sa_family) { + case AF_INET: + if (rpc_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) + return NULL; + port = ntohs(((struct sockaddr_in *)sap)->sin_port); + break; + case AF_INET6: + if (rpc_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) + return NULL; + port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + break; + default: + return NULL; + } + + if (snprintf(portbuf, sizeof(portbuf), + ".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf)) + return NULL; + + if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf)) + return NULL; + + return kstrdup(addrbuf, GFP_KERNEL); +} +EXPORT_SYMBOL_GPL(rpc_sockaddr2uaddr); + +/** + * rpc_uaddr2sockaddr - convert a universal address to a socket address. + * @uaddr: C string containing universal address to convert + * @uaddr_len: length of universal address string + * @sap: buffer into which to plant socket address + * @salen: size of buffer + * + * Returns the size of the socket address if successful; otherwise + * zero is returned. + */ +size_t rpc_uaddr2sockaddr(const char *uaddr, const size_t uaddr_len, + struct sockaddr *sap, const size_t salen) +{ + char *c, buf[RPCBIND_MAXUADDRLEN]; + unsigned long portlo, porthi; + unsigned short port; + + if (uaddr_len > sizeof(buf)) + return 0; + + memcpy(buf, uaddr, uaddr_len); + + buf[uaddr_len] = '\n'; + buf[uaddr_len + 1] = '\0'; + + c = strrchr(buf, '.'); + if (unlikely(c == NULL)) + return 0; + if (unlikely(strict_strtoul(c + 1, 10, &portlo) != 0)) + return 0; + if (unlikely(portlo > 255)) + return 0; + + c[0] = '\n'; + c[1] = '\0'; + + c = strrchr(buf, '.'); + if (unlikely(c == NULL)) + return 0; + if (unlikely(strict_strtoul(c + 1, 10, &porthi) != 0)) + return 0; + if (unlikely(porthi > 255)) + return 0; + + port = (unsigned short)((porthi << 8) | portlo); + + c[0] = '\0'; + + if (rpc_pton(buf, strlen(buf), sap, salen) == 0) + return 0; + + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + return sizeof(struct sockaddr_in); + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + return sizeof(struct sockaddr_in6); + } + + return 0; +} +EXPORT_SYMBOL_GPL(rpc_uaddr2sockaddr); -- cgit v1.2.3 From ba809130bc260fce04141aca01ef9e068d32af2a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Aug 2009 15:09:35 -0400 Subject: SUNRPC: Remove duplicate universal address generation RPC universal address generation is currently done in several places: rpcb_clnt.c, nfs4proc.c xprtsock.c, and xprtrdma.c. Remove the redundant cases that convert a socket address to a universal address. The nfs4proc.c case takes a pre-formatted presentation address string, not a socket address, so we'll leave that one. Because the new uaddr constructor uses the recently introduced rpc_ntop(), it now supports proper "::" shorthanding for IPv6 addresses. This allows the kernel to register properly formed universal addresses with the local rpcbind service, in _all_ cases. The kernel can now also send properly formed universal addresses in RPCB_GETADDR requests, and support link-local properly when encoding and decoding IPv6 addresses. 
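For illustration, this is roughly the call pattern the series moves callers to. The rpc_* helpers are the ones added in addr.c above; the calling function, its error values, and the assumption that the declarations live in linux/sunrpc/clnt.h are mine, not part of the patches.

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sunrpc/clnt.h>

/* Hypothetical round trip: a socket address becomes a universal
 * address string, then is parsed back into a sockaddr.  The string
 * is kstrdup()'d, so the caller owns it and must kfree() it. */
static int example_uaddr_round_trip(const struct sockaddr *sap)
{
	struct sockaddr_storage parsed;
	char *uaddr;
	size_t salen;

	uaddr = rpc_sockaddr2uaddr(sap);
	if (uaddr == NULL)
		return -EIO;

	/* For 192.0.2.1, port 111 this is "192.0.2.1.0.111"; for the
	 * IPv6 ANY address it is now the shorthand "::.0.111". */
	pr_debug("universal address: %s\n", uaddr);

	salen = rpc_uaddr2sockaddr(uaddr, strlen(uaddr),
				   (struct sockaddr *)&parsed, sizeof(parsed));
	kfree(uaddr);
	return salen != 0 ? 0 : -EINVAL;
}

Because the string comes back dynamically allocated, ownership passes to the caller; that is why the rpcb_register_inet4/6 paths in the diff below gain matching kfree(map->r_addr) calls.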
Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - net/sunrpc/rpcb_clnt.c | 48 +++++++++++++++++++++++++---------------- net/sunrpc/xprtrdma/transport.c | 8 ------- net/sunrpc/xprtsock.c | 18 ---------------- 4 files changed, 29 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 1175d58efc2e..65fad9534d93 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -41,7 +41,6 @@ enum rpc_display_format_t { RPC_DISPLAY_ALL, RPC_DISPLAY_HEX_ADDR, RPC_DISPLAY_HEX_PORT, - RPC_DISPLAY_UNIVERSAL_ADDR, RPC_DISPLAY_NETID, RPC_DISPLAY_MAX, }; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index ad1d7315c498..1fb1c070c19d 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -153,6 +153,7 @@ static void rpcb_map_release(void *data) rpcb_wake_rpcbind_waiters(map->r_xprt, map->r_status); xprt_put(map->r_xprt); + kfree(map->r_addr); kfree(map); } @@ -299,12 +300,9 @@ static int rpcb_register_inet4(const struct sockaddr *sap, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); - char buf[32]; + int result; - /* Construct AF_INET universal address */ - snprintf(buf, sizeof(buf), "%pI4.%u.%u", - &sin->sin_addr.s_addr, port >> 8, port & 0xff); - map->r_addr = buf; + map->r_addr = rpc_sockaddr2uaddr(sap); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? "" : "un"), @@ -315,7 +313,9 @@ static int rpcb_register_inet4(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(RPCBVERS_4, msg); + kfree(map->r_addr); + return result; } /* @@ -327,16 +327,9 @@ static int rpcb_register_inet6(const struct sockaddr *sap, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); - char buf[64]; + int result; - /* Construct AF_INET6 universal address */ - if (ipv6_addr_any(&sin6->sin6_addr)) - snprintf(buf, sizeof(buf), "::.%u.%u", - port >> 8, port & 0xff); - else - snprintf(buf, sizeof(buf), "%pI6.%u.%u", - &sin6->sin6_addr, port >> 8, port & 0xff); - map->r_addr = buf; + map->r_addr = rpc_sockaddr2uaddr(sap); dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " "local rpcbind\n", (port ? 
"" : "un"), @@ -347,7 +340,9 @@ static int rpcb_register_inet6(const struct sockaddr *sap, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call(RPCBVERS_4, msg); + result = rpcb_register_call(RPCBVERS_4, msg); + kfree(map->r_addr); + return result; } static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) @@ -570,6 +565,7 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } + /* Parent transport's destination address */ salen = rpc_peeraddr(clnt, sap, sizeof(addr)); /* Don't ever use rpcbind v2 for AF_INET6 requests */ @@ -620,11 +616,22 @@ void rpcb_getport_async(struct rpc_task *task) map->r_prot = xprt->prot; map->r_port = 0; map->r_xprt = xprt_get(xprt); - map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); - map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); - map->r_owner = ""; map->r_status = -EIO; + switch (bind_version) { + case RPCBVERS_4: + case RPCBVERS_3: + map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); + map->r_addr = rpc_sockaddr2uaddr(sap); + map->r_owner = ""; + break; + case RPCBVERS_2: + map->r_addr = NULL; + break; + default: + BUG(); + } + child = rpcb_call_async(rpcb_clnt, map, proc); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { @@ -722,6 +729,9 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, struct rpcbind_args *rpcb) { + if (rpcb->r_addr == NULL) + return -EIO; + dprintk("RPC: encoding rpcb request (%u, %u, %s)\n", rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); *p++ = htonl(rpcb->r_prog); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 1dd6123070e9..537c210a8b92 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -202,14 +202,6 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - buf = kzalloc(30, GFP_KERNEL); - if (buf) - snprintf(buf, 30, "%pI4.%u.%u", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - /* netid */ xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 83c73c4d017a..a42c2adda59f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -341,15 +341,6 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt, } xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - buf = kzalloc(30, GFP_KERNEL); - if (buf) { - snprintf(buf, 30, "%pI4.%u.%u", - &addr->sin_addr.s_addr, - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); - } - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - xprt->address_strings[RPC_DISPLAY_NETID] = netid; } @@ -397,15 +388,6 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt, } xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - buf = kzalloc(50, GFP_KERNEL); - if (buf) { - snprintf(buf, 50, "%pI6.%u.%u", - &addr->sin6_addr, - ntohs(addr->sin6_port) >> 8, - ntohs(addr->sin6_port) & 0xff); - } - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - xprt->address_strings[RPC_DISPLAY_NETID] = netid; } -- cgit v1.2.3 From c740eff84bcfd63c0497ef880e80171931cb8222 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Aug 2009 15:09:46 -0400 Subject: SUNRPC: Kill RPC_DISPLAY_ALL At some point, I recall that rpc_pipe_fs used RPC_DISPLAY_ALL. 
Currently there are no uses of RPC_DISPLAY_ALL outside the transport modules themselves, so we can safely get rid of it. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 1 - net/sunrpc/xprtrdma/transport.c | 5 ----- net/sunrpc/xprtsock.c | 50 ++++++++++++++++++++++++++++------------- 3 files changed, 34 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 65fad9534d93..c090df442572 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -38,7 +38,6 @@ enum rpc_display_format_t { RPC_DISPLAY_ADDR = 0, RPC_DISPLAY_PORT, RPC_DISPLAY_PROTO, - RPC_DISPLAY_ALL, RPC_DISPLAY_HEX_ADDR, RPC_DISPLAY_HEX_PORT, RPC_DISPLAY_NETID, diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 5f9b8676b6bd..9a63f669ece4 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -181,11 +181,6 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt) xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - (void)snprintf(buf, sizeof(buf), "addr=%s port=%s proto=rdma", - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT]); - xprt->address_strings[RPC_DISPLAY_ALL] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%02x%02x%02x%02x", NIPQUAD(sin->sin_addr.s_addr)); xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 7bc3c178ccb3..eee5ac96e177 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -307,12 +307,6 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) (void)snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "addr=%s port=%s proto=%s", - xprt->address_strings[RPC_DISPLAY_ADDR], - xprt->address_strings[RPC_DISPLAY_PORT], - xprt->address_strings[RPC_DISPLAY_PROTO]); - xprt->address_strings[RPC_DISPLAY_ALL] = kstrdup(buf, GFP_KERNEL); - (void)snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); } @@ -1721,8 +1715,11 @@ static void xs_udp_connect_worker4(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); xs_udp_finish_connecting(xprt, sock); status = 0; @@ -1763,8 +1760,11 @@ static void xs_udp_connect_worker6(struct work_struct *work) goto out; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT]); xs_udp_finish_connecting(xprt, sock); status = 0; @@ -1889,8 +1889,11 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt, goto out_eagain; } - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + dprintk("RPC: worker connecting xprt %p via %s to " + "%s (port %s)\n", xprt, + xprt->address_strings[RPC_DISPLAY_PROTO], + xprt->address_strings[RPC_DISPLAY_ADDR], + 
xprt->address_strings[RPC_DISPLAY_PORT]); status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", @@ -2228,8 +2231,15 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) return ERR_PTR(-EAFNOSUPPORT); } - dprintk("RPC: set up transport to address %s\n", - xprt->address_strings[RPC_DISPLAY_ALL]); + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); if (try_module_get(THIS_MODULE)) return xprt; @@ -2293,8 +2303,16 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) return ERR_PTR(-EAFNOSUPPORT); } - dprintk("RPC: set up transport to address %s\n", - xprt->address_strings[RPC_DISPLAY_ALL]); + if (xprt_bound(xprt)) + dprintk("RPC: set up xprt to %s (port %s) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PORT], + xprt->address_strings[RPC_DISPLAY_PROTO]); + else + dprintk("RPC: set up xprt to %s (autobind) via %s\n", + xprt->address_strings[RPC_DISPLAY_ADDR], + xprt->address_strings[RPC_DISPLAY_PROTO]); + if (try_module_get(THIS_MODULE)) return xprt; -- cgit v1.2.3 From b693ba4a338da15db1db4b5ebaa36e4ab9781c82 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:15 -0400 Subject: SUNRPC: Constify rpc_pipe_ops... Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 2 +- include/linux/sunrpc/rpc_pipe_fs.h | 5 +++-- net/sunrpc/auth_gss/auth_gss.c | 8 ++++---- net/sunrpc/rpc_pipe.c | 7 ++++--- 4 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 86147b0ab2cf..fae0d3e52b44 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -101,7 +101,7 @@ static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); static unsigned int fnvhash32(const void *, size_t); -static struct rpc_pipe_ops idmap_upcall_ops = { +static const struct rpc_pipe_ops idmap_upcall_ops = { .upcall = idmap_pipe_upcall, .downcall = idmap_pipe_downcall, .destroy_msg = idmap_pipe_destroy_msg, diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index cea764c2359f..91f5b13389c5 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -32,8 +32,8 @@ struct rpc_inode { wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; - struct rpc_pipe_ops *ops; struct delayed_work queue_timeout; + const struct rpc_pipe_ops *ops; }; static inline struct rpc_inode * @@ -46,7 +46,8 @@ extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); extern int rpc_rmdir(struct dentry *); -extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, struct rpc_pipe_ops *, int flags); +extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, + const struct rpc_pipe_ops *, int flags); extern int rpc_unlink(struct dentry *); extern struct vfsmount *rpc_get_mount(void); extern void rpc_put_mount(void); diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 66d458fc6920..23eb3864ffc0 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -89,8 +89,8 @@ static struct rpc_wait_queue pipe_version_rpc_waitqueue; static 
DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue); static void gss_free_ctx(struct gss_cl_ctx *); -static struct rpc_pipe_ops gss_upcall_ops_v0; -static struct rpc_pipe_ops gss_upcall_ops_v1; +static const struct rpc_pipe_ops gss_upcall_ops_v0; +static const struct rpc_pipe_ops gss_upcall_ops_v1; static inline struct gss_cl_ctx * gss_get_ctx(struct gss_cl_ctx *ctx) @@ -1507,7 +1507,7 @@ static const struct rpc_credops gss_nullops = { .crunwrap_resp = gss_unwrap_resp, }; -static struct rpc_pipe_ops gss_upcall_ops_v0 = { +static const struct rpc_pipe_ops gss_upcall_ops_v0 = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, @@ -1515,7 +1515,7 @@ static struct rpc_pipe_ops gss_upcall_ops_v0 = { .release_pipe = gss_pipe_release, }; -static struct rpc_pipe_ops gss_upcall_ops_v1 = { +static const struct rpc_pipe_ops gss_upcall_ops_v1 = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 9ced0628d69c..f6f60f625e3f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -125,7 +125,7 @@ static void rpc_close_pipes(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); - struct rpc_pipe_ops *ops; + const struct rpc_pipe_ops *ops; int need_release; mutex_lock(&inode->i_mutex); @@ -776,8 +776,9 @@ rpc_rmdir(struct dentry *dentry) * The @private argument passed here will be available to all these methods * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private. */ -struct dentry * -rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags) +struct dentry *rpc_mkpipe(struct dentry *parent, const char *name, + void *private, const struct rpc_pipe_ops *ops, + int flags) { struct dentry *dentry; struct inode *dir, *inode; -- cgit v1.2.3 From 458adb8ba9b26bfc66593866013adbb62a1a3d2e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:22 -0400 Subject: SUNRPC: Rename rpc_mkdir to rpc_create_client_dir() This reflects the fact that rpc_mkdir() as it stands today, can only create a RPC client type directory. 
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 5 +++-- net/sunrpc/clnt.c | 6 +++--- net/sunrpc/rpc_pipe.c | 11 +++++------ 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 91f5b13389c5..8de0ac276499 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -44,8 +44,9 @@ RPC_I(struct inode *inode) extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); -extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); -extern int rpc_rmdir(struct dentry *); +struct rpc_clnt; +extern struct dentry *rpc_create_client_dir(const char *, struct rpc_clnt *); +extern int rpc_remove_client_dir(struct dentry *); extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, const struct rpc_pipe_ops *, int flags); extern int rpc_unlink(struct dentry *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ebfcf9b89909..6ec37701a165 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -113,7 +113,7 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) "%s/clnt%x", dir_name, (unsigned int)clntid++); clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; - clnt->cl_dentry = rpc_mkdir(clnt->cl_pathname, clnt); + clnt->cl_dentry = rpc_create_client_dir(clnt->cl_pathname, clnt); if (!IS_ERR(clnt->cl_dentry)) return 0; error = PTR_ERR(clnt->cl_dentry); @@ -232,7 +232,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru out_no_auth: if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_dentry); + rpc_remove_client_dir(clnt->cl_dentry); rpc_put_mount(); } out_no_path: @@ -424,7 +424,7 @@ rpc_free_client(struct kref *kref) dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); if (!IS_ERR(clnt->cl_dentry)) { - rpc_rmdir(clnt->cl_dentry); + rpc_remove_client_dir(clnt->cl_dentry); rpc_put_mount(); } if (clnt->cl_parent != clnt) { diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 2a4e6eb0a3e9..08580bedc25f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -755,7 +755,7 @@ out_bad: } /** - * rpc_mkdir - Create a new directory in rpc_pipefs + * rpc_create_client_dir - Create a new rpc_client directory in rpc_pipefs * @path: path from the rpc_pipefs root to the new directory * @rpc_client: rpc client to associate with this directory * @@ -764,8 +764,8 @@ out_bad: * information about the client, together with any "pipes" that may * later be created using rpc_mkpipe(). 
*/ -struct dentry * -rpc_mkdir(char *path, struct rpc_clnt *rpc_client) +struct dentry *rpc_create_client_dir(const char *path, + struct rpc_clnt *rpc_client) { struct nameidata nd; struct dentry *dentry; @@ -797,11 +797,10 @@ out_err: } /** - * rpc_rmdir - Remove a directory created with rpc_mkdir() + * rpc_remove_client_dir - Remove a directory created with rpc_create_client_dir() * @dentry: directory to remove */ -int -rpc_rmdir(struct dentry *dentry) +int rpc_remove_client_dir(struct dentry *dentry) { struct dentry *parent; struct inode *dir; -- cgit v1.2.3 From 7d217caca5d704e48aa5e59aba0b3ad4c7af4fd2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:24 -0400 Subject: SUNRPC: Replace rpc_client->cl_dentry and cl_mnt, with a cl_path Signed-off-by: Trond Myklebust --- fs/nfs/idmap.c | 4 ++-- include/linux/sunrpc/clnt.h | 4 ++-- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- net/sunrpc/clnt.c | 24 ++++++++++++------------ 4 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index fae0d3e52b44..21a84d45916f 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -119,8 +119,8 @@ nfs_idmap_new(struct nfs_client *clp) if (idmap == NULL) return -ENOMEM; - idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap", - idmap, &idmap_upcall_ops, 0); + idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_path.dentry, + "idmap", idmap, &idmap_upcall_ops, 0); if (IS_ERR(idmap->idmap_dentry)) { error = PTR_ERR(idmap->idmap_dentry); kfree(idmap); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 37881f1a0bd7..38ad162330a2 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -17,6 +17,7 @@ #include #include #include +#include struct rpc_inode; @@ -51,8 +52,7 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; char cl_pathname[30];/* Path in rpc_pipe_fs */ - struct vfsmount * cl_vfsmnt; - struct dentry * cl_dentry; /* inode */ + struct path cl_path; struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; struct rpc_timeout cl_timeout_default; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 23eb3864ffc0..fc6a43ccd950 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -777,7 +777,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) * that we supported only the old pipe. So we instead create * the new pipe first. 
*/ - gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_dentry, + gss_auth->dentry[1] = rpc_mkpipe(clnt->cl_path.dentry, "gssd", clnt, &gss_upcall_ops_v1, RPC_PIPE_WAIT_FOR_OPEN); @@ -786,7 +786,7 @@ gss_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) goto err_put_mech; } - gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_dentry, + gss_auth->dentry[0] = rpc_mkpipe(clnt->cl_path.dentry, gss_auth->mech->gm_name, clnt, &gss_upcall_ops_v0, RPC_PIPE_WAIT_FOR_OPEN); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 6ec37701a165..b3f863346300 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -99,24 +99,24 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) static uint32_t clntid; int error; - clnt->cl_vfsmnt = ERR_PTR(-ENOENT); - clnt->cl_dentry = ERR_PTR(-ENOENT); + clnt->cl_path.mnt = ERR_PTR(-ENOENT); + clnt->cl_path.dentry = ERR_PTR(-ENOENT); if (dir_name == NULL) return 0; - clnt->cl_vfsmnt = rpc_get_mount(); - if (IS_ERR(clnt->cl_vfsmnt)) - return PTR_ERR(clnt->cl_vfsmnt); + clnt->cl_path.mnt = rpc_get_mount(); + if (IS_ERR(clnt->cl_path.mnt)) + return PTR_ERR(clnt->cl_path.mnt); for (;;) { snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), "%s/clnt%x", dir_name, (unsigned int)clntid++); clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; - clnt->cl_dentry = rpc_create_client_dir(clnt->cl_pathname, clnt); - if (!IS_ERR(clnt->cl_dentry)) + clnt->cl_path.dentry = rpc_create_client_dir(clnt->cl_pathname, clnt); + if (!IS_ERR(clnt->cl_path.dentry)) return 0; - error = PTR_ERR(clnt->cl_dentry); + error = PTR_ERR(clnt->cl_path.dentry); if (error != -EEXIST) { printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", clnt->cl_pathname, error); @@ -231,8 +231,8 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru return clnt; out_no_auth: - if (!IS_ERR(clnt->cl_dentry)) { - rpc_remove_client_dir(clnt->cl_dentry); + if (!IS_ERR(clnt->cl_path.dentry)) { + rpc_remove_client_dir(clnt->cl_path.dentry); rpc_put_mount(); } out_no_path: @@ -423,8 +423,8 @@ rpc_free_client(struct kref *kref) dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (!IS_ERR(clnt->cl_dentry)) { - rpc_remove_client_dir(clnt->cl_dentry); + if (!IS_ERR(clnt->cl_path.dentry)) { + rpc_remove_client_dir(clnt->cl_path.dentry); rpc_put_mount(); } if (clnt->cl_parent != clnt) { -- cgit v1.2.3 From 23ac6581702ac6d029643328a7e6ea3baf834c5e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:25 -0400 Subject: SUNRPC: clean up rpc_setup_pipedir() There is still a little wart or two there: Since we've already got a vfsmount, we might as well pass that in to rpc_create_client_dir. Another point is that if we open code __rpc_lookup_path() here, then we can avoid looking up the entire parent directory path over and over again: it doesn't change. Also get rid of rpc_clnt->cl_pathname, since it has no users... 
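A condensed sketch of the resulting rpc_setup_pipedir() flow follows; the function and helper names match the diff below, but the body is simplified (error reporting trimmed), so treat it as a restatement rather than the verbatim patch.

#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/namei.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/rpc_pipe_fs.h>

static int example_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
{
	static uint32_t clntid;
	struct nameidata nd;
	struct path path;
	char name[15];
	struct qstr q = { .name = name };
	int error;

	path.mnt = rpc_get_mount();		/* one ref on rpc_pipefs */
	if (IS_ERR(path.mnt))
		return PTR_ERR(path.mnt);

	/* Resolve the parent directory exactly once. */
	error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &nd);
	if (error)
		goto err_mnt;

	for (;;) {
		q.len = snprintf(name, sizeof(name), "clnt%x",
				 (unsigned int)clntid++);
		q.hash = full_name_hash(q.name, q.len);
		path.dentry = rpc_create_client_dir(nd.path.dentry, &q, clnt);
		if (!IS_ERR(path.dentry))
			break;
		error = PTR_ERR(path.dentry);
		if (error != -EEXIST)	/* only name collisions are retried */
			goto err_path;
	}
	path_put(&nd.path);		/* parent dentry no longer needed */
	clnt->cl_path = path;		/* keep the mount ref and new dentry */
	return 0;

err_path:
	path_put(&nd.path);
err_mnt:
	rpc_put_mount();
	return error;
}

The parent lookup now happens once per client instead of once per candidate name, and the mount reference from rpc_get_mount() ends up in cl_path.mnt so rpc_free_client() can release it later.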
Signed-off-by: Trond Myklebust --- include/linux/sunrpc/clnt.h | 1 - include/linux/sunrpc/rpc_pipe_fs.h | 2 +- net/sunrpc/clnt.c | 48 +++++++++++++++++++++---------- net/sunrpc/rpc_pipe.c | 58 ++------------------------------------ 4 files changed, 37 insertions(+), 72 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 38ad162330a2..1848d44922ef 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -51,7 +51,6 @@ struct rpc_clnt { int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; - char cl_pathname[30];/* Path in rpc_pipe_fs */ struct path cl_path; struct rpc_clnt * cl_parent; /* Points to parent of clones */ struct rpc_rtt cl_rtt_default; diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 8de0ac276499..88332ef1e959 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -45,7 +45,7 @@ RPC_I(struct inode *inode) extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); struct rpc_clnt; -extern struct dentry *rpc_create_client_dir(const char *, struct rpc_clnt *); +extern struct dentry *rpc_create_client_dir(struct dentry *, struct qstr *, struct rpc_clnt *); extern int rpc_remove_client_dir(struct dentry *); extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, const struct rpc_pipe_ops *, int flags); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index b3f863346300..c1e467e1b07d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include #include @@ -97,6 +99,12 @@ static int rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) { static uint32_t clntid; + struct nameidata nd; + struct path path; + char name[15]; + struct qstr q = { + .name = name, + }; int error; clnt->cl_path.mnt = ERR_PTR(-ENOENT); @@ -104,26 +112,36 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name) if (dir_name == NULL) return 0; - clnt->cl_path.mnt = rpc_get_mount(); - if (IS_ERR(clnt->cl_path.mnt)) - return PTR_ERR(clnt->cl_path.mnt); + path.mnt = rpc_get_mount(); + if (IS_ERR(path.mnt)) + return PTR_ERR(path.mnt); + error = vfs_path_lookup(path.mnt->mnt_root, path.mnt, dir_name, 0, &nd); + if (error) + goto err; for (;;) { - snprintf(clnt->cl_pathname, sizeof(clnt->cl_pathname), - "%s/clnt%x", dir_name, - (unsigned int)clntid++); - clnt->cl_pathname[sizeof(clnt->cl_pathname) - 1] = '\0'; - clnt->cl_path.dentry = rpc_create_client_dir(clnt->cl_pathname, clnt); - if (!IS_ERR(clnt->cl_path.dentry)) - return 0; - error = PTR_ERR(clnt->cl_path.dentry); + q.len = snprintf(name, sizeof(name), "clnt%x", (unsigned int)clntid++); + name[sizeof(name) - 1] = '\0'; + q.hash = full_name_hash(q.name, q.len); + path.dentry = rpc_create_client_dir(nd.path.dentry, &q, clnt); + if (!IS_ERR(path.dentry)) + break; + error = PTR_ERR(path.dentry); if (error != -EEXIST) { - printk(KERN_INFO "RPC: Couldn't create pipefs entry %s, error %d\n", - clnt->cl_pathname, error); - rpc_put_mount(); - return error; + printk(KERN_INFO "RPC: Couldn't create pipefs entry" + " %s/%s, error %d\n", + dir_name, name, error); + goto err_path_put; } } + path_put(&nd.path); + clnt->cl_path = path; + return 0; +err_path_put: + path_put(&nd.path); +err: + rpc_put_mount(); + return error; } static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 
6d152f69587e..1613d858ba38 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -443,42 +443,6 @@ static const struct dentry_operations rpc_dentry_operations = { .d_delete = rpc_delete_dentry, }; -static int __rpc_lookup_path(const char *pathname, unsigned flags, - struct nameidata *nd) -{ - struct vfsmount *mnt; - - if (pathname[0] == '\0') - return -ENOENT; - - mnt = rpc_get_mount(); - if (IS_ERR(mnt)) { - printk(KERN_WARNING "%s: %s failed to mount " - "pseudofilesystem \n", __FILE__, __func__); - return PTR_ERR(mnt); - } - - if (vfs_path_lookup(mnt->mnt_root, mnt, pathname, flags, nd)) { - printk(KERN_WARNING "%s: %s failed to find path %s\n", - __FILE__, __func__, pathname); - rpc_put_mount(); - return -ENOENT; - } - return 0; -} - -static int rpc_lookup_parent(const char *pathname, struct nameidata *nd) -{ - return __rpc_lookup_path(pathname, LOOKUP_PARENT, nd); -} - -static void -rpc_release_path(struct nameidata *nd) -{ - path_put(&nd->path); - rpc_put_mount(); -} - static struct inode * rpc_get_inode(struct super_block *sb, umode_t mode) { @@ -889,27 +853,11 @@ EXPORT_SYMBOL_GPL(rpc_unlink); * information about the client, together with any "pipes" that may * later be created using rpc_mkpipe(). */ -struct dentry *rpc_create_client_dir(const char *path, +struct dentry *rpc_create_client_dir(struct dentry *dentry, + struct qstr *name, struct rpc_clnt *rpc_client) { - struct nameidata nd; - struct dentry *ret; - struct inode *dir; - - ret = ERR_PTR(rpc_lookup_parent(path, &nd)); - if (IS_ERR(ret)) - goto out_err; - dir = nd.path.dentry->d_inode; - - ret = rpc_mkdir_populate(nd.path.dentry, &nd.last, - S_IRUGO | S_IXUGO, rpc_client); - rpc_release_path(&nd); - if (!IS_ERR(ret)) - return ret; -out_err: - printk(KERN_WARNING "%s: %s() failed to create directory %s (errno = %ld)\n", - __FILE__, __func__, path, PTR_ERR(ret)); - return ret; + return rpc_mkdir_populate(dentry, name, S_IRUGO | S_IXUGO, rpc_client); } /* -- cgit v1.2.3 From 2da8ca26c6bfad685bfddf39728eac1c83906aa9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:26 -0400 Subject: NFSD: Clean up the idmapper warning... What part of 'internal use' is so hard to understand? Signed-off-by: Trond Myklebust --- fs/nfsd/nfs4idmap.c | 4 ++-- include/linux/sunrpc/cache.h | 3 ++- net/sunrpc/cache.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 5b398421b051..e9012ad36ac0 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -175,10 +175,10 @@ idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) } static void -warn_no_idmapd(struct cache_detail *detail) +warn_no_idmapd(struct cache_detail *detail, int has_died) { printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", - detail->last_close? "died" : "not been started"); + has_died ? 
"died" : "not been started"); } diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 2d8b211b9324..3d1fad22185a 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -79,6 +79,8 @@ struct cache_detail { int (*cache_show)(struct seq_file *m, struct cache_detail *cd, struct cache_head *h); + void (*warn_no_listener)(struct cache_detail *cd, + int has_died); struct cache_head * (*alloc)(void); int (*match)(struct cache_head *orig, struct cache_head *new); @@ -102,7 +104,6 @@ struct cache_detail { atomic_t readers; /* how many time is /chennel open */ time_t last_close; /* if no readers, when did last close */ time_t last_warn; /* when we last warned about no readers */ - void (*warn_no_listener)(struct cache_detail *cd); }; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index ff0c23053d2f..8ede4a6f384b 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1020,7 +1020,7 @@ static void warn_no_listener(struct cache_detail *detail) if (detail->last_warn != detail->last_close) { detail->last_warn = detail->last_close; if (detail->warn_no_listener) - detail->warn_no_listener(detail); + detail->warn_no_listener(detail, detail->last_close != 0); } } -- cgit v1.2.3 From bc74b4f5e63a09fb78e245794a0de1e5a2716bbe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:29 -0400 Subject: SUNRPC: Allow the cache_detail to specify alternative upcall mechanisms For events that are rare, such as referral DNS lookups, it makes limited sense to have a daemon constantly listening for upcalls on a channel. An alternative in those cases might simply be to run the app that fills the cache using call_usermodehelper_exec() and friends. The following patch allows the cache_detail to specify alternative upcall mechanisms for these particular cases. 
Signed-off-by: Trond Myklebust --- fs/nfsd/export.c | 14 ++++++++++++-- fs/nfsd/nfs4idmap.c | 16 ++++++++++++++-- include/linux/sunrpc/cache.h | 13 ++++++++++--- net/sunrpc/auth_gss/svcauth_gss.c | 7 ++++++- net/sunrpc/cache.c | 26 ++++++++++++++++++-------- net/sunrpc/svcauth_unix.c | 14 ++++++++++++-- 6 files changed, 72 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index b92a27629fb7..d9462643155c 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -85,6 +85,11 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, expkey_request); +} + static struct svc_expkey *svc_expkey_update(struct svc_expkey *new, struct svc_expkey *old); static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *); static struct cache_detail svc_expkey_cache; @@ -259,7 +264,7 @@ static struct cache_detail svc_expkey_cache = { .hash_table = expkey_table, .name = "nfsd.fh", .cache_put = expkey_put, - .cache_request = expkey_request, + .cache_upcall = expkey_upcall, .cache_parse = expkey_parse, .cache_show = expkey_show, .match = expkey_match, @@ -355,6 +360,11 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); +} + static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); @@ -724,7 +734,7 @@ struct cache_detail svc_export_cache = { .hash_table = export_table, .name = "nfsd.export", .cache_put = svc_export_put, - .cache_request = svc_export_request, + .cache_upcall = svc_export_upcall, .cache_parse = svc_export_parse, .cache_show = svc_export_show, .match = svc_export_match, diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index e9012ad36ac0..cdfa86fa1471 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -145,6 +145,12 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, (*bpp)[-1] = '\n'; } +static int +idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) +{ + return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); +} + static int idtoname_match(struct cache_head *ca, struct cache_head *cb) { @@ -192,7 +198,7 @@ static struct cache_detail idtoname_cache = { .hash_table = idtoname_table, .name = "nfs4.idtoname", .cache_put = ent_put, - .cache_request = idtoname_request, + .cache_upcall = idtoname_upcall, .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, @@ -324,6 +330,12 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, (*bpp)[-1] = '\n'; } +static int +nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) +{ + return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); +} + static int nametoid_match(struct cache_head *ca, struct cache_head *cb) { @@ -363,7 +375,7 @@ static struct cache_detail nametoid_cache = { .hash_table = nametoid_table, .name = "nfs4.nametoid", .cache_put = ent_put, - .cache_request = nametoid_request, + .cache_upcall = nametoid_upcall, .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 3d1fad22185a..23ee25981a0c 100644 --- a/include/linux/sunrpc/cache.h +++ 
b/include/linux/sunrpc/cache.h @@ -70,9 +70,9 @@ struct cache_detail { char *name; void (*cache_put)(struct kref *); - void (*cache_request)(struct cache_detail *cd, - struct cache_head *h, - char **bpp, int *blen); + int (*cache_upcall)(struct cache_detail *, + struct cache_head *); + int (*cache_parse)(struct cache_detail *, char *buf, int len); @@ -135,6 +135,13 @@ extern struct cache_head * sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); +extern int +sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, + void (*cache_request)(struct cache_detail *, + struct cache_head *, + char **, + int *)); + extern void cache_clean_deferred(void *owner); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 2278a50c6444..2e6a148d277c 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -181,6 +181,11 @@ static void rsi_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, rsi_request); +} + static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) @@ -270,7 +275,7 @@ static struct cache_detail rsi_cache = { .hash_table = rsi_table, .name = "auth.rpcsec.init", .cache_put = rsi_put, - .cache_request = rsi_request, + .cache_upcall = rsi_upcall, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index c8e7d2d07260..e438352bed7b 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -176,7 +176,13 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail, } EXPORT_SYMBOL_GPL(sunrpc_cache_update); -static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h); +static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) +{ + if (!cd->cache_upcall) + return -EINVAL; + return cd->cache_upcall(cd, h); +} + /* * This is the generic cache management routine for all * the authentication caches. @@ -322,7 +328,7 @@ static int create_cache_proc_entries(struct cache_detail *cd) if (p == NULL) goto out_nomem; - if (cd->cache_request || cd->cache_parse) { + if (cd->cache_upcall || cd->cache_parse) { p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, cd->proc_ent, &cache_file_operations, cd); cd->channel_ent = p; @@ -1080,10 +1086,16 @@ static void warn_no_listener(struct cache_detail *detail) } /* - * register an upcall request to user-space. + * register an upcall request to user-space and queue it up for read() by the + * upcall daemon. + * * Each request is at most one page long. 
*/ -static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) +int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, + void (*cache_request)(struct cache_detail *, + struct cache_head *, + char **, + int *)) { char *buf; @@ -1091,9 +1103,6 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) char *bp; int len; - if (detail->cache_request == NULL) - return -EINVAL; - if (atomic_read(&detail->readers) == 0 && detail->last_close < get_seconds() - 30) { warn_no_listener(detail); @@ -1112,7 +1121,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) bp = buf; len = PAGE_SIZE; - detail->cache_request(detail, h, &bp, &len); + cache_request(detail, h, &bp, &len); if (len < 0) { kfree(buf); @@ -1130,6 +1139,7 @@ static int cache_make_upcall(struct cache_detail *detail, struct cache_head *h) wake_up(&queue_wait); return 0; } +EXPORT_SYMBOL_GPL(sunrpc_cache_pipe_upcall); /* * parse a message from user-space and pass it diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 5c865e2d299e..6caffa34ac01 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -171,6 +171,11 @@ static void ip_map_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); +} + static struct ip_map *ip_map_lookup(char *class, struct in6_addr *addr); static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry); @@ -289,7 +294,7 @@ struct cache_detail ip_map_cache = { .hash_table = ip_table, .name = "auth.unix.ip", .cache_put = ip_map_put, - .cache_request = ip_map_request, + .cache_upcall = ip_map_upcall, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, @@ -523,6 +528,11 @@ static void unix_gid_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } +static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) +{ + return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); +} + static struct unix_gid *unix_gid_lookup(uid_t uid); extern struct cache_detail unix_gid_cache; @@ -622,7 +632,7 @@ struct cache_detail unix_gid_cache = { .hash_table = gid_table, .name = "auth.unix.gid", .cache_put = unix_gid_put, - .cache_request = unix_gid_request, + .cache_upcall = unix_gid_upcall, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, -- cgit v1.2.3 From 173912a6add00f4715774dcecf9ee53274c5924c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:29 -0400 Subject: SUNRPC: Move procfs-specific stuff out of the generic sunrpc cache code Signed-off-by: Trond Myklebust --- include/linux/sunrpc/cache.h | 11 +- net/sunrpc/cache.c | 319 ++++++++++++++++++++++++++----------------- 2 files changed, 199 insertions(+), 131 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 23ee25981a0c..8e5bf3036652 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -59,6 +59,11 @@ struct cache_head { #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ +struct cache_detail_procfs { + struct proc_dir_entry *proc_ent; + struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; +}; + struct cache_detail { struct module * owner; int hash_size; @@ -98,12 +103,14 @@ struct cache_detail { /* fields for communication over channel */ struct list_head 
queue; - struct proc_dir_entry *proc_ent; - struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; atomic_t readers; /* how many time is /chennel open */ time_t last_close; /* if no readers, when did last close */ time_t last_warn; /* when we last warned about no readers */ + + union { + struct cache_detail_procfs procfs; + } u; }; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index e438352bed7b..1cd82eda56d0 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -291,69 +291,9 @@ static DEFINE_SPINLOCK(cache_list_lock); static struct cache_detail *current_detail; static int current_index; -static const struct file_operations cache_file_operations; -static const struct file_operations content_file_operations; -static const struct file_operations cache_flush_operations; - static void do_cache_clean(struct work_struct *work); static DECLARE_DELAYED_WORK(cache_cleaner, do_cache_clean); -static void remove_cache_proc_entries(struct cache_detail *cd) -{ - if (cd->proc_ent == NULL) - return; - if (cd->flush_ent) - remove_proc_entry("flush", cd->proc_ent); - if (cd->channel_ent) - remove_proc_entry("channel", cd->proc_ent); - if (cd->content_ent) - remove_proc_entry("content", cd->proc_ent); - cd->proc_ent = NULL; - remove_proc_entry(cd->name, proc_net_rpc); -} - -#ifdef CONFIG_PROC_FS -static int create_cache_proc_entries(struct cache_detail *cd) -{ - struct proc_dir_entry *p; - - cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); - if (cd->proc_ent == NULL) - goto out_nomem; - cd->channel_ent = cd->content_ent = NULL; - - p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &cache_flush_operations, cd); - cd->flush_ent = p; - if (p == NULL) - goto out_nomem; - - if (cd->cache_upcall || cd->cache_parse) { - p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &cache_file_operations, cd); - cd->channel_ent = p; - if (p == NULL) - goto out_nomem; - } - if (cd->cache_show) { - p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, - cd->proc_ent, &content_file_operations, cd); - cd->content_ent = p; - if (p == NULL) - goto out_nomem; - } - return 0; -out_nomem: - remove_cache_proc_entries(cd); - return -ENOMEM; -} -#else /* CONFIG_PROC_FS */ -static int create_cache_proc_entries(struct cache_detail *cd) -{ - return 0; -} -#endif - static void sunrpc_init_cache_detail(struct cache_detail *cd) { rwlock_init(&cd->hash_lock); @@ -395,25 +335,6 @@ out: printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name); } -int cache_register(struct cache_detail *cd) -{ - int ret; - - sunrpc_init_cache_detail(cd); - ret = create_cache_proc_entries(cd); - if (ret) - sunrpc_destroy_cache_detail(cd); - return ret; -} -EXPORT_SYMBOL_GPL(cache_register); - -void cache_unregister(struct cache_detail *cd) -{ - remove_cache_proc_entries(cd); - sunrpc_destroy_cache_detail(cd); -} -EXPORT_SYMBOL_GPL(cache_unregister); - /* clean cache tries to find something to clean * and cleans it. 
* It returns 1 if it cleaned something, @@ -704,13 +625,12 @@ struct cache_reader { int offset; /* if non-0, we have a refcnt on next request */ }; -static ssize_t -cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) +static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, + loff_t *ppos, struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; struct cache_request *rq; struct inode *inode = filp->f_path.dentry->d_inode; - struct cache_detail *cd = PDE(inode)->data; int err; if (count == 0) @@ -834,13 +754,12 @@ out_slow: return cache_slow_downcall(buf, count, cd); } -static ssize_t -cache_write(struct file *filp, const char __user *buf, size_t count, - loff_t *ppos) +static ssize_t cache_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos, + struct cache_detail *cd) { struct address_space *mapping = filp->f_mapping; struct inode *inode = filp->f_path.dentry->d_inode; - struct cache_detail *cd = PDE(inode)->data; ssize_t ret = -EINVAL; if (!cd->cache_parse) @@ -855,13 +774,12 @@ out: static DECLARE_WAIT_QUEUE_HEAD(queue_wait); -static unsigned int -cache_poll(struct file *filp, poll_table *wait) +static unsigned int cache_poll(struct file *filp, poll_table *wait, + struct cache_detail *cd) { unsigned int mask; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; poll_wait(filp, &queue_wait, wait); @@ -883,14 +801,13 @@ cache_poll(struct file *filp, poll_table *wait) return mask; } -static int -cache_ioctl(struct inode *ino, struct file *filp, - unsigned int cmd, unsigned long arg) +static int cache_ioctl(struct inode *ino, struct file *filp, + unsigned int cmd, unsigned long arg, + struct cache_detail *cd) { int len = 0; struct cache_reader *rp = filp->private_data; struct cache_queue *cq; - struct cache_detail *cd = PDE(ino)->data; if (cmd != FIONREAD || !rp) return -EINVAL; @@ -913,15 +830,13 @@ cache_ioctl(struct inode *ino, struct file *filp, return put_user(len, (int __user *)arg); } -static int -cache_open(struct inode *inode, struct file *filp) +static int cache_open(struct inode *inode, struct file *filp, + struct cache_detail *cd) { struct cache_reader *rp = NULL; nonseekable_open(inode, filp); if (filp->f_mode & FMODE_READ) { - struct cache_detail *cd = PDE(inode)->data; - rp = kmalloc(sizeof(*rp), GFP_KERNEL); if (!rp) return -ENOMEM; @@ -936,11 +851,10 @@ cache_open(struct inode *inode, struct file *filp) return 0; } -static int -cache_release(struct inode *inode, struct file *filp) +static int cache_release(struct inode *inode, struct file *filp, + struct cache_detail *cd) { struct cache_reader *rp = filp->private_data; - struct cache_detail *cd = PDE(inode)->data; if (rp) { spin_lock(&queue_lock); @@ -969,18 +883,6 @@ cache_release(struct inode *inode, struct file *filp) -static const struct file_operations cache_file_operations = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .read = cache_read, - .write = cache_write, - .poll = cache_poll, - .ioctl = cache_ioctl, /* for FIONREAD */ - .open = cache_open, - .release = cache_release, -}; - - static void queue_loose(struct cache_detail *detail, struct cache_head *ch) { struct cache_queue *cq; @@ -1307,10 +1209,10 @@ static const struct seq_operations cache_content_op = { .show = c_show, }; -static int content_open(struct inode *inode, struct file *file) +static int content_open(struct inode *inode, struct file *file, + struct cache_detail *cd) { struct 
handle *han; - struct cache_detail *cd = PDE(inode)->data; han = __seq_open_private(file, &cache_content_op, sizeof(*han)); if (han == NULL) @@ -1320,17 +1222,10 @@ static int content_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations content_file_operations = { - .open = content_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - static ssize_t read_flush(struct file *file, char __user *buf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos, + struct cache_detail *cd) { - struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; unsigned long p = *ppos; size_t len; @@ -1348,10 +1243,10 @@ static ssize_t read_flush(struct file *file, char __user *buf, return len; } -static ssize_t write_flush(struct file * file, const char __user * buf, - size_t count, loff_t *ppos) +static ssize_t write_flush(struct file *file, const char __user *buf, + size_t count, loff_t *ppos, + struct cache_detail *cd) { - struct cache_detail *cd = PDE(file->f_path.dentry->d_inode)->data; char tbuf[20]; char *ep; long flushtime; @@ -1372,8 +1267,174 @@ static ssize_t write_flush(struct file * file, const char __user * buf, return count; } -static const struct file_operations cache_flush_operations = { +static ssize_t cache_read_procfs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_read(filp, buf, count, ppos, cd); +} + +static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_write(filp, buf, count, ppos, cd); +} + +static unsigned int cache_poll_procfs(struct file *filp, poll_table *wait) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return cache_poll(filp, wait, cd); +} + +static int cache_ioctl_procfs(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_ioctl(inode, filp, cmd, arg, cd); +} + +static int cache_open_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_open(inode, filp, cd); +} + +static int cache_release_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return cache_release(inode, filp, cd); +} + +static const struct file_operations cache_file_operations_procfs = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = cache_read_procfs, + .write = cache_write_procfs, + .poll = cache_poll_procfs, + .ioctl = cache_ioctl_procfs, /* for FIONREAD */ + .open = cache_open_procfs, + .release = cache_release_procfs, +}; + +static int content_open_procfs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = PDE(inode)->data; + + return content_open(inode, filp, cd); +} + +static const struct file_operations content_file_operations_procfs = { + .open = content_open_procfs, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static ssize_t read_flush_procfs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return read_flush(filp, buf, count, ppos, cd); +} + +static ssize_t write_flush_procfs(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct 
cache_detail *cd = PDE(filp->f_path.dentry->d_inode)->data; + + return write_flush(filp, buf, count, ppos, cd); +} + +static const struct file_operations cache_flush_operations_procfs = { .open = nonseekable_open, - .read = read_flush, - .write = write_flush, + .read = read_flush_procfs, + .write = write_flush_procfs, }; + +static void remove_cache_proc_entries(struct cache_detail *cd) +{ + if (cd->u.procfs.proc_ent == NULL) + return; + if (cd->u.procfs.flush_ent) + remove_proc_entry("flush", cd->u.procfs.proc_ent); + if (cd->u.procfs.channel_ent) + remove_proc_entry("channel", cd->u.procfs.proc_ent); + if (cd->u.procfs.content_ent) + remove_proc_entry("content", cd->u.procfs.proc_ent); + cd->u.procfs.proc_ent = NULL; + remove_proc_entry(cd->name, proc_net_rpc); +} + +#ifdef CONFIG_PROC_FS +static int create_cache_proc_entries(struct cache_detail *cd) +{ + struct proc_dir_entry *p; + + cd->u.procfs.proc_ent = proc_mkdir(cd->name, proc_net_rpc); + if (cd->u.procfs.proc_ent == NULL) + goto out_nomem; + cd->u.procfs.channel_ent = NULL; + cd->u.procfs.content_ent = NULL; + + p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &cache_flush_operations_procfs, cd); + cd->u.procfs.flush_ent = p; + if (p == NULL) + goto out_nomem; + + if (cd->cache_upcall || cd->cache_parse) { + p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &cache_file_operations_procfs, cd); + cd->u.procfs.channel_ent = p; + if (p == NULL) + goto out_nomem; + } + if (cd->cache_show) { + p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, + cd->u.procfs.proc_ent, + &content_file_operations_procfs, cd); + cd->u.procfs.content_ent = p; + if (p == NULL) + goto out_nomem; + } + return 0; +out_nomem: + remove_cache_proc_entries(cd); + return -ENOMEM; +} +#else /* CONFIG_PROC_FS */ +static int create_cache_proc_entries(struct cache_detail *cd) +{ + return 0; +} +#endif + +int cache_register(struct cache_detail *cd) +{ + int ret; + + sunrpc_init_cache_detail(cd); + ret = create_cache_proc_entries(cd); + if (ret) + sunrpc_destroy_cache_detail(cd); + return ret; +} +EXPORT_SYMBOL_GPL(cache_register); + +void cache_unregister(struct cache_detail *cd) +{ + remove_cache_proc_entries(cd); + sunrpc_destroy_cache_detail(cd); +} +EXPORT_SYMBOL_GPL(cache_unregister); -- cgit v1.2.3 From 8854e82d9accc80f43c0bc3ff06b5979ac858185 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 9 Aug 2009 15:14:30 -0400 Subject: SUNRPC: Add an rpc_pipefs front end for the sunrpc cache code Signed-off-by: Trond Myklebust --- include/linux/sunrpc/cache.h | 13 ++++ include/linux/sunrpc/rpc_pipe_fs.h | 8 +++ net/sunrpc/cache.c | 126 +++++++++++++++++++++++++++++++++++++ net/sunrpc/rpc_pipe.c | 43 +++++++++++++ 4 files changed, 190 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 8e5bf3036652..6f52b4d7c447 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -64,6 +64,10 @@ struct cache_detail_procfs { struct proc_dir_entry *flush_ent, *channel_ent, *content_ent; }; +struct cache_detail_pipefs { + struct dentry *dir; +}; + struct cache_detail { struct module * owner; int hash_size; @@ -110,6 +114,7 @@ struct cache_detail { union { struct cache_detail_procfs procfs; + struct cache_detail_pipefs pipefs; } u; }; @@ -135,6 +140,10 @@ struct cache_deferred_req { }; +extern const struct file_operations cache_file_operations_pipefs; +extern const struct file_operations content_file_operations_pipefs; 
+extern const struct file_operations cache_flush_operations_pipefs; + extern struct cache_head * sunrpc_cache_lookup(struct cache_detail *detail, struct cache_head *key, int hash); @@ -186,6 +195,10 @@ extern void cache_purge(struct cache_detail *detail); extern int cache_register(struct cache_detail *cd); extern void cache_unregister(struct cache_detail *cd); +extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, + mode_t, struct cache_detail *); +extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); + extern void qword_add(char **bpp, int *lp, char *str); extern void qword_addhex(char **bpp, int *lp, char *buf, int blen); extern int qword_get(char **bpp, char *dest, int bufsize); diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index 88332ef1e959..a92571a34556 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -47,6 +47,14 @@ extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); struct rpc_clnt; extern struct dentry *rpc_create_client_dir(struct dentry *, struct qstr *, struct rpc_clnt *); extern int rpc_remove_client_dir(struct dentry *); + +struct cache_detail; +extern struct dentry *rpc_create_cache_dir(struct dentry *, + struct qstr *, + mode_t umode, + struct cache_detail *); +extern void rpc_remove_cache_dir(struct dentry *); + extern struct dentry *rpc_mkpipe(struct dentry *, const char *, void *, const struct rpc_pipe_ops *, int flags); extern int rpc_unlink(struct dentry *); diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 1cd82eda56d0..db7720e453c3 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -32,6 +32,7 @@ #include #include #include +#include #define RPCDBG_FACILITY RPCDBG_CACHE @@ -1438,3 +1439,128 @@ void cache_unregister(struct cache_detail *cd) sunrpc_destroy_cache_detail(cd); } EXPORT_SYMBOL_GPL(cache_unregister); + +static ssize_t cache_read_pipefs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_read(filp, buf, count, ppos, cd); +} + +static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_write(filp, buf, count, ppos, cd); +} + +static unsigned int cache_poll_pipefs(struct file *filp, poll_table *wait) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return cache_poll(filp, wait, cd); +} + +static int cache_ioctl_pipefs(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_ioctl(inode, filp, cmd, arg, cd); +} + +static int cache_open_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_open(inode, filp, cd); +} + +static int cache_release_pipefs(struct inode *inode, struct file *filp) +{ + struct cache_detail *cd = RPC_I(inode)->private; + + return cache_release(inode, filp, cd); +} + +const struct file_operations cache_file_operations_pipefs = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .read = cache_read_pipefs, + .write = cache_write_pipefs, + .poll = cache_poll_pipefs, + .ioctl = cache_ioctl_pipefs, /* for FIONREAD */ + .open = cache_open_pipefs, + .release = cache_release_pipefs, +}; + +static int content_open_pipefs(struct inode *inode, struct file *filp) +{ + 
struct cache_detail *cd = RPC_I(inode)->private; + + return content_open(inode, filp, cd); +} + +const struct file_operations content_file_operations_pipefs = { + .open = content_open_pipefs, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +static ssize_t read_flush_pipefs(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return read_flush(filp, buf, count, ppos, cd); +} + +static ssize_t write_flush_pipefs(struct file *filp, + const char __user *buf, + size_t count, loff_t *ppos) +{ + struct cache_detail *cd = RPC_I(filp->f_path.dentry->d_inode)->private; + + return write_flush(filp, buf, count, ppos, cd); +} + +const struct file_operations cache_flush_operations_pipefs = { + .open = nonseekable_open, + .read = read_flush_pipefs, + .write = write_flush_pipefs, +}; + +int sunrpc_cache_register_pipefs(struct dentry *parent, + const char *name, mode_t umode, + struct cache_detail *cd) +{ + struct qstr q; + struct dentry *dir; + int ret = 0; + + sunrpc_init_cache_detail(cd); + q.name = name; + q.len = strlen(name); + q.hash = full_name_hash(q.name, q.len); + dir = rpc_create_cache_dir(parent, &q, umode, cd); + if (!IS_ERR(dir)) + cd->u.pipefs.dir = dir; + else { + sunrpc_destroy_cache_detail(cd); + ret = PTR_ERR(dir); + } + return ret; +} +EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs); + +void sunrpc_cache_unregister_pipefs(struct cache_detail *cd) +{ + rpc_remove_cache_dir(cd->u.pipefs.dir); + cd->u.pipefs.dir = NULL; + sunrpc_destroy_cache_detail(cd); +} +EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs); + diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 57e9cd3c49b6..8dd81535e08f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -26,6 +26,7 @@ #include #include #include +#include static struct vfsmount *rpc_mount __read_mostly; static int rpc_mount_count; @@ -882,6 +883,48 @@ int rpc_remove_client_dir(struct dentry *dentry) return rpc_rmdir_depopulate(dentry, rpc_clntdir_depopulate); } +static const struct rpc_filelist cache_pipefs_files[3] = { + [0] = { + .name = "channel", + .i_fop = &cache_file_operations_pipefs, + .mode = S_IFIFO|S_IRUSR|S_IWUSR, + }, + [1] = { + .name = "content", + .i_fop = &content_file_operations_pipefs, + .mode = S_IFREG|S_IRUSR, + }, + [2] = { + .name = "flush", + .i_fop = &cache_flush_operations_pipefs, + .mode = S_IFREG|S_IRUSR|S_IWUSR, + }, +}; + +static int rpc_cachedir_populate(struct dentry *dentry, void *private) +{ + return rpc_populate(dentry, + cache_pipefs_files, 0, 3, + private); +} + +static void rpc_cachedir_depopulate(struct dentry *dentry) +{ + rpc_depopulate(dentry, cache_pipefs_files, 0, 3); +} + +struct dentry *rpc_create_cache_dir(struct dentry *parent, struct qstr *name, + mode_t umode, struct cache_detail *cd) +{ + return rpc_mkdir_populate(parent, name, umode, NULL, + rpc_cachedir_populate, cd); +} + +void rpc_remove_cache_dir(struct dentry *dentry) +{ + rpc_rmdir_depopulate(dentry, rpc_cachedir_depopulate); +} + /* * populate the filesystem */ -- cgit v1.2.3 From 47d439e9fb8a81a90022cfa785bf1c36c4e2aff6 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 7 Aug 2009 14:53:57 -0400 Subject: security: define round_hint_to_min in !CONFIG_SECURITY Fix the header files to define round_hint_to_min() and to define mmap_min_addr_handler() in the !CONFIG_SECURITY case. 
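For illustration only (not part of this patch): a standalone userspace sketch of the
rounding that round_hint_to_min() performs. The 4 KiB page size and the mmap_min_addr
value of 65536 are assumptions picked for the example; only the rounding logic mirrors
the kernel helper.

/* Illustrative sketch, not kernel code: mimics round_hint_to_min()
 * with an assumed 4 KiB page size and a made-up mmap_min_addr. */
#include <stdio.h>

#define PAGE_SIZE     4096UL
#define PAGE_MASK     (~(PAGE_SIZE - 1))
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)

static unsigned long mmap_min_addr = 65536;	/* assumed sysctl value */

static unsigned long round_hint_to_min(unsigned long hint)
{
	hint &= PAGE_MASK;
	/* A non-NULL hint below mmap_min_addr is bumped up to the
	 * lowest page-aligned address that is still allowed. */
	if (hint != 0 && hint < mmap_min_addr)
		return PAGE_ALIGN(mmap_min_addr);
	return hint;
}

int main(void)
{
	printf("%#lx -> %#lx\n", 0x1234UL, round_hint_to_min(0x1234UL));   /* 0x10000 */
	printf("%#lx -> %#lx\n", 0x20000UL, round_hint_to_min(0x20000UL)); /* unchanged */
	return 0;
}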
Built and tested with !CONFIG_SECURITY Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/security.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 7b431155e392..57ead99d2593 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -121,6 +121,21 @@ struct request_sock; #define LSM_UNSAFE_PTRACE 2 #define LSM_UNSAFE_PTRACE_CAP 4 +/* + * If a hint addr is less than mmap_min_addr change hint to be as + * low as possible but still greater than mmap_min_addr + */ +static inline unsigned long round_hint_to_min(unsigned long hint) +{ + hint &= PAGE_MASK; + if (((void *)hint != NULL) && + (hint < mmap_min_addr)) + return PAGE_ALIGN(mmap_min_addr); + return hint; +} +extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + #ifdef CONFIG_SECURITY struct security_mnt_opts { @@ -149,21 +164,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) opts->num_mnt_opts = 0; } -/* - * If a hint addr is less than mmap_min_addr change hint to be as - * low as possible but still greater than mmap_min_addr - */ -static inline unsigned long round_hint_to_min(unsigned long hint) -{ - hint &= PAGE_MASK; - if (((void *)hint != NULL) && - (hint < mmap_min_addr)) - return PAGE_ALIGN(mmap_min_addr); - return hint; -} - -extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos); /** * struct security_operations - main security structure * -- cgit v1.2.3 From a8ad568dd8ca122aa8048ea067d3599820d1c1b4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 10 Aug 2009 11:53:10 +0900 Subject: dma-ops: Remove flush_write_buffers() in dma-mapping-common.h This moves flush_write_buffers() in asm-generic/dma-mapping-common.h to arch/x86/kernel/pci-nommu.c. The purpose of this patch is that, we can avoid defining NULL flush_write_buffers() on IA64 and SPARC. dma-mapping-common.h is used by X86 and IA64 (and SPARC soon) but only X86 with CONFIG_X86_OOSTORE or CONFIG_X86_PPRO_FENCE actually uses flush_write_buffers(). CONFIG_X86_OOSTORE or CONFIG_X86_PPRO_FENCE is usable with only kernel/pci-nommu.c (that is, not usable with other X86 IOMMU implementations such as SWIOTLB, VT-d, etc) so we can safely move flush_write_buffers() in asm-generic/dma-mapping-common.h to arch/x86/kernel/pci-nommu.c. 
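For illustration only (not part of this patch): a rough userspace sketch of the
indirection that makes the move safe. The generic sync helper only dispatches through
the per-architecture ops table, so the write-buffer flush can live entirely in the one
implementation that needs it. The fake_flush_write_buffers() stub and the simplified
signatures are inventions for the example.

/* Minimal sketch of the dispatch pattern; names mirror the kernel
 * structures, but this is illustrative userspace C, not the patch. */
#include <stdio.h>
#include <stddef.h>

struct dma_map_ops {
	/* only the hook relevant to this example */
	void (*sync_single_for_device)(void *dev, unsigned long addr, size_t size);
};

/* generic helper: knows nothing about write buffers, just dispatches */
static void dma_sync_single_for_device(void *dev, const struct dma_map_ops *ops,
				       unsigned long addr, size_t size)
{
	if (ops->sync_single_for_device)
		ops->sync_single_for_device(dev, addr, size);
}

/* stand-in for flush_write_buffers(); a real PPro-era x86 would issue
 * a serializing instruction here */
static void fake_flush_write_buffers(void)
{
	printf("write buffers flushed\n");
}

static void nommu_sync_single_for_device(void *dev, unsigned long addr, size_t size)
{
	(void)dev; (void)addr; (void)size;
	fake_flush_write_buffers();
}

static const struct dma_map_ops nommu_dma_ops = {
	.sync_single_for_device = nommu_sync_single_for_device,
};

int main(void)
{
	/* only the implementation that needs the flush pays for it */
	dma_sync_single_for_device(NULL, &nommu_dma_ops, 0x1000, 64);
	return 0;
}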
The further discussion is: http://lkml.org/lkml/2009/6/28/104 Signed-off-by: Arnd Bergmann Acked-by: FUJITA Tomonori Cc: davem@davemloft.net Cc: tony.luck@intel.com Cc: fenghua.yu@intel.com LKML-Reference: <1249872797-1314-2-git-send-email-fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-nommu.c | 27 ++++++++++++++++++++++----- include/asm-generic/dma-mapping-common.h | 6 ------ 2 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index c0a4222bf62b..a3933d4330cd 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -79,12 +79,29 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, free_pages((unsigned long)vaddr, get_order(size)); } +static void nommu_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, + enum dma_data_direction dir) +{ + flush_write_buffers(); +} + + +static void nommu_sync_sg_for_device(struct device *dev, + struct scatterlist *sg, int nelems, + enum dma_data_direction dir) +{ + flush_write_buffers(); +} + struct dma_map_ops nommu_dma_ops = { - .alloc_coherent = dma_generic_alloc_coherent, - .free_coherent = nommu_free_coherent, - .map_sg = nommu_map_sg, - .map_page = nommu_map_page, - .is_phys = 1, + .alloc_coherent = dma_generic_alloc_coherent, + .free_coherent = nommu_free_coherent, + .map_sg = nommu_map_sg, + .map_page = nommu_map_page, + .sync_single_for_device = nommu_sync_single_for_device, + .sync_sg_for_device = nommu_sync_sg_for_device, + .is_phys = 1, }; void __init no_iommu_init(void) diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h index 5406a601185c..e694263445f7 100644 --- a/include/asm-generic/dma-mapping-common.h +++ b/include/asm-generic/dma-mapping-common.h @@ -103,7 +103,6 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, if (ops->sync_single_for_cpu) ops->sync_single_for_cpu(dev, addr, size, dir); debug_dma_sync_single_for_cpu(dev, addr, size, dir); - flush_write_buffers(); } static inline void dma_sync_single_for_device(struct device *dev, @@ -116,7 +115,6 @@ static inline void dma_sync_single_for_device(struct device *dev, if (ops->sync_single_for_device) ops->sync_single_for_device(dev, addr, size, dir); debug_dma_sync_single_for_device(dev, addr, size, dir); - flush_write_buffers(); } static inline void dma_sync_single_range_for_cpu(struct device *dev, @@ -132,7 +130,6 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev, ops->sync_single_range_for_cpu(dev, addr, offset, size, dir); debug_dma_sync_single_range_for_cpu(dev, addr, offset, size, dir); - flush_write_buffers(); } else dma_sync_single_for_cpu(dev, addr, size, dir); } @@ -150,7 +147,6 @@ static inline void dma_sync_single_range_for_device(struct device *dev, ops->sync_single_range_for_device(dev, addr, offset, size, dir); debug_dma_sync_single_range_for_device(dev, addr, offset, size, dir); - flush_write_buffers(); } else dma_sync_single_for_device(dev, addr, size, dir); } @@ -165,7 +161,6 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, if (ops->sync_sg_for_cpu) ops->sync_sg_for_cpu(dev, sg, nelems, dir); debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); - flush_write_buffers(); } static inline void @@ -179,7 +174,6 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, ops->sync_sg_for_device(dev, sg, nelems, dir); debug_dma_sync_sg_for_device(dev, sg, nelems, 
dir); - flush_write_buffers(); } #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL) -- cgit v1.2.3 From a044560c3a1f0ad75ce685c1ed7604820b9ed319 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 10 Aug 2009 11:16:52 +0200 Subject: perf_counter: Correct PERF_SAMPLE_RAW output PERF_SAMPLE_* output switches should unconditionally output the correct format, as they are the only way to unambiguously parse the PERF_EVENT_SAMPLE data. Signed-off-by: Peter Zijlstra Acked-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1249896447.17467.74.camel@twins> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ include/trace/ftrace.h | 3 ++- kernel/perf_counter.c | 30 ++++++++++++++++++++++++------ 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2aabe43c1d04..a9d823a93fe8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -369,6 +369,8 @@ enum perf_event_type { * * { u64 nr, * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN + * { u32 size; + * char data[size];}&& PERF_SAMPLE_RAW * }; */ PERF_EVENT_SAMPLE = 9, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 7fb16d90e7b1..7167b9b97da2 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -685,7 +685,8 @@ static void ftrace_profile_##call(proto) \ pc = preempt_count(); \ \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ - __entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\ + __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ + sizeof(u64)); \ \ do { \ char raw_data[__entry_size]; \ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 546e62d62941..5229d1666fa5 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2646,7 +2646,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, u64 counter; } group_entry; struct perf_callchain_entry *callchain = NULL; - struct perf_raw_record *raw = NULL; int callchain_size = 0; u64 time; struct { @@ -2716,9 +2715,15 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } if (sample_type & PERF_SAMPLE_RAW) { - raw = data->raw; - if (raw) - header.size += raw->size; + int size = sizeof(u32); + + if (data->raw) + size += data->raw->size; + else + size += sizeof(u32); + + WARN_ON_ONCE(size & (sizeof(u64)-1)); + header.size += size; } ret = perf_output_begin(&handle, counter, header.size, nmi, 1); @@ -2784,8 +2789,21 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } } - if ((sample_type & PERF_SAMPLE_RAW) && raw) - perf_output_copy(&handle, raw->data, raw->size); + if (sample_type & PERF_SAMPLE_RAW) { + if (data->raw) { + perf_output_put(&handle, data->raw->size); + perf_output_copy(&handle, data->raw->data, data->raw->size); + } else { + struct { + u32 size; + u32 data; + } raw = { + .size = sizeof(u32), + .data = 0, + }; + perf_output_put(&handle, raw); + } + } perf_output_end(&handle); } -- cgit v1.2.3 From 7cd1837b5d24417eca667d674a97bea936849785 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 18:36:33 +0200 Subject: netfilter: xtables: remove xt_TOS v0 Superseded by xt_TOS v1 (v2.6.24-2396-g5c350e5). 
Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 -- include/linux/netfilter_ipv4/Kbuild | 2 -- include/linux/netfilter_ipv4/ipt_TOS.h | 12 -------- include/linux/netfilter_ipv4/ipt_tos.h | 13 --------- net/netfilter/xt_DSCP.c | 46 ------------------------------ net/netfilter/xt_dscp.c | 17 ----------- 6 files changed, 93 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/ipt_TOS.h delete mode 100644 include/linux/netfilter_ipv4/ipt_tos.h (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index f8cd450be9aa..3aa4a779092b 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -235,9 +235,6 @@ Who: Thomas Gleixner --------------------------- What (Why): - - include/linux/netfilter_ipv4/ipt_TOS.h ipt_tos.h header files - (superseded by xt_TOS/xt_tos target & match) - - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 3a7105bb8f33..86d81a285f9c 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -9,7 +9,6 @@ header-y += ipt_NFQUEUE.h header-y += ipt_REJECT.h header-y += ipt_SAME.h header-y += ipt_TCPMSS.h -header-y += ipt_TOS.h header-y += ipt_TTL.h header-y += ipt_ULOG.h header-y += ipt_addrtype.h @@ -40,7 +39,6 @@ header-y += ipt_sctp.h header-y += ipt_state.h header-y += ipt_string.h header-y += ipt_tcpmss.h -header-y += ipt_tos.h header-y += ipt_ttl.h unifdef-y += ip_queue.h diff --git a/include/linux/netfilter_ipv4/ipt_TOS.h b/include/linux/netfilter_ipv4/ipt_TOS.h deleted file mode 100644 index 6bf9e1fdfd88..000000000000 --- a/include/linux/netfilter_ipv4/ipt_TOS.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef _IPT_TOS_H_target -#define _IPT_TOS_H_target - -#ifndef IPTOS_NORMALSVC -#define IPTOS_NORMALSVC 0 -#endif - -struct ipt_tos_target_info { - u_int8_t tos; -}; - -#endif /*_IPT_TOS_H_target*/ diff --git a/include/linux/netfilter_ipv4/ipt_tos.h b/include/linux/netfilter_ipv4/ipt_tos.h deleted file mode 100644 index a21f5df23c50..000000000000 --- a/include/linux/netfilter_ipv4/ipt_tos.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _IPT_TOS_H -#define _IPT_TOS_H - -struct ipt_tos_info { - u_int8_t tos; - u_int8_t invert; -}; - -#ifndef IPTOS_NORMALSVC -#define IPTOS_NORMALSVC 0 -#endif - -#endif /*_IPT_TOS_H*/ diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 6a347e768f86..74ce89260056 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -18,7 +18,6 @@ #include #include -#include MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("Xtables: DSCP/TOS field modification"); @@ -72,41 +71,6 @@ static bool dscp_tg_check(const struct xt_tgchk_param *par) return true; } -static unsigned int -tos_tg_v0(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct ipt_tos_target_info *info = par->targinfo; - struct iphdr *iph = ip_hdr(skb); - u_int8_t oldtos; - - if ((iph->tos & IPTOS_TOS_MASK) != info->tos) { - if (!skb_make_writable(skb, sizeof(struct iphdr))) - return NF_DROP; - - iph = ip_hdr(skb); - oldtos = iph->tos; - iph->tos = (iph->tos & IPTOS_PREC_MASK) | info->tos; - csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); - } - - return XT_CONTINUE; -} - -static bool tos_tg_check_v0(const struct xt_tgchk_param *par) -{ - const struct ipt_tos_target_info *info = par->targinfo; - const 
uint8_t tos = info->tos; - - if (tos != IPTOS_LOWDELAY && tos != IPTOS_THROUGHPUT && - tos != IPTOS_RELIABILITY && tos != IPTOS_MINCOST && - tos != IPTOS_NORMALSVC) { - printk(KERN_WARNING "TOS: bad tos value %#x\n", tos); - return false; - } - - return true; -} - static unsigned int tos_tg(struct sk_buff *skb, const struct xt_target_param *par) { @@ -166,16 +130,6 @@ static struct xt_target dscp_tg_reg[] __read_mostly = { .table = "mangle", .me = THIS_MODULE, }, - { - .name = "TOS", - .revision = 0, - .family = NFPROTO_IPV4, - .table = "mangle", - .target = tos_tg_v0, - .targetsize = sizeof(struct ipt_tos_target_info), - .checkentry = tos_tg_check_v0, - .me = THIS_MODULE, - }, { .name = "TOS", .revision = 1, diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index c3f8085460d7..0280d3a8c161 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -15,7 +15,6 @@ #include #include -#include MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("Xtables: DSCP/TOS field match"); @@ -55,14 +54,6 @@ static bool dscp_mt_check(const struct xt_mtchk_param *par) return true; } -static bool -tos_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ipt_tos_info *info = par->matchinfo; - - return (ip_hdr(skb)->tos == info->tos) ^ info->invert; -} - static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const struct xt_tos_match_info *info = par->matchinfo; @@ -92,14 +83,6 @@ static struct xt_match dscp_mt_reg[] __read_mostly = { .matchsize = sizeof(struct xt_dscp_info), .me = THIS_MODULE, }, - { - .name = "tos", - .revision = 0, - .family = NFPROTO_IPV4, - .match = tos_mt_v0, - .matchsize = sizeof(struct ipt_tos_info), - .me = THIS_MODULE, - }, { .name = "tos", .revision = 1, -- cgit v1.2.3 From e973a70ca033bfcd4d8b59d1f66bfc1e782e1276 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 18:42:12 +0200 Subject: netfilter: xtables: remove xt_CONNMARK v0 Superseded by xt_CONNMARK v1 (v2.6.24-2917-g0dc8c76). 
Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 - include/linux/netfilter/xt_CONNMARK.h | 6 -- net/netfilter/xt_CONNMARK.c | 134 +++-------------------------- 3 files changed, 11 insertions(+), 132 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 3aa4a779092b..7eccf945d4e0 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -238,9 +238,6 @@ What (Why): - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_CONNMARK match revision 0 - (superseded by xt_CONNMARK match revision 1) - - xt_MARK target revisions 0 and 1 (superseded by xt_MARK match revision 2) diff --git a/include/linux/netfilter/xt_CONNMARK.h b/include/linux/netfilter/xt_CONNMARK.h index 7635c8ffdadb..0a8545866752 100644 --- a/include/linux/netfilter/xt_CONNMARK.h +++ b/include/linux/netfilter/xt_CONNMARK.h @@ -18,12 +18,6 @@ enum { XT_CONNMARK_RESTORE }; -struct xt_connmark_target_info { - unsigned long mark; - unsigned long mask; - __u8 mode; -}; - struct xt_connmark_tginfo1 { __u32 ctmark, ctmask, nfmask; __u8 mode; diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index d6e5ab463277..593457068ae1 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -35,45 +35,6 @@ MODULE_ALIAS("ip6t_CONNMARK"); #include #include -static unsigned int -connmark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_connmark_target_info *markinfo = par->targinfo; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - u_int32_t diff; - u_int32_t mark; - u_int32_t newmark; - - ct = nf_ct_get(skb, &ctinfo); - if (ct) { - switch(markinfo->mode) { - case XT_CONNMARK_SET: - newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; - if (newmark != ct->mark) { - ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, ct); - } - break; - case XT_CONNMARK_SAVE: - newmark = (ct->mark & ~markinfo->mask) | - (skb->mark & markinfo->mask); - if (ct->mark != newmark) { - ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, ct); - } - break; - case XT_CONNMARK_RESTORE: - mark = skb->mark; - diff = (ct->mark ^ mark) & markinfo->mask; - skb->mark = mark ^ diff; - break; - } - } - - return XT_CONTINUE; -} - static unsigned int connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) { @@ -112,30 +73,6 @@ connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool connmark_tg_check_v0(const struct xt_tgchk_param *par) -{ - const struct xt_connmark_target_info *matchinfo = par->targinfo; - - if (matchinfo->mode == XT_CONNMARK_RESTORE) { - if (strcmp(par->table, "mangle") != 0) { - printk(KERN_WARNING "CONNMARK: restore can only be " - "called from \"mangle\" table, not \"%s\"\n", - par->table); - return false; - } - } - if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { - printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); - return false; - } - if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", par->family); - return false; - } - return true; -} - static bool connmark_tg_check(const struct xt_tgchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { @@ -151,74 +88,25 @@ static void connmark_tg_destroy(const struct xt_tgdtor_param *par) nf_ct_l3proto_module_put(par->family); } 
-#ifdef CONFIG_COMPAT -struct compat_xt_connmark_target_info { - compat_ulong_t mark, mask; - u_int8_t mode; - u_int8_t __pad1; - u_int16_t __pad2; -}; - -static void connmark_tg_compat_from_user_v0(void *dst, void *src) -{ - const struct compat_xt_connmark_target_info *cm = src; - struct xt_connmark_target_info m = { - .mark = cm->mark, - .mask = cm->mask, - .mode = cm->mode, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int connmark_tg_compat_to_user_v0(void __user *dst, void *src) -{ - const struct xt_connmark_target_info *m = src; - struct compat_xt_connmark_target_info cm = { - .mark = m->mark, - .mask = m->mask, - .mode = m->mode, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_target connmark_tg_reg[] __read_mostly = { - { - .name = "CONNMARK", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = connmark_tg_check_v0, - .destroy = connmark_tg_destroy, - .target = connmark_tg_v0, - .targetsize = sizeof(struct xt_connmark_target_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_connmark_target_info), - .compat_from_user = connmark_tg_compat_from_user_v0, - .compat_to_user = connmark_tg_compat_to_user_v0, -#endif - .me = THIS_MODULE - }, - { - .name = "CONNMARK", - .revision = 1, - .family = NFPROTO_UNSPEC, - .checkentry = connmark_tg_check, - .target = connmark_tg, - .targetsize = sizeof(struct xt_connmark_tginfo1), - .destroy = connmark_tg_destroy, - .me = THIS_MODULE, - }, +static struct xt_target connmark_tg_reg __read_mostly = { + .name = "CONNMARK", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = connmark_tg_check, + .target = connmark_tg, + .targetsize = sizeof(struct xt_connmark_tginfo1), + .destroy = connmark_tg_destroy, + .me = THIS_MODULE, }; static int __init connmark_tg_init(void) { - return xt_register_targets(connmark_tg_reg, - ARRAY_SIZE(connmark_tg_reg)); + return xt_register_target(&connmark_tg_reg); } static void __exit connmark_tg_exit(void) { - xt_unregister_targets(connmark_tg_reg, ARRAY_SIZE(connmark_tg_reg)); + xt_unregister_target(&connmark_tg_reg); } module_init(connmark_tg_init); -- cgit v1.2.3 From c8001f7fd5a4684280fddceed9fae9ea2e4fb521 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 18:47:32 +0200 Subject: netfilter: xtables: remove xt_MARK v0, v1 Superseded by xt_MARK v2 (v2.6.24-2918-ge0a812a). 
Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 - include/linux/netfilter/xt_MARK.h | 17 --- net/netfilter/xt_MARK.c | 163 ++--------------------------- 3 files changed, 9 insertions(+), 174 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 7eccf945d4e0..121e19c9eee6 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -238,9 +238,6 @@ What (Why): - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_MARK target revisions 0 and 1 - (superseded by xt_MARK match revision 2) - - xt_connmark match revision 0 (superseded by xt_connmark match revision 1) diff --git a/include/linux/netfilter/xt_MARK.h b/include/linux/netfilter/xt_MARK.h index 028304bcc0b1..bc9561bdef79 100644 --- a/include/linux/netfilter/xt_MARK.h +++ b/include/linux/netfilter/xt_MARK.h @@ -3,23 +3,6 @@ #include -/* Version 0 */ -struct xt_mark_target_info { - unsigned long mark; -}; - -/* Version 1 */ -enum { - XT_MARK_SET=0, - XT_MARK_AND, - XT_MARK_OR, -}; - -struct xt_mark_target_info_v1 { - unsigned long mark; - __u8 mode; -}; - struct xt_mark_tginfo2 { __u32 mark, mask; }; diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index 67574bcfb8ac..225f8d11e173 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -24,39 +24,6 @@ MODULE_DESCRIPTION("Xtables: packet mark modification"); MODULE_ALIAS("ipt_MARK"); MODULE_ALIAS("ip6t_MARK"); -static unsigned int -mark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_mark_target_info *markinfo = par->targinfo; - - skb->mark = markinfo->mark; - return XT_CONTINUE; -} - -static unsigned int -mark_tg_v1(struct sk_buff *skb, const struct xt_target_param *par) -{ - const struct xt_mark_target_info_v1 *markinfo = par->targinfo; - int mark = 0; - - switch (markinfo->mode) { - case XT_MARK_SET: - mark = markinfo->mark; - break; - - case XT_MARK_AND: - mark = skb->mark & markinfo->mark; - break; - - case XT_MARK_OR: - mark = skb->mark | markinfo->mark; - break; - } - - skb->mark = mark; - return XT_CONTINUE; -} - static unsigned int mark_tg(struct sk_buff *skb, const struct xt_target_param *par) { @@ -66,135 +33,23 @@ mark_tg(struct sk_buff *skb, const struct xt_target_param *par) return XT_CONTINUE; } -static bool mark_tg_check_v0(const struct xt_tgchk_param *par) -{ - const struct xt_mark_target_info *markinfo = par->targinfo; - - if (markinfo->mark > 0xffffffff) { - printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); - return false; - } - return true; -} - -static bool mark_tg_check_v1(const struct xt_tgchk_param *par) -{ - const struct xt_mark_target_info_v1 *markinfo = par->targinfo; - - if (markinfo->mode != XT_MARK_SET - && markinfo->mode != XT_MARK_AND - && markinfo->mode != XT_MARK_OR) { - printk(KERN_WARNING "MARK: unknown mode %u\n", - markinfo->mode); - return false; - } - if (markinfo->mark > 0xffffffff) { - printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); - return false; - } - return true; -} - -#ifdef CONFIG_COMPAT -struct compat_xt_mark_target_info { - compat_ulong_t mark; -}; - -static void mark_tg_compat_from_user_v0(void *dst, void *src) -{ - const struct compat_xt_mark_target_info *cm = src; - struct xt_mark_target_info m = { - .mark = cm->mark, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int mark_tg_compat_to_user_v0(void __user *dst, void *src) 
-{ - const struct xt_mark_target_info *m = src; - struct compat_xt_mark_target_info cm = { - .mark = m->mark, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} - -struct compat_xt_mark_target_info_v1 { - compat_ulong_t mark; - u_int8_t mode; - u_int8_t __pad1; - u_int16_t __pad2; -}; - -static void mark_tg_compat_from_user_v1(void *dst, void *src) -{ - const struct compat_xt_mark_target_info_v1 *cm = src; - struct xt_mark_target_info_v1 m = { - .mark = cm->mark, - .mode = cm->mode, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int mark_tg_compat_to_user_v1(void __user *dst, void *src) -{ - const struct xt_mark_target_info_v1 *m = src; - struct compat_xt_mark_target_info_v1 cm = { - .mark = m->mark, - .mode = m->mode, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_target mark_tg_reg[] __read_mostly = { - { - .name = "MARK", - .family = NFPROTO_UNSPEC, - .revision = 0, - .checkentry = mark_tg_check_v0, - .target = mark_tg_v0, - .targetsize = sizeof(struct xt_mark_target_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_target_info), - .compat_from_user = mark_tg_compat_from_user_v0, - .compat_to_user = mark_tg_compat_to_user_v0, -#endif - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "MARK", - .family = NFPROTO_UNSPEC, - .revision = 1, - .checkentry = mark_tg_check_v1, - .target = mark_tg_v1, - .targetsize = sizeof(struct xt_mark_target_info_v1), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_target_info_v1), - .compat_from_user = mark_tg_compat_from_user_v1, - .compat_to_user = mark_tg_compat_to_user_v1, -#endif - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "MARK", - .revision = 2, - .family = NFPROTO_UNSPEC, - .target = mark_tg, - .targetsize = sizeof(struct xt_mark_tginfo2), - .me = THIS_MODULE, - }, +static struct xt_target mark_tg_reg __read_mostly = { + .name = "MARK", + .revision = 2, + .family = NFPROTO_UNSPEC, + .target = mark_tg, + .targetsize = sizeof(struct xt_mark_tginfo2), + .me = THIS_MODULE, }; static int __init mark_tg_init(void) { - return xt_register_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg)); + return xt_register_target(&mark_tg_reg); } static void __exit mark_tg_exit(void) { - xt_unregister_targets(mark_tg_reg, ARRAY_SIZE(mark_tg_reg)); + xt_unregister_target(&mark_tg_reg); } module_init(mark_tg_init); -- cgit v1.2.3 From 84899a2b9adaf6c2e20d198d7c24562ce6b391d8 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 18:50:33 +0200 Subject: netfilter: xtables: remove xt_connmark v0 Superseded by xt_connmark v1 (v2.6.24-2919-g96e3227). 
Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 - include/linux/netfilter/xt_connmark.h | 5 -- net/netfilter/xt_connmark.c | 101 ++++------------------------- 3 files changed, 11 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 121e19c9eee6..54f935794922 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -238,9 +238,6 @@ What (Why): - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_connmark match revision 0 - (superseded by xt_connmark match revision 1) - - xt_conntrack match revision 0 (superseded by xt_conntrack match revision 1) diff --git a/include/linux/netfilter/xt_connmark.h b/include/linux/netfilter/xt_connmark.h index 571e266d004c..619e47cde01a 100644 --- a/include/linux/netfilter/xt_connmark.h +++ b/include/linux/netfilter/xt_connmark.h @@ -12,11 +12,6 @@ * (at your option) any later version. */ -struct xt_connmark_info { - unsigned long mark, mask; - __u8 invert; -}; - struct xt_connmark_mtinfo1 { __u32 mark, mask; __u8 invert; diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index 86cacab7a4a3..122aa8b0147b 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -47,36 +47,6 @@ connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ((ct->mark & info->mask) == info->mark) ^ info->invert; } -static bool -connmark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct xt_connmark_info *info = par->matchinfo; - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - - ct = nf_ct_get(skb, &ctinfo); - if (!ct) - return false; - - return ((ct->mark & info->mask) == info->mark) ^ info->invert; -} - -static bool connmark_mt_check_v0(const struct xt_mtchk_param *par) -{ - const struct xt_connmark_info *cm = par->matchinfo; - - if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { - printk(KERN_WARNING "connmark: only support 32bit mark\n"); - return false; - } - if (nf_ct_l3proto_try_module_get(par->family) < 0) { - printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", par->family); - return false; - } - return true; -} - static bool connmark_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { @@ -92,74 +62,25 @@ static void connmark_mt_destroy(const struct xt_mtdtor_param *par) nf_ct_l3proto_module_put(par->family); } -#ifdef CONFIG_COMPAT -struct compat_xt_connmark_info { - compat_ulong_t mark, mask; - u_int8_t invert; - u_int8_t __pad1; - u_int16_t __pad2; -}; - -static void connmark_mt_compat_from_user_v0(void *dst, void *src) -{ - const struct compat_xt_connmark_info *cm = src; - struct xt_connmark_info m = { - .mark = cm->mark, - .mask = cm->mask, - .invert = cm->invert, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int connmark_mt_compat_to_user_v0(void __user *dst, void *src) -{ - const struct xt_connmark_info *m = src; - struct compat_xt_connmark_info cm = { - .mark = m->mark, - .mask = m->mask, - .invert = m->invert, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? 
-EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_match connmark_mt_reg[] __read_mostly = { - { - .name = "connmark", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = connmark_mt_check_v0, - .match = connmark_mt_v0, - .destroy = connmark_mt_destroy, - .matchsize = sizeof(struct xt_connmark_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_connmark_info), - .compat_from_user = connmark_mt_compat_from_user_v0, - .compat_to_user = connmark_mt_compat_to_user_v0, -#endif - .me = THIS_MODULE - }, - { - .name = "connmark", - .revision = 1, - .family = NFPROTO_UNSPEC, - .checkentry = connmark_mt_check, - .match = connmark_mt, - .matchsize = sizeof(struct xt_connmark_mtinfo1), - .destroy = connmark_mt_destroy, - .me = THIS_MODULE, - }, +static struct xt_match connmark_mt_reg __read_mostly = { + .name = "connmark", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = connmark_mt_check, + .match = connmark_mt, + .matchsize = sizeof(struct xt_connmark_mtinfo1), + .destroy = connmark_mt_destroy, + .me = THIS_MODULE, }; static int __init connmark_mt_init(void) { - return xt_register_matches(connmark_mt_reg, - ARRAY_SIZE(connmark_mt_reg)); + return xt_register_match(&connmark_mt_reg); } static void __exit connmark_mt_exit(void) { - xt_unregister_matches(connmark_mt_reg, ARRAY_SIZE(connmark_mt_reg)); + xt_unregister_match(&connmark_mt_reg); } module_init(connmark_mt_init); -- cgit v1.2.3 From 9e05ec4b1804a1ba51f61fe169aef9b86edcd3f7 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 18:56:14 +0200 Subject: netfilter: xtables: remove xt_conntrack v0 Superseded by xt_conntrack v1 (v2.6.24-2921-g64eb12f). Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 - include/linux/netfilter/xt_conntrack.h | 36 ------- net/netfilter/xt_conntrack.c | 155 +---------------------------- 3 files changed, 1 insertion(+), 193 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 54f935794922..6746473ef033 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -238,9 +238,6 @@ What (Why): - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_conntrack match revision 0 - (superseded by xt_conntrack match revision 1) - - xt_iprange match revision 0, include/linux/netfilter_ipv4/ipt_iprange.h (superseded by xt_iprange match revision 1) diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h index 7ae05338e94c..54f47a2f6152 100644 --- a/include/linux/netfilter/xt_conntrack.h +++ b/include/linux/netfilter/xt_conntrack.h @@ -32,42 +32,6 @@ enum { XT_CONNTRACK_DIRECTION = 1 << 12, }; -/* This is exposed to userspace, so remains frozen in time. */ -struct ip_conntrack_old_tuple -{ - struct { - __be32 ip; - union { - __u16 all; - } u; - } src; - - struct { - __be32 ip; - union { - __u16 all; - } u; - - /* The protocol. 
*/ - __u16 protonum; - } dst; -}; - -struct xt_conntrack_info -{ - unsigned int statemask, statusmask; - - struct ip_conntrack_old_tuple tuple[IP_CT_DIR_MAX]; - struct in_addr sipmsk[IP_CT_DIR_MAX], dipmsk[IP_CT_DIR_MAX]; - - unsigned long expires_min, expires_max; - - /* Flags word */ - __u8 flags; - /* Inverse flags */ - __u8 invflags; -}; - struct xt_conntrack_mtinfo1 { union nf_inet_addr origsrc_addr, origsrc_mask; union nf_inet_addr origdst_addr, origdst_mask; diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index fc581800698e..6dc4652f2fe8 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -19,100 +19,11 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); -MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: connection tracking state match"); MODULE_ALIAS("ipt_conntrack"); MODULE_ALIAS("ip6t_conntrack"); -static bool -conntrack_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct xt_conntrack_info *sinfo = par->matchinfo; - const struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - unsigned int statebit; - - ct = nf_ct_get(skb, &ctinfo); - -#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & (invflg))) - - if (ct == &nf_conntrack_untracked) - statebit = XT_CONNTRACK_STATE_UNTRACKED; - else if (ct) - statebit = XT_CONNTRACK_STATE_BIT(ctinfo); - else - statebit = XT_CONNTRACK_STATE_INVALID; - - if (sinfo->flags & XT_CONNTRACK_STATE) { - if (ct) { - if (test_bit(IPS_SRC_NAT_BIT, &ct->status)) - statebit |= XT_CONNTRACK_STATE_SNAT; - if (test_bit(IPS_DST_NAT_BIT, &ct->status)) - statebit |= XT_CONNTRACK_STATE_DNAT; - } - if (FWINV((statebit & sinfo->statemask) == 0, - XT_CONNTRACK_STATE)) - return false; - } - - if (ct == NULL) { - if (sinfo->flags & ~XT_CONNTRACK_STATE) - return false; - return true; - } - - if (sinfo->flags & XT_CONNTRACK_PROTO && - FWINV(nf_ct_protonum(ct) != - sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, - XT_CONNTRACK_PROTO)) - return false; - - if (sinfo->flags & XT_CONNTRACK_ORIGSRC && - FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip & - sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != - sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, - XT_CONNTRACK_ORIGSRC)) - return false; - - if (sinfo->flags & XT_CONNTRACK_ORIGDST && - FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip & - sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != - sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, - XT_CONNTRACK_ORIGDST)) - return false; - - if (sinfo->flags & XT_CONNTRACK_REPLSRC && - FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip & - sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != - sinfo->tuple[IP_CT_DIR_REPLY].src.ip, - XT_CONNTRACK_REPLSRC)) - return false; - - if (sinfo->flags & XT_CONNTRACK_REPLDST && - FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip & - sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != - sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, - XT_CONNTRACK_REPLDST)) - return false; - - if (sinfo->flags & XT_CONNTRACK_STATUS && - FWINV((ct->status & sinfo->statusmask) == 0, - XT_CONNTRACK_STATUS)) - return false; - - if(sinfo->flags & XT_CONNTRACK_EXPIRES) { - unsigned long expires = timer_pending(&ct->timeout) ? 
- (ct->timeout.expires - jiffies)/HZ : 0; - - if (FWINV(!(expires >= sinfo->expires_min && - expires <= sinfo->expires_max), - XT_CONNTRACK_EXPIRES)) - return false; - } - return true; -#undef FWINV -} - static bool conntrack_addrcmp(const union nf_inet_addr *kaddr, const union nf_inet_addr *uaddr, @@ -337,71 +248,7 @@ static void conntrack_mt_destroy_v1(const struct xt_mtdtor_param *par) conntrack_mt_destroy(par); } -#ifdef CONFIG_COMPAT -struct compat_xt_conntrack_info -{ - compat_uint_t statemask; - compat_uint_t statusmask; - struct ip_conntrack_old_tuple tuple[IP_CT_DIR_MAX]; - struct in_addr sipmsk[IP_CT_DIR_MAX]; - struct in_addr dipmsk[IP_CT_DIR_MAX]; - compat_ulong_t expires_min; - compat_ulong_t expires_max; - u_int8_t flags; - u_int8_t invflags; -}; - -static void conntrack_mt_compat_from_user_v0(void *dst, void *src) -{ - const struct compat_xt_conntrack_info *cm = src; - struct xt_conntrack_info m = { - .statemask = cm->statemask, - .statusmask = cm->statusmask, - .expires_min = cm->expires_min, - .expires_max = cm->expires_max, - .flags = cm->flags, - .invflags = cm->invflags, - }; - memcpy(m.tuple, cm->tuple, sizeof(m.tuple)); - memcpy(m.sipmsk, cm->sipmsk, sizeof(m.sipmsk)); - memcpy(m.dipmsk, cm->dipmsk, sizeof(m.dipmsk)); - memcpy(dst, &m, sizeof(m)); -} - -static int conntrack_mt_compat_to_user_v0(void __user *dst, void *src) -{ - const struct xt_conntrack_info *m = src; - struct compat_xt_conntrack_info cm = { - .statemask = m->statemask, - .statusmask = m->statusmask, - .expires_min = m->expires_min, - .expires_max = m->expires_max, - .flags = m->flags, - .invflags = m->invflags, - }; - memcpy(cm.tuple, m->tuple, sizeof(cm.tuple)); - memcpy(cm.sipmsk, m->sipmsk, sizeof(cm.sipmsk)); - memcpy(cm.dipmsk, m->dipmsk, sizeof(cm.dipmsk)); - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif - static struct xt_match conntrack_mt_reg[] __read_mostly = { - { - .name = "conntrack", - .revision = 0, - .family = NFPROTO_IPV4, - .match = conntrack_mt_v0, - .checkentry = conntrack_mt_check, - .destroy = conntrack_mt_destroy, - .matchsize = sizeof(struct xt_conntrack_info), - .me = THIS_MODULE, -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_conntrack_info), - .compat_from_user = conntrack_mt_compat_from_user_v0, - .compat_to_user = conntrack_mt_compat_to_user_v0, -#endif - }, { .name = "conntrack", .revision = 1, -- cgit v1.2.3 From 36d4084dc8eb7a9a3655a2041097a46aff3061e9 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 18:58:19 +0200 Subject: netfilter: xtables: remove xt_iprange v0 Superseded by xt_iprange v1 (v2.6.24-2928-g1a50c5a1). 
Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 4 --- include/linux/netfilter_ipv4/Kbuild | 1 - include/linux/netfilter_ipv4/ipt_iprange.h | 21 -------------- net/netfilter/xt_iprange.c | 45 ++---------------------------- 4 files changed, 2 insertions(+), 69 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/ipt_iprange.h (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 6746473ef033..8862b037f0ac 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -238,10 +238,6 @@ What (Why): - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_iprange match revision 0, - include/linux/netfilter_ipv4/ipt_iprange.h - (superseded by xt_iprange match revision 1) - - xt_mark match revision 0 (superseded by xt_mark match revision 1) diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 86d81a285f9c..5e361ef44e87 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -23,7 +23,6 @@ header-y += ipt_ecn.h header-y += ipt_esp.h header-y += ipt_hashlimit.h header-y += ipt_helper.h -header-y += ipt_iprange.h header-y += ipt_length.h header-y += ipt_limit.h header-y += ipt_mac.h diff --git a/include/linux/netfilter_ipv4/ipt_iprange.h b/include/linux/netfilter_ipv4/ipt_iprange.h deleted file mode 100644 index 5f1aebde4d2f..000000000000 --- a/include/linux/netfilter_ipv4/ipt_iprange.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _IPT_IPRANGE_H -#define _IPT_IPRANGE_H - -#include -#include - -struct ipt_iprange { - /* Inclusive: network order. */ - __be32 min_ip, max_ip; -}; - -struct ipt_iprange_info -{ - struct ipt_iprange src; - struct ipt_iprange dst; - - /* Flags from above */ - u_int8_t flags; -}; - -#endif /* _IPT_IPRANGE_H */ diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index 501f9b623188..ffc96387d556 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -14,40 +14,6 @@ #include #include #include -#include - -static bool -iprange_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ipt_iprange_info *info = par->matchinfo; - const struct iphdr *iph = ip_hdr(skb); - - if (info->flags & IPRANGE_SRC) { - if ((ntohl(iph->saddr) < ntohl(info->src.min_ip) - || ntohl(iph->saddr) > ntohl(info->src.max_ip)) - ^ !!(info->flags & IPRANGE_SRC_INV)) { - pr_debug("src IP %pI4 NOT in range %s%pI4-%pI4\n", - &iph->saddr, - info->flags & IPRANGE_SRC_INV ? "(INV) " : "", - &info->src.min_ip, - &info->src.max_ip); - return false; - } - } - if (info->flags & IPRANGE_DST) { - if ((ntohl(iph->daddr) < ntohl(info->dst.min_ip) - || ntohl(iph->daddr) > ntohl(info->dst.max_ip)) - ^ !!(info->flags & IPRANGE_DST_INV)) { - pr_debug("dst IP %pI4 NOT in range %s%pI4-%pI4\n", - &iph->daddr, - info->flags & IPRANGE_DST_INV ? 
"(INV) " : "", - &info->dst.min_ip, - &info->dst.max_ip); - return false; - } - } - return true; -} static bool iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par) @@ -125,14 +91,6 @@ iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par) } static struct xt_match iprange_mt_reg[] __read_mostly = { - { - .name = "iprange", - .revision = 0, - .family = NFPROTO_IPV4, - .match = iprange_mt_v0, - .matchsize = sizeof(struct ipt_iprange_info), - .me = THIS_MODULE, - }, { .name = "iprange", .revision = 1, @@ -164,7 +122,8 @@ static void __exit iprange_mt_exit(void) module_init(iprange_mt_init); module_exit(iprange_mt_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik , Jan Engelhardt "); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: arbitrary IPv4 range matching"); MODULE_ALIAS("ipt_iprange"); MODULE_ALIAS("ip6t_iprange"); -- cgit v1.2.3 From 4725c7287ef2c4340cb433f59e40d143c1f43c22 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 19:02:27 +0200 Subject: netfilter: xtables: remove xt_mark v0 Superseded by xt_mark v1 (v2.6.24-2922-g17b0d7e). Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 -- include/linux/netfilter/xt_mark.h | 5 -- net/netfilter/xt_mark.c | 86 ++++-------------------------- 3 files changed, 10 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 8862b037f0ac..5556d2300bea 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -238,9 +238,6 @@ What (Why): - "forwarding" header files like ipt_mac.h in include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_mark match revision 0 - (superseded by xt_mark match revision 1) - - xt_recent: the old ipt_recent proc dir (superseded by /proc/net/xt_recent) diff --git a/include/linux/netfilter/xt_mark.h b/include/linux/netfilter/xt_mark.h index 6fa460a3cc29..6607c8f38ea5 100644 --- a/include/linux/netfilter/xt_mark.h +++ b/include/linux/netfilter/xt_mark.h @@ -3,11 +3,6 @@ #include -struct xt_mark_info { - unsigned long mark, mask; - __u8 invert; -}; - struct xt_mark_mtinfo1 { __u32 mark, mask; __u8 invert; diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 10b9e34bbc5b..1db07d8125f8 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -3,7 +3,7 @@ * * (C) 1999-2001 Marc Boucher * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * Jan Engelhardt + * Jan Engelhardt * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -22,14 +22,6 @@ MODULE_DESCRIPTION("Xtables: packet mark match"); MODULE_ALIAS("ipt_mark"); MODULE_ALIAS("ip6t_mark"); -static bool -mark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct xt_mark_info *info = par->matchinfo; - - return ((skb->mark & info->mask) == info->mark) ^ info->invert; -} - static bool mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) { @@ -38,81 +30,23 @@ mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) return ((skb->mark & info->mask) == info->mark) ^ info->invert; } -static bool mark_mt_check_v0(const struct xt_mtchk_param *par) -{ - const struct xt_mark_info *minfo = par->matchinfo; - - if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) { - printk(KERN_WARNING "mark: 
only supports 32bit mark\n"); - return false; - } - return true; -} - -#ifdef CONFIG_COMPAT -struct compat_xt_mark_info { - compat_ulong_t mark, mask; - u_int8_t invert; - u_int8_t __pad1; - u_int16_t __pad2; -}; - -static void mark_mt_compat_from_user_v0(void *dst, void *src) -{ - const struct compat_xt_mark_info *cm = src; - struct xt_mark_info m = { - .mark = cm->mark, - .mask = cm->mask, - .invert = cm->invert, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int mark_mt_compat_to_user_v0(void __user *dst, void *src) -{ - const struct xt_mark_info *m = src; - struct compat_xt_mark_info cm = { - .mark = m->mark, - .mask = m->mask, - .invert = m->invert, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_match mark_mt_reg[] __read_mostly = { - { - .name = "mark", - .revision = 0, - .family = NFPROTO_UNSPEC, - .checkentry = mark_mt_check_v0, - .match = mark_mt_v0, - .matchsize = sizeof(struct xt_mark_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_info), - .compat_from_user = mark_mt_compat_from_user_v0, - .compat_to_user = mark_mt_compat_to_user_v0, -#endif - .me = THIS_MODULE, - }, - { - .name = "mark", - .revision = 1, - .family = NFPROTO_UNSPEC, - .match = mark_mt, - .matchsize = sizeof(struct xt_mark_mtinfo1), - .me = THIS_MODULE, - }, +static struct xt_match mark_mt_reg __read_mostly = { + .name = "mark", + .revision = 1, + .family = NFPROTO_UNSPEC, + .match = mark_mt, + .matchsize = sizeof(struct xt_mark_mtinfo1), + .me = THIS_MODULE, }; static int __init mark_mt_init(void) { - return xt_register_matches(mark_mt_reg, ARRAY_SIZE(mark_mt_reg)); + return xt_register_match(&mark_mt_reg); } static void __exit mark_mt_exit(void) { - xt_unregister_matches(mark_mt_reg, ARRAY_SIZE(mark_mt_reg)); + xt_unregister_match(&mark_mt_reg); } module_init(mark_mt_init); -- cgit v1.2.3 From 6e2efaacb3579fd9643d0dc59963b58b801c03a1 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Mon, 10 Aug 2009 10:06:53 +0200 Subject: sound: ymfpci: increase timer resolution to 96 kHz Allow the interval timer to be programmed with its full 96 kHz precision. 
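For illustration only (not part of this patch): a standalone sketch of the divisor
arithmetic the driver change uses. The program_timer() helper and struct timer_setup
are made up for the example; the 96 kHz base rate, the count = sticks - 1 programming
and the divisor-1 workaround follow the diff below.

/* Illustrative sketch of the divisor handling, not the driver itself.
 * The interval timer runs at 96 kHz and is programmed with (divisor - 1);
 * a divisor of 1 is not allowed, so one requested tick is faked with
 * divisor 2 and two ticks reported per interrupt. */
#include <stdio.h>

struct timer_setup {
	unsigned int count;		/* value written to YDSXGR_TIMERCOUNT */
	unsigned int ticks_per_irq;	/* ticks reported to the timer core */
};

static struct timer_setup program_timer(unsigned int sticks)
{
	struct timer_setup s;

	if (sticks > 1) {
		s.ticks_per_irq = sticks;
		s.count = sticks - 1;
	} else {
		s.ticks_per_irq = 2;
		s.count = 2 - 1;
	}
	return s;
}

int main(void)
{
	unsigned int requests[] = { 1, 2, 96, 96000 };

	for (unsigned int i = 0; i < 4; i++) {
		struct timer_setup s = program_timer(requests[i]);
		/* one tick = 1/96000 s ~= 10.417 us */
		printf("sticks=%u -> count=%u, irq period ~= %.3f us\n",
		       requests[i], s.count,
		       (s.count + 1) * (1e6 / 96000.0));
	}
	return 0;
}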
Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai --- include/sound/ymfpci.h | 1 + sound/pci/ymfpci/ymfpci_main.c | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/sound/ymfpci.h b/include/sound/ymfpci.h index 05ead6698434..444cd6ba0ba7 100644 --- a/include/sound/ymfpci.h +++ b/include/sound/ymfpci.h @@ -331,6 +331,7 @@ struct snd_ymfpci { struct snd_ac97 *ac97; struct snd_rawmidi *rawmidi; struct snd_timer *timer; + unsigned int timer_ticks; struct pci_dev *pci; struct snd_card *card; diff --git a/sound/pci/ymfpci/ymfpci_main.c b/sound/pci/ymfpci/ymfpci_main.c index 2f0925236a1b..5518371db13f 100644 --- a/sound/pci/ymfpci/ymfpci_main.c +++ b/sound/pci/ymfpci/ymfpci_main.c @@ -834,7 +834,7 @@ static irqreturn_t snd_ymfpci_interrupt(int irq, void *dev_id) status = snd_ymfpci_readw(chip, YDSXGR_INTFLAG); if (status & 1) { if (chip->timer) - snd_timer_interrupt(chip->timer, chip->timer->sticks); + snd_timer_interrupt(chip->timer, chip->timer_ticks); } snd_ymfpci_writew(chip, YDSXGR_INTFLAG, status); @@ -1885,8 +1885,18 @@ static int snd_ymfpci_timer_start(struct snd_timer *timer) unsigned int count; chip = snd_timer_chip(timer); - count = (timer->sticks << 1) - 1; spin_lock_irqsave(&chip->reg_lock, flags); + if (timer->sticks > 1) { + chip->timer_ticks = timer->sticks; + count = timer->sticks - 1; + } else { + /* + * Divisor 1 is not allowed; fake it by using divisor 2 and + * counting two ticks for each interrupt. + */ + chip->timer_ticks = 2; + count = 2 - 1; + } snd_ymfpci_writew(chip, YDSXGR_TIMERCOUNT, count); snd_ymfpci_writeb(chip, YDSXGR_TIMERCTRL, 0x03); spin_unlock_irqrestore(&chip->reg_lock, flags); @@ -1909,14 +1919,14 @@ static int snd_ymfpci_timer_precise_resolution(struct snd_timer *timer, unsigned long *num, unsigned long *den) { *num = 1; - *den = 48000; + *den = 96000; return 0; } static struct snd_timer_hardware snd_ymfpci_timer_hw = { .flags = SNDRV_TIMER_HW_AUTO, - .resolution = 20833, /* 1/fs = 20.8333...us */ - .ticks = 0x8000, + .resolution = 10417, /* 1 / 96 kHz = 10.41666...us */ + .ticks = 0x10000, .start = snd_ymfpci_timer_start, .stop = snd_ymfpci_timer_stop, .precise_resolution = snd_ymfpci_timer_precise_resolution, -- cgit v1.2.3 From 6461caed83412ae3e9a16785ffa64396fb66c6a6 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 19:46:26 +0200 Subject: netfilter: xtables: remove xt_owner v0 Superseded by xt_owner v1 (v2.6.24-2388-g0265ab4). 
Signed-off-by: Jan Engelhardt --- include/linux/netfilter_ipv4/Kbuild | 1 - include/linux/netfilter_ipv4/ipt_owner.h | 20 ----- include/linux/netfilter_ipv6/Kbuild | 1 - include/linux/netfilter_ipv6/ip6t_owner.h | 18 ----- net/netfilter/xt_owner.c | 130 +++--------------------------- 5 files changed, 12 insertions(+), 158 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/ipt_owner.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_owner.h (limited to 'include') diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 5e361ef44e87..541300531cb3 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -28,7 +28,6 @@ header-y += ipt_limit.h header-y += ipt_mac.h header-y += ipt_mark.h header-y += ipt_multiport.h -header-y += ipt_owner.h header-y += ipt_physdev.h header-y += ipt_pkttype.h header-y += ipt_policy.h diff --git a/include/linux/netfilter_ipv4/ipt_owner.h b/include/linux/netfilter_ipv4/ipt_owner.h deleted file mode 100644 index a78445be9992..000000000000 --- a/include/linux/netfilter_ipv4/ipt_owner.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _IPT_OWNER_H -#define _IPT_OWNER_H - -/* match and invert flags */ -#define IPT_OWNER_UID 0x01 -#define IPT_OWNER_GID 0x02 -#define IPT_OWNER_PID 0x04 -#define IPT_OWNER_SID 0x08 -#define IPT_OWNER_COMM 0x10 - -struct ipt_owner_info { - __kernel_uid32_t uid; - __kernel_gid32_t gid; - __kernel_pid_t pid; - __kernel_pid_t sid; - char comm[16]; - u_int8_t match, invert; /* flags */ -}; - -#endif /*_IPT_OWNER_H*/ diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild index aca4bd1f6d7c..4610a16da0ab 100644 --- a/include/linux/netfilter_ipv6/Kbuild +++ b/include/linux/netfilter_ipv6/Kbuild @@ -14,7 +14,6 @@ header-y += ip6t_mark.h header-y += ip6t_mh.h header-y += ip6t_multiport.h header-y += ip6t_opts.h -header-y += ip6t_owner.h header-y += ip6t_physdev.h header-y += ip6t_policy.h header-y += ip6t_rt.h diff --git a/include/linux/netfilter_ipv6/ip6t_owner.h b/include/linux/netfilter_ipv6/ip6t_owner.h deleted file mode 100644 index ec5cc7a38c42..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_owner.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _IP6T_OWNER_H -#define _IP6T_OWNER_H - -/* match and invert flags */ -#define IP6T_OWNER_UID 0x01 -#define IP6T_OWNER_GID 0x02 -#define IP6T_OWNER_PID 0x04 -#define IP6T_OWNER_SID 0x08 - -struct ip6t_owner_info { - __kernel_uid32_t uid; - __kernel_gid32_t gid; - __kernel_pid_t pid; - __kernel_pid_t sid; - u_int8_t match, invert; /* flags */ -}; - -#endif /*_IPT_OWNER_H*/ diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 22b2a5e881ea..d24c76dffee2 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -5,7 +5,6 @@ * (C) 2000 Marc Boucher * * Copyright © CC Computer Consultants GmbH, 2007 - 2008 - * * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,60 +16,6 @@ #include #include #include -#include -#include - -static bool -owner_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ipt_owner_info *info = par->matchinfo; - const struct file *filp; - - if (skb->sk == NULL || skb->sk->sk_socket == NULL) - return false; - - filp = skb->sk->sk_socket->file; - if (filp == NULL) - return false; - - if (info->match & IPT_OWNER_UID) - if ((filp->f_cred->fsuid != info->uid) ^ - !!(info->invert & IPT_OWNER_UID)) - return false; - - if 
(info->match & IPT_OWNER_GID) - if ((filp->f_cred->fsgid != info->gid) ^ - !!(info->invert & IPT_OWNER_GID)) - return false; - - return true; -} - -static bool -owner_mt6_v0(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ip6t_owner_info *info = par->matchinfo; - const struct file *filp; - - if (skb->sk == NULL || skb->sk->sk_socket == NULL) - return false; - - filp = skb->sk->sk_socket->file; - if (filp == NULL) - return false; - - if (info->match & IP6T_OWNER_UID) - if ((filp->f_cred->fsuid != info->uid) ^ - !!(info->invert & IP6T_OWNER_UID)) - return false; - - if (info->match & IP6T_OWNER_GID) - if ((filp->f_cred->fsgid != info->gid) ^ - !!(info->invert & IP6T_OWNER_GID)) - return false; - - return true; -} static bool owner_mt(const struct sk_buff *skb, const struct xt_match_param *par) @@ -107,81 +52,30 @@ owner_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } -static bool owner_mt_check_v0(const struct xt_mtchk_param *par) -{ - const struct ipt_owner_info *info = par->matchinfo; - - if (info->match & (IPT_OWNER_PID | IPT_OWNER_SID | IPT_OWNER_COMM)) { - printk(KERN_WARNING KBUILD_MODNAME - ": PID, SID and command matching is not " - "supported anymore\n"); - return false; - } - - return true; -} - -static bool owner_mt6_check_v0(const struct xt_mtchk_param *par) -{ - const struct ip6t_owner_info *info = par->matchinfo; - - if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) { - printk(KERN_WARNING KBUILD_MODNAME - ": PID and SID matching is not supported anymore\n"); - return false; - } - - return true; -} - -static struct xt_match owner_mt_reg[] __read_mostly = { - { - .name = "owner", - .revision = 0, - .family = NFPROTO_IPV4, - .match = owner_mt_v0, - .matchsize = sizeof(struct ipt_owner_info), - .checkentry = owner_mt_check_v0, - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, - }, - { - .name = "owner", - .revision = 0, - .family = NFPROTO_IPV6, - .match = owner_mt6_v0, - .matchsize = sizeof(struct ip6t_owner_info), - .checkentry = owner_mt6_check_v0, - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, - }, - { - .name = "owner", - .revision = 1, - .family = NFPROTO_UNSPEC, - .match = owner_mt, - .matchsize = sizeof(struct xt_owner_match_info), - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, - }, +static struct xt_match owner_mt_reg __read_mostly = { + .name = "owner", + .revision = 1, + .family = NFPROTO_UNSPEC, + .match = owner_mt, + .matchsize = sizeof(struct xt_owner_match_info), + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING), + .me = THIS_MODULE, }; static int __init owner_mt_init(void) { - return xt_register_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg)); + return xt_register_match(&owner_mt_reg); } static void __exit owner_mt_exit(void) { - xt_unregister_matches(owner_mt_reg, ARRAY_SIZE(owner_mt_reg)); + xt_unregister_match(&owner_mt_reg); } module_init(owner_mt_init); module_exit(owner_mt_exit); -MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("Xtables: socket owner matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_owner"); -- cgit v1.2.3 From 93bb1e9d117bfc60306b2b8bd9e0fa7ba3c88636 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 12 Jun 2009 19:47:21 +0200 Subject: netfilter: xtables: remove redirecting header files When IPv4 and IPv6 matches were unified approx. 3.5 years ago, they received new header filenames (e.g. 
xt_CLASSIFY.h). Let's remove the old ones now. Signed-off-by: Jan Engelhardt --- Documentation/feature-removal-schedule.txt | 3 - include/linux/netfilter_ipv4/Kbuild | 28 ------- include/linux/netfilter_ipv4/ipt_CLASSIFY.h | 7 -- include/linux/netfilter_ipv4/ipt_CONNMARK.h | 19 ----- include/linux/netfilter_ipv4/ipt_DSCP.h | 18 ----- include/linux/netfilter_ipv4/ipt_ECN.h | 4 +- include/linux/netfilter_ipv4/ipt_MARK.h | 18 ----- include/linux/netfilter_ipv4/ipt_NFQUEUE.h | 16 ---- include/linux/netfilter_ipv4/ipt_TCPMSS.h | 9 --- include/linux/netfilter_ipv4/ipt_comment.h | 10 --- include/linux/netfilter_ipv4/ipt_connbytes.h | 18 ----- include/linux/netfilter_ipv4/ipt_connmark.h | 7 -- include/linux/netfilter_ipv4/ipt_conntrack.h | 28 ------- include/linux/netfilter_ipv4/ipt_dccp.h | 15 ---- include/linux/netfilter_ipv4/ipt_dscp.h | 21 ------ include/linux/netfilter_ipv4/ipt_ecn.h | 4 +- include/linux/netfilter_ipv4/ipt_esp.h | 10 --- include/linux/netfilter_ipv4/ipt_hashlimit.h | 14 ---- include/linux/netfilter_ipv4/ipt_helper.h | 7 -- include/linux/netfilter_ipv4/ipt_length.h | 7 -- include/linux/netfilter_ipv4/ipt_limit.h | 8 -- include/linux/netfilter_ipv4/ipt_mac.h | 7 -- include/linux/netfilter_ipv4/ipt_mark.h | 9 --- include/linux/netfilter_ipv4/ipt_multiport.h | 15 ---- include/linux/netfilter_ipv4/ipt_physdev.h | 17 ----- include/linux/netfilter_ipv4/ipt_pkttype.h | 7 -- include/linux/netfilter_ipv4/ipt_policy.h | 23 ------ include/linux/netfilter_ipv4/ipt_recent.h | 21 ------ include/linux/netfilter_ipv4/ipt_sctp.h | 105 -------------------------- include/linux/netfilter_ipv4/ipt_state.h | 15 ---- include/linux/netfilter_ipv4/ipt_string.h | 10 --- include/linux/netfilter_ipv4/ipt_tcpmss.h | 7 -- include/linux/netfilter_ipv6/Kbuild | 11 +-- include/linux/netfilter_ipv6/ip6t_MARK.h | 9 --- include/linux/netfilter_ipv6/ip6t_esp.h | 10 --- include/linux/netfilter_ipv6/ip6t_length.h | 8 -- include/linux/netfilter_ipv6/ip6t_limit.h | 8 -- include/linux/netfilter_ipv6/ip6t_mac.h | 7 -- include/linux/netfilter_ipv6/ip6t_mark.h | 9 --- include/linux/netfilter_ipv6/ip6t_multiport.h | 14 ---- include/linux/netfilter_ipv6/ip6t_physdev.h | 17 ----- include/linux/netfilter_ipv6/ip6t_policy.h | 23 ------ 42 files changed, 5 insertions(+), 618 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/ipt_CLASSIFY.h delete mode 100644 include/linux/netfilter_ipv4/ipt_CONNMARK.h delete mode 100644 include/linux/netfilter_ipv4/ipt_DSCP.h delete mode 100644 include/linux/netfilter_ipv4/ipt_MARK.h delete mode 100644 include/linux/netfilter_ipv4/ipt_NFQUEUE.h delete mode 100644 include/linux/netfilter_ipv4/ipt_TCPMSS.h delete mode 100644 include/linux/netfilter_ipv4/ipt_comment.h delete mode 100644 include/linux/netfilter_ipv4/ipt_connbytes.h delete mode 100644 include/linux/netfilter_ipv4/ipt_connmark.h delete mode 100644 include/linux/netfilter_ipv4/ipt_conntrack.h delete mode 100644 include/linux/netfilter_ipv4/ipt_dccp.h delete mode 100644 include/linux/netfilter_ipv4/ipt_dscp.h delete mode 100644 include/linux/netfilter_ipv4/ipt_esp.h delete mode 100644 include/linux/netfilter_ipv4/ipt_hashlimit.h delete mode 100644 include/linux/netfilter_ipv4/ipt_helper.h delete mode 100644 include/linux/netfilter_ipv4/ipt_length.h delete mode 100644 include/linux/netfilter_ipv4/ipt_limit.h delete mode 100644 include/linux/netfilter_ipv4/ipt_mac.h delete mode 100644 include/linux/netfilter_ipv4/ipt_mark.h delete mode 100644 include/linux/netfilter_ipv4/ipt_multiport.h delete mode 100644 
include/linux/netfilter_ipv4/ipt_physdev.h delete mode 100644 include/linux/netfilter_ipv4/ipt_pkttype.h delete mode 100644 include/linux/netfilter_ipv4/ipt_policy.h delete mode 100644 include/linux/netfilter_ipv4/ipt_recent.h delete mode 100644 include/linux/netfilter_ipv4/ipt_sctp.h delete mode 100644 include/linux/netfilter_ipv4/ipt_state.h delete mode 100644 include/linux/netfilter_ipv4/ipt_string.h delete mode 100644 include/linux/netfilter_ipv4/ipt_tcpmss.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_MARK.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_esp.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_length.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_limit.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_mac.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_mark.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_multiport.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_physdev.h delete mode 100644 include/linux/netfilter_ipv6/ip6t_policy.h (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 5556d2300bea..698e1e8b3042 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -235,9 +235,6 @@ Who: Thomas Gleixner --------------------------- What (Why): - - "forwarding" header files like ipt_mac.h in - include/linux/netfilter_ipv4/ and include/linux/netfilter_ipv6/ - - xt_recent: the old ipt_recent proc dir (superseded by /proc/net/xt_recent) diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 541300531cb3..431b40761920 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -1,42 +1,14 @@ -header-y += ipt_CLASSIFY.h header-y += ipt_CLUSTERIP.h -header-y += ipt_CONNMARK.h -header-y += ipt_DSCP.h header-y += ipt_ECN.h header-y += ipt_LOG.h -header-y += ipt_MARK.h -header-y += ipt_NFQUEUE.h header-y += ipt_REJECT.h header-y += ipt_SAME.h -header-y += ipt_TCPMSS.h header-y += ipt_TTL.h header-y += ipt_ULOG.h header-y += ipt_addrtype.h header-y += ipt_ah.h -header-y += ipt_comment.h -header-y += ipt_connbytes.h -header-y += ipt_connmark.h -header-y += ipt_conntrack.h -header-y += ipt_dccp.h -header-y += ipt_dscp.h header-y += ipt_ecn.h -header-y += ipt_esp.h -header-y += ipt_hashlimit.h -header-y += ipt_helper.h -header-y += ipt_length.h -header-y += ipt_limit.h -header-y += ipt_mac.h -header-y += ipt_mark.h -header-y += ipt_multiport.h -header-y += ipt_physdev.h -header-y += ipt_pkttype.h -header-y += ipt_policy.h header-y += ipt_realm.h -header-y += ipt_recent.h -header-y += ipt_sctp.h -header-y += ipt_state.h -header-y += ipt_string.h -header-y += ipt_tcpmss.h header-y += ipt_ttl.h unifdef-y += ip_queue.h diff --git a/include/linux/netfilter_ipv4/ipt_CLASSIFY.h b/include/linux/netfilter_ipv4/ipt_CLASSIFY.h deleted file mode 100644 index a46d511b5c36..000000000000 --- a/include/linux/netfilter_ipv4/ipt_CLASSIFY.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_CLASSIFY_H -#define _IPT_CLASSIFY_H - -#include -#define ipt_classify_target_info xt_classify_target_info - -#endif /*_IPT_CLASSIFY_H */ diff --git a/include/linux/netfilter_ipv4/ipt_CONNMARK.h b/include/linux/netfilter_ipv4/ipt_CONNMARK.h deleted file mode 100644 index 9ecfee0a9e33..000000000000 --- a/include/linux/netfilter_ipv4/ipt_CONNMARK.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef _IPT_CONNMARK_H_target -#define _IPT_CONNMARK_H_target - -/* Copyright (C) 2002,2004 MARA 
Systems AB - * by Henrik Nordstrom - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ -#include -#define IPT_CONNMARK_SET XT_CONNMARK_SET -#define IPT_CONNMARK_SAVE XT_CONNMARK_SAVE -#define IPT_CONNMARK_RESTORE XT_CONNMARK_RESTORE - -#define ipt_connmark_target_info xt_connmark_target_info - -#endif /*_IPT_CONNMARK_H_target*/ diff --git a/include/linux/netfilter_ipv4/ipt_DSCP.h b/include/linux/netfilter_ipv4/ipt_DSCP.h deleted file mode 100644 index 3491e524d5ea..000000000000 --- a/include/linux/netfilter_ipv4/ipt_DSCP.h +++ /dev/null @@ -1,18 +0,0 @@ -/* iptables module for setting the IPv4 DSCP field - * - * (C) 2002 Harald Welte - * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh - * This software is distributed under GNU GPL v2, 1991 - * - * See RFC2474 for a description of the DSCP field within the IP Header. - * - * ipt_DSCP.h,v 1.7 2002/03/14 12:03:13 laforge Exp -*/ -#ifndef _IPT_DSCP_TARGET_H -#define _IPT_DSCP_TARGET_H -#include -#include - -#define ipt_DSCP_info xt_DSCP_info - -#endif /* _IPT_DSCP_TARGET_H */ diff --git a/include/linux/netfilter_ipv4/ipt_ECN.h b/include/linux/netfilter_ipv4/ipt_ECN.h index 94e0d9866469..7ca45918ab8e 100644 --- a/include/linux/netfilter_ipv4/ipt_ECN.h +++ b/include/linux/netfilter_ipv4/ipt_ECN.h @@ -8,9 +8,9 @@ */ #ifndef _IPT_ECN_TARGET_H #define _IPT_ECN_TARGET_H -#include +#include -#define IPT_ECN_IP_MASK (~IPT_DSCP_MASK) +#define IPT_ECN_IP_MASK (~XT_DSCP_MASK) #define IPT_ECN_OP_SET_IP 0x01 /* set ECN bits of IPv4 header */ #define IPT_ECN_OP_SET_ECE 0x10 /* set ECE bit of TCP header */ diff --git a/include/linux/netfilter_ipv4/ipt_MARK.h b/include/linux/netfilter_ipv4/ipt_MARK.h deleted file mode 100644 index 697a486a96d3..000000000000 --- a/include/linux/netfilter_ipv4/ipt_MARK.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _IPT_MARK_H_target -#define _IPT_MARK_H_target - -/* Backwards compatibility for old userspace */ - -#include - -/* Version 0 */ -#define ipt_mark_target_info xt_mark_target_info - -/* Version 1 */ -#define IPT_MARK_SET XT_MARK_SET -#define IPT_MARK_AND XT_MARK_AND -#define IPT_MARK_OR XT_MARK_OR - -#define ipt_mark_target_info_v1 xt_mark_target_info_v1 - -#endif /*_IPT_MARK_H_target*/ diff --git a/include/linux/netfilter_ipv4/ipt_NFQUEUE.h b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h deleted file mode 100644 index 97a2a7557cb9..000000000000 --- a/include/linux/netfilter_ipv4/ipt_NFQUEUE.h +++ /dev/null @@ -1,16 +0,0 @@ -/* iptables module for using NFQUEUE mechanism - * - * (C) 2005 Harald Welte - * - * This software is distributed under GNU GPL v2, 1991 - * -*/ -#ifndef _IPT_NFQ_TARGET_H -#define _IPT_NFQ_TARGET_H - -/* Backwards compatibility for old userspace */ -#include - -#define ipt_NFQ_info xt_NFQ_info - -#endif /* _IPT_DSCP_TARGET_H */ diff --git a/include/linux/netfilter_ipv4/ipt_TCPMSS.h b/include/linux/netfilter_ipv4/ipt_TCPMSS.h deleted file mode 100644 index 7a850f945824..000000000000 --- a/include/linux/netfilter_ipv4/ipt_TCPMSS.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _IPT_TCPMSS_H -#define _IPT_TCPMSS_H - -#include - -#define ipt_tcpmss_info xt_tcpmss_info -#define IPT_TCPMSS_CLAMP_PMTU XT_TCPMSS_CLAMP_PMTU - -#endif /*_IPT_TCPMSS_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_comment.h b/include/linux/netfilter_ipv4/ipt_comment.h deleted file mode 100644 index ae2afc2f7481..000000000000 --- 
a/include/linux/netfilter_ipv4/ipt_comment.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _IPT_COMMENT_H -#define _IPT_COMMENT_H - -#include - -#define IPT_MAX_COMMENT_LEN XT_MAX_COMMENT_LEN - -#define ipt_comment_info xt_comment_info - -#endif /* _IPT_COMMENT_H */ diff --git a/include/linux/netfilter_ipv4/ipt_connbytes.h b/include/linux/netfilter_ipv4/ipt_connbytes.h deleted file mode 100644 index f63e6ee91113..000000000000 --- a/include/linux/netfilter_ipv4/ipt_connbytes.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _IPT_CONNBYTES_H -#define _IPT_CONNBYTES_H - -#include -#define ipt_connbytes_what xt_connbytes_what - -#define IPT_CONNBYTES_PKTS XT_CONNBYTES_PKTS -#define IPT_CONNBYTES_BYTES XT_CONNBYTES_BYTES -#define IPT_CONNBYTES_AVGPKT XT_CONNBYTES_AVGPKT - -#define ipt_connbytes_direction xt_connbytes_direction -#define IPT_CONNBYTES_DIR_ORIGINAL XT_CONNBYTES_DIR_ORIGINAL -#define IPT_CONNBYTES_DIR_REPLY XT_CONNBYTES_DIR_REPLY -#define IPT_CONNBYTES_DIR_BOTH XT_CONNBYTES_DIR_BOTH - -#define ipt_connbytes_info xt_connbytes_info - -#endif diff --git a/include/linux/netfilter_ipv4/ipt_connmark.h b/include/linux/netfilter_ipv4/ipt_connmark.h deleted file mode 100644 index c7ba6560d44c..000000000000 --- a/include/linux/netfilter_ipv4/ipt_connmark.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_CONNMARK_H -#define _IPT_CONNMARK_H - -#include -#define ipt_connmark_info xt_connmark_info - -#endif /*_IPT_CONNMARK_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_conntrack.h b/include/linux/netfilter_ipv4/ipt_conntrack.h deleted file mode 100644 index cde6762949c5..000000000000 --- a/include/linux/netfilter_ipv4/ipt_conntrack.h +++ /dev/null @@ -1,28 +0,0 @@ -/* Header file for kernel module to match connection tracking information. - * GPL (C) 2001 Marc Boucher (marc@mbsi.ca). 
- */ - -#ifndef _IPT_CONNTRACK_H -#define _IPT_CONNTRACK_H - -#include - -#define IPT_CONNTRACK_STATE_BIT(ctinfo) XT_CONNTRACK_STATE_BIT(ctinfo) -#define IPT_CONNTRACK_STATE_INVALID XT_CONNTRACK_STATE_INVALID - -#define IPT_CONNTRACK_STATE_SNAT XT_CONNTRACK_STATE_SNAT -#define IPT_CONNTRACK_STATE_DNAT XT_CONNTRACK_STATE_DNAT -#define IPT_CONNTRACK_STATE_UNTRACKED XT_CONNTRACK_STATE_UNTRACKED - -/* flags, invflags: */ -#define IPT_CONNTRACK_STATE XT_CONNTRACK_STATE -#define IPT_CONNTRACK_PROTO XT_CONNTRACK_PROTO -#define IPT_CONNTRACK_ORIGSRC XT_CONNTRACK_ORIGSRC -#define IPT_CONNTRACK_ORIGDST XT_CONNTRACK_ORIGDST -#define IPT_CONNTRACK_REPLSRC XT_CONNTRACK_REPLSRC -#define IPT_CONNTRACK_REPLDST XT_CONNTRACK_REPLDST -#define IPT_CONNTRACK_STATUS XT_CONNTRACK_STATUS -#define IPT_CONNTRACK_EXPIRES XT_CONNTRACK_EXPIRES - -#define ipt_conntrack_info xt_conntrack_info -#endif /*_IPT_CONNTRACK_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_dccp.h b/include/linux/netfilter_ipv4/ipt_dccp.h deleted file mode 100644 index e70d11e1f53c..000000000000 --- a/include/linux/netfilter_ipv4/ipt_dccp.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _IPT_DCCP_H_ -#define _IPT_DCCP_H_ - -#include -#define IPT_DCCP_SRC_PORTS XT_DCCP_SRC_PORTS -#define IPT_DCCP_DEST_PORTS XT_DCCP_DEST_PORTS -#define IPT_DCCP_TYPE XT_DCCP_TYPE -#define IPT_DCCP_OPTION XT_DCCP_OPTION - -#define IPT_DCCP_VALID_FLAGS XT_DCCP_VALID_FLAGS - -#define ipt_dccp_info xt_dccp_info - -#endif /* _IPT_DCCP_H_ */ - diff --git a/include/linux/netfilter_ipv4/ipt_dscp.h b/include/linux/netfilter_ipv4/ipt_dscp.h deleted file mode 100644 index 4b82ca912b0e..000000000000 --- a/include/linux/netfilter_ipv4/ipt_dscp.h +++ /dev/null @@ -1,21 +0,0 @@ -/* iptables module for matching the IPv4 DSCP field - * - * (C) 2002 Harald Welte - * This software is distributed under GNU GPL v2, 1991 - * - * See RFC2474 for a description of the DSCP field within the IP Header. 
- * - * ipt_dscp.h,v 1.3 2002/08/05 19:00:21 laforge Exp -*/ -#ifndef _IPT_DSCP_H -#define _IPT_DSCP_H - -#include - -#define IPT_DSCP_MASK XT_DSCP_MASK -#define IPT_DSCP_SHIFT XT_DSCP_SHIFT -#define IPT_DSCP_MAX XT_DSCP_MAX - -#define ipt_dscp_info xt_dscp_info - -#endif /* _IPT_DSCP_H */ diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h index 1f0d9a4d3378..9945baa4ccd7 100644 --- a/include/linux/netfilter_ipv4/ipt_ecn.h +++ b/include/linux/netfilter_ipv4/ipt_ecn.h @@ -8,9 +8,9 @@ */ #ifndef _IPT_ECN_H #define _IPT_ECN_H -#include +#include -#define IPT_ECN_IP_MASK (~IPT_DSCP_MASK) +#define IPT_ECN_IP_MASK (~XT_DSCP_MASK) #define IPT_ECN_OP_MATCH_IP 0x01 #define IPT_ECN_OP_MATCH_ECE 0x10 diff --git a/include/linux/netfilter_ipv4/ipt_esp.h b/include/linux/netfilter_ipv4/ipt_esp.h deleted file mode 100644 index 78296e7eeff9..000000000000 --- a/include/linux/netfilter_ipv4/ipt_esp.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _IPT_ESP_H -#define _IPT_ESP_H - -#include - -#define ipt_esp xt_esp -#define IPT_ESP_INV_SPI XT_ESP_INV_SPI -#define IPT_ESP_INV_MASK XT_ESP_INV_MASK - -#endif /*_IPT_ESP_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_hashlimit.h b/include/linux/netfilter_ipv4/ipt_hashlimit.h deleted file mode 100644 index 5662120a3d7b..000000000000 --- a/include/linux/netfilter_ipv4/ipt_hashlimit.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _IPT_HASHLIMIT_H -#define _IPT_HASHLIMIT_H - -#include - -#define IPT_HASHLIMIT_SCALE XT_HASHLIMIT_SCALE -#define IPT_HASHLIMIT_HASH_DIP XT_HASHLIMIT_HASH_DIP -#define IPT_HASHLIMIT_HASH_DPT XT_HASHLIMIT_HASH_DPT -#define IPT_HASHLIMIT_HASH_SIP XT_HASHLIMIT_HASH_SIP -#define IPT_HASHLIMIT_HASH_SPT XT_HASHLIMIT_HASH_SPT - -#define ipt_hashlimit_info xt_hashlimit_info - -#endif /* _IPT_HASHLIMIT_H */ diff --git a/include/linux/netfilter_ipv4/ipt_helper.h b/include/linux/netfilter_ipv4/ipt_helper.h deleted file mode 100644 index 80452c218551..000000000000 --- a/include/linux/netfilter_ipv4/ipt_helper.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_HELPER_H -#define _IPT_HELPER_H - -#include -#define ipt_helper_info xt_helper_info - -#endif /* _IPT_HELPER_H */ diff --git a/include/linux/netfilter_ipv4/ipt_length.h b/include/linux/netfilter_ipv4/ipt_length.h deleted file mode 100644 index 9b45206ffcef..000000000000 --- a/include/linux/netfilter_ipv4/ipt_length.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_LENGTH_H -#define _IPT_LENGTH_H - -#include -#define ipt_length_info xt_length_info - -#endif /*_IPT_LENGTH_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_limit.h b/include/linux/netfilter_ipv4/ipt_limit.h deleted file mode 100644 index 92f5cd07bbc4..000000000000 --- a/include/linux/netfilter_ipv4/ipt_limit.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _IPT_RATE_H -#define _IPT_RATE_H - -#include -#define IPT_LIMIT_SCALE XT_LIMIT_SCALE -#define ipt_rateinfo xt_rateinfo - -#endif /*_IPT_RATE_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_mac.h b/include/linux/netfilter_ipv4/ipt_mac.h deleted file mode 100644 index b186008a3c47..000000000000 --- a/include/linux/netfilter_ipv4/ipt_mac.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_MAC_H -#define _IPT_MAC_H - -#include -#define ipt_mac_info xt_mac_info - -#endif /*_IPT_MAC_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_mark.h b/include/linux/netfilter_ipv4/ipt_mark.h deleted file mode 100644 index bfde67c61224..000000000000 --- a/include/linux/netfilter_ipv4/ipt_mark.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _IPT_MARK_H -#define _IPT_MARK_H - -/* Backwards 
compatibility for old userspace */ -#include - -#define ipt_mark_info xt_mark_info - -#endif /*_IPT_MARK_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_multiport.h b/include/linux/netfilter_ipv4/ipt_multiport.h deleted file mode 100644 index 55fe85eca88c..000000000000 --- a/include/linux/netfilter_ipv4/ipt_multiport.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _IPT_MULTIPORT_H -#define _IPT_MULTIPORT_H - -#include - -#define IPT_MULTIPORT_SOURCE XT_MULTIPORT_SOURCE -#define IPT_MULTIPORT_DESTINATION XT_MULTIPORT_DESTINATION -#define IPT_MULTIPORT_EITHER XT_MULTIPORT_EITHER - -#define IPT_MULTI_PORTS XT_MULTI_PORTS - -#define ipt_multiport xt_multiport -#define ipt_multiport_v1 xt_multiport_v1 - -#endif /*_IPT_MULTIPORT_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_physdev.h b/include/linux/netfilter_ipv4/ipt_physdev.h deleted file mode 100644 index 2400e7140f26..000000000000 --- a/include/linux/netfilter_ipv4/ipt_physdev.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _IPT_PHYSDEV_H -#define _IPT_PHYSDEV_H - -/* Backwards compatibility for old userspace */ - -#include - -#define IPT_PHYSDEV_OP_IN XT_PHYSDEV_OP_IN -#define IPT_PHYSDEV_OP_OUT XT_PHYSDEV_OP_OUT -#define IPT_PHYSDEV_OP_BRIDGED XT_PHYSDEV_OP_BRIDGED -#define IPT_PHYSDEV_OP_ISIN XT_PHYSDEV_OP_ISIN -#define IPT_PHYSDEV_OP_ISOUT XT_PHYSDEV_OP_ISOUT -#define IPT_PHYSDEV_OP_MASK XT_PHYSDEV_OP_MASK - -#define ipt_physdev_info xt_physdev_info - -#endif /*_IPT_PHYSDEV_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_pkttype.h b/include/linux/netfilter_ipv4/ipt_pkttype.h deleted file mode 100644 index ff1fbc949a0c..000000000000 --- a/include/linux/netfilter_ipv4/ipt_pkttype.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_PKTTYPE_H -#define _IPT_PKTTYPE_H - -#include -#define ipt_pkttype_info xt_pkttype_info - -#endif /*_IPT_PKTTYPE_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_policy.h b/include/linux/netfilter_ipv4/ipt_policy.h deleted file mode 100644 index 1037fb2cd206..000000000000 --- a/include/linux/netfilter_ipv4/ipt_policy.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _IPT_POLICY_H -#define _IPT_POLICY_H - -#include - -#define IPT_POLICY_MAX_ELEM XT_POLICY_MAX_ELEM - -/* ipt_policy_flags */ -#define IPT_POLICY_MATCH_IN XT_POLICY_MATCH_IN -#define IPT_POLICY_MATCH_OUT XT_POLICY_MATCH_OUT -#define IPT_POLICY_MATCH_NONE XT_POLICY_MATCH_NONE -#define IPT_POLICY_MATCH_STRICT XT_POLICY_MATCH_STRICT - -/* ipt_policy_modes */ -#define IPT_POLICY_MODE_TRANSPORT XT_POLICY_MODE_TRANSPORT -#define IPT_POLICY_MODE_TUNNEL XT_POLICY_MODE_TUNNEL - -#define ipt_policy_spec xt_policy_spec -#define ipt_policy_addr xt_policy_addr -#define ipt_policy_elem xt_policy_elem -#define ipt_policy_info xt_policy_info - -#endif /* _IPT_POLICY_H */ diff --git a/include/linux/netfilter_ipv4/ipt_recent.h b/include/linux/netfilter_ipv4/ipt_recent.h deleted file mode 100644 index d636cca133c2..000000000000 --- a/include/linux/netfilter_ipv4/ipt_recent.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _IPT_RECENT_H -#define _IPT_RECENT_H - -#include - -#define ipt_recent_info xt_recent_mtinfo - -enum { - IPT_RECENT_CHECK = XT_RECENT_CHECK, - IPT_RECENT_SET = XT_RECENT_SET, - IPT_RECENT_UPDATE = XT_RECENT_UPDATE, - IPT_RECENT_REMOVE = XT_RECENT_REMOVE, - IPT_RECENT_TTL = XT_RECENT_TTL, - - IPT_RECENT_SOURCE = XT_RECENT_SOURCE, - IPT_RECENT_DEST = XT_RECENT_DEST, - - IPT_RECENT_NAME_LEN = XT_RECENT_NAME_LEN, -}; - -#endif /*_IPT_RECENT_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_sctp.h b/include/linux/netfilter_ipv4/ipt_sctp.h deleted file mode 100644 index 
80b3dbacd193..000000000000 --- a/include/linux/netfilter_ipv4/ipt_sctp.h +++ /dev/null @@ -1,105 +0,0 @@ -#ifndef _IPT_SCTP_H_ -#define _IPT_SCTP_H_ - -#define IPT_SCTP_SRC_PORTS 0x01 -#define IPT_SCTP_DEST_PORTS 0x02 -#define IPT_SCTP_CHUNK_TYPES 0x04 - -#define IPT_SCTP_VALID_FLAGS 0x07 - - -struct ipt_sctp_flag_info { - u_int8_t chunktype; - u_int8_t flag; - u_int8_t flag_mask; -}; - -#define IPT_NUM_SCTP_FLAGS 4 - -struct ipt_sctp_info { - u_int16_t dpts[2]; /* Min, Max */ - u_int16_t spts[2]; /* Min, Max */ - - u_int32_t chunkmap[256 / sizeof (u_int32_t)]; /* Bit mask of chunks to be matched according to RFC 2960 */ - -#define SCTP_CHUNK_MATCH_ANY 0x01 /* Match if any of the chunk types are present */ -#define SCTP_CHUNK_MATCH_ALL 0x02 /* Match if all of the chunk types are present */ -#define SCTP_CHUNK_MATCH_ONLY 0x04 /* Match if these are the only chunk types present */ - - u_int32_t chunk_match_type; - struct ipt_sctp_flag_info flag_info[IPT_NUM_SCTP_FLAGS]; - int flag_count; - - u_int32_t flags; - u_int32_t invflags; -}; - -#define bytes(type) (sizeof(type) * 8) - -#define SCTP_CHUNKMAP_SET(chunkmap, type) \ - do { \ - chunkmap[type / bytes(u_int32_t)] |= \ - 1 << (type % bytes(u_int32_t)); \ - } while (0) - -#define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \ - do { \ - chunkmap[type / bytes(u_int32_t)] &= \ - ~(1 << (type % bytes(u_int32_t))); \ - } while (0) - -#define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \ -({ \ - (chunkmap[type / bytes (u_int32_t)] & \ - (1 << (type % bytes (u_int32_t)))) ? 1: 0; \ -}) - -#define SCTP_CHUNKMAP_RESET(chunkmap) \ - do { \ - int i; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ - chunkmap[i] = 0; \ - } while (0) - -#define SCTP_CHUNKMAP_SET_ALL(chunkmap) \ - do { \ - int i; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ - chunkmap[i] = ~0; \ - } while (0) - -#define SCTP_CHUNKMAP_COPY(destmap, srcmap) \ - do { \ - int i; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) \ - destmap[i] = srcmap[i]; \ - } while (0) - -#define SCTP_CHUNKMAP_IS_CLEAR(chunkmap) \ -({ \ - int i; \ - int flag = 1; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) { \ - if (chunkmap[i]) { \ - flag = 0; \ - break; \ - } \ - } \ - flag; \ -}) - -#define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap) \ -({ \ - int i; \ - int flag = 1; \ - for (i = 0; i < ARRAY_SIZE(chunkmap); i++) { \ - if (chunkmap[i] != ~0) { \ - flag = 0; \ - break; \ - } \ - } \ - flag; \ -}) - -#endif /* _IPT_SCTP_H_ */ - diff --git a/include/linux/netfilter_ipv4/ipt_state.h b/include/linux/netfilter_ipv4/ipt_state.h deleted file mode 100644 index a44a99cc28cc..000000000000 --- a/include/linux/netfilter_ipv4/ipt_state.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _IPT_STATE_H -#define _IPT_STATE_H - -/* Backwards compatibility for old userspace */ - -#include - -#define IPT_STATE_BIT XT_STATE_BIT -#define IPT_STATE_INVALID XT_STATE_INVALID - -#define IPT_STATE_UNTRACKED XT_STATE_UNTRACKED - -#define ipt_state_info xt_state_info - -#endif /*_IPT_STATE_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_string.h b/include/linux/netfilter_ipv4/ipt_string.h deleted file mode 100644 index c26de3059903..000000000000 --- a/include/linux/netfilter_ipv4/ipt_string.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _IPT_STRING_H -#define _IPT_STRING_H - -#include - -#define IPT_STRING_MAX_PATTERN_SIZE XT_STRING_MAX_PATTERN_SIZE -#define IPT_STRING_MAX_ALGO_NAME_SIZE XT_STRING_MAX_ALGO_NAME_SIZE -#define ipt_string_info xt_string_info - -#endif /*_IPT_STRING_H*/ diff --git a/include/linux/netfilter_ipv4/ipt_tcpmss.h 
b/include/linux/netfilter_ipv4/ipt_tcpmss.h deleted file mode 100644 index 18bbc8e8e009..000000000000 --- a/include/linux/netfilter_ipv4/ipt_tcpmss.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IPT_TCPMSS_MATCH_H -#define _IPT_TCPMSS_MATCH_H - -#include -#define ipt_tcpmss_match_info xt_tcpmss_match_info - -#endif /*_IPT_TCPMSS_MATCH_H*/ diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild index 4610a16da0ab..e864eaee9e5e 100644 --- a/include/linux/netfilter_ipv6/Kbuild +++ b/include/linux/netfilter_ipv6/Kbuild @@ -1,21 +1,12 @@ header-y += ip6t_HL.h header-y += ip6t_LOG.h -header-y += ip6t_MARK.h header-y += ip6t_REJECT.h header-y += ip6t_ah.h -header-y += ip6t_esp.h header-y += ip6t_frag.h -header-y += ip6t_hl.h header-y += ip6t_ipv6header.h -header-y += ip6t_length.h -header-y += ip6t_limit.h -header-y += ip6t_mac.h -header-y += ip6t_mark.h +header-y += ip6t_hl.h header-y += ip6t_mh.h -header-y += ip6t_multiport.h header-y += ip6t_opts.h -header-y += ip6t_physdev.h -header-y += ip6t_policy.h header-y += ip6t_rt.h unifdef-y += ip6_tables.h diff --git a/include/linux/netfilter_ipv6/ip6t_MARK.h b/include/linux/netfilter_ipv6/ip6t_MARK.h deleted file mode 100644 index 7cf629a8ab92..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_MARK.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _IP6T_MARK_H_target -#define _IP6T_MARK_H_target - -/* Backwards compatibility for old userspace */ -#include - -#define ip6t_mark_target_info xt_mark_target_info - -#endif /*_IP6T_MARK_H_target*/ diff --git a/include/linux/netfilter_ipv6/ip6t_esp.h b/include/linux/netfilter_ipv6/ip6t_esp.h deleted file mode 100644 index f62eaf53c16c..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_esp.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _IP6T_ESP_H -#define _IP6T_ESP_H - -#include - -#define ip6t_esp xt_esp -#define IP6T_ESP_INV_SPI XT_ESP_INV_SPI -#define IP6T_ESP_INV_MASK XT_ESP_INV_MASK - -#endif /*_IP6T_ESP_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_length.h b/include/linux/netfilter_ipv6/ip6t_length.h deleted file mode 100644 index 9e9689d03ed7..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_length.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _IP6T_LENGTH_H -#define _IP6T_LENGTH_H - -#include -#define ip6t_length_info xt_length_info - -#endif /*_IP6T_LENGTH_H*/ - diff --git a/include/linux/netfilter_ipv6/ip6t_limit.h b/include/linux/netfilter_ipv6/ip6t_limit.h deleted file mode 100644 index 487e5ea342c6..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_limit.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _IP6T_RATE_H -#define _IP6T_RATE_H - -#include -#define IP6T_LIMIT_SCALE XT_LIMIT_SCALE -#define ip6t_rateinfo xt_rateinfo - -#endif /*_IP6T_RATE_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_mac.h b/include/linux/netfilter_ipv6/ip6t_mac.h deleted file mode 100644 index ac58e83e9423..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_mac.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _IP6T_MAC_H -#define _IP6T_MAC_H - -#include -#define ip6t_mac_info xt_mac_info - -#endif /*_IP6T_MAC_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_mark.h b/include/linux/netfilter_ipv6/ip6t_mark.h deleted file mode 100644 index ff204951ddc3..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_mark.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef _IP6T_MARK_H -#define _IP6T_MARK_H - -/* Backwards compatibility for old userspace */ -#include - -#define ip6t_mark_info xt_mark_info - -#endif /*_IPT_MARK_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_multiport.h 
b/include/linux/netfilter_ipv6/ip6t_multiport.h deleted file mode 100644 index 042c92661cee..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_multiport.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _IP6T_MULTIPORT_H -#define _IP6T_MULTIPORT_H - -#include - -#define IP6T_MULTIPORT_SOURCE XT_MULTIPORT_SOURCE -#define IP6T_MULTIPORT_DESTINATION XT_MULTIPORT_DESTINATION -#define IP6T_MULTIPORT_EITHER XT_MULTIPORT_EITHER - -#define IP6T_MULTI_PORTS XT_MULTI_PORTS - -#define ip6t_multiport xt_multiport - -#endif /*_IP6T_MULTIPORT_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_physdev.h b/include/linux/netfilter_ipv6/ip6t_physdev.h deleted file mode 100644 index c161c0a81b55..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_physdev.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _IP6T_PHYSDEV_H -#define _IP6T_PHYSDEV_H - -/* Backwards compatibility for old userspace */ - -#include - -#define IP6T_PHYSDEV_OP_IN XT_PHYSDEV_OP_IN -#define IP6T_PHYSDEV_OP_OUT XT_PHYSDEV_OP_OUT -#define IP6T_PHYSDEV_OP_BRIDGED XT_PHYSDEV_OP_BRIDGED -#define IP6T_PHYSDEV_OP_ISIN XT_PHYSDEV_OP_ISIN -#define IP6T_PHYSDEV_OP_ISOUT XT_PHYSDEV_OP_ISOUT -#define IP6T_PHYSDEV_OP_MASK XT_PHYSDEV_OP_MASK - -#define ip6t_physdev_info xt_physdev_info - -#endif /*_IP6T_PHYSDEV_H*/ diff --git a/include/linux/netfilter_ipv6/ip6t_policy.h b/include/linux/netfilter_ipv6/ip6t_policy.h deleted file mode 100644 index b1c449d7ec89..000000000000 --- a/include/linux/netfilter_ipv6/ip6t_policy.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _IP6T_POLICY_H -#define _IP6T_POLICY_H - -#include - -#define IP6T_POLICY_MAX_ELEM XT_POLICY_MAX_ELEM - -/* ip6t_policy_flags */ -#define IP6T_POLICY_MATCH_IN XT_POLICY_MATCH_IN -#define IP6T_POLICY_MATCH_OUT XT_POLICY_MATCH_OUT -#define IP6T_POLICY_MATCH_NONE XT_POLICY_MATCH_NONE -#define IP6T_POLICY_MATCH_STRICT XT_POLICY_MATCH_STRICT - -/* ip6t_policy_modes */ -#define IP6T_POLICY_MODE_TRANSPORT XT_POLICY_MODE_TRANSPORT -#define IP6T_POLICY_MODE_TUNNEL XT_POLICY_MODE_TUNNEL - -#define ip6t_policy_spec xt_policy_spec -#define ip6t_policy_addr xt_policy_addr -#define ip6t_policy_elem xt_policy_elem -#define ip6t_policy_info xt_policy_info - -#endif /* _IP6T_POLICY_H */ -- cgit v1.2.3 From 98d89b4198cf7273968e9217a62ec7ccfd760171 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Sun, 5 Jul 2009 15:55:06 +0200 Subject: netfilter: xtables: realign struct xt_target_param This commit gets rid of a padding hole as reported by pahole(1). Saves 8 bytes on x86_64. Signed-off-by: Jan Engelhardt --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 1030b7593898..4fa6e4c263e0 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -238,9 +238,9 @@ struct xt_mtdtor_param { */ struct xt_target_param { const struct net_device *in, *out; - unsigned int hooknum; const struct xt_target *target; const void *targinfo; + unsigned int hooknum; u_int8_t family; }; -- cgit v1.2.3 From 1905b1bfc0de6f69a61dc03cac0d86a04b3216bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 6 Aug 2009 18:13:23 +0900 Subject: chrdev: implement __[un]register_chrdev() [un]register_chrdev() assume minor range 0-255. This patch adds __ prefixed versions which take @minorbase and @count explicitly. 
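As an illustration of the new interface, a hypothetical driver (the "foo" names are invented, not part of this patch) that only needs minors 8..15 could register exactly that range instead of claiming all 256 minors:

    #include <linux/fs.h>
    #include <linux/module.h>

    static const struct file_operations foo_fops = {
            .owner = THIS_MODULE,
            /* .open, .read, ... */
    };

    static int foo_major;

    static int __init foo_init(void)
    {
            /* passing 0 still asks for a dynamic major, as with register_chrdev() */
            foo_major = __register_chrdev(0, 8, 8, "foo", &foo_fops);
            return foo_major < 0 ? foo_major : 0;
    }

    static void __exit foo_exit(void)
    {
            __unregister_chrdev(foo_major, 8, 8, "foo");
    }

    module_init(foo_init);
    module_exit(foo_exit);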
Signed-off-by: Tejun Heo Cc: Al Viro Cc: Greg Kroah-Hartman Signed-off-by: Takashi Iwai --- fs/char_dev.c | 39 ++++++++++++++++++++++++++------------- include/linux/fs.h | 19 ++++++++++++++++--- 2 files changed, 42 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/fs/char_dev.c b/fs/char_dev.c index a173551e19d7..2f18c1e4e301 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -237,8 +237,10 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, } /** - * register_chrdev() - Register a major number for character devices. + * __register_chrdev() - create and register a cdev occupying a range of minors * @major: major device number or 0 for dynamic allocation + * @baseminor: first of the requested range of minor numbers + * @count: the number of minor numbers required * @name: name of this range of devices * @fops: file operations associated with this devices * @@ -254,19 +256,17 @@ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, * /dev. It only helps to keep track of the different owners of devices. If * your module name has only one type of devices it's ok to use e.g. the name * of the module here. - * - * This function registers a range of 256 minor numbers. The first minor number - * is 0. */ -int register_chrdev(unsigned int major, const char *name, - const struct file_operations *fops) +int __register_chrdev(unsigned int major, unsigned int baseminor, + unsigned int count, const char *name, + const struct file_operations *fops) { struct char_device_struct *cd; struct cdev *cdev; char *s; int err = -ENOMEM; - cd = __register_chrdev_region(major, 0, 256, name); + cd = __register_chrdev_region(major, baseminor, count, name); if (IS_ERR(cd)) return PTR_ERR(cd); @@ -280,7 +280,7 @@ int register_chrdev(unsigned int major, const char *name, for (s = strchr(kobject_name(&cdev->kobj),'/'); s; s = strchr(s, '/')) *s = '!'; - err = cdev_add(cdev, MKDEV(cd->major, 0), 256); + err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); if (err) goto out; @@ -290,7 +290,7 @@ int register_chrdev(unsigned int major, const char *name, out: kobject_put(&cdev->kobj); out2: - kfree(__unregister_chrdev_region(cd->major, 0, 256)); + kfree(__unregister_chrdev_region(cd->major, baseminor, count)); return err; } @@ -316,10 +316,23 @@ void unregister_chrdev_region(dev_t from, unsigned count) } } -void unregister_chrdev(unsigned int major, const char *name) +/** + * __unregister_chrdev - unregister and destroy a cdev + * @major: major device number + * @baseminor: first of the range of minor numbers + * @count: the number of minor numbers this cdev is occupying + * @name: name of this range of devices + * + * Unregister and destroy the cdev occupying the region described by + * @major, @baseminor and @count. This function undoes what + * __register_chrdev() did. 
+ */ +void __unregister_chrdev(unsigned int major, unsigned int baseminor, + unsigned int count, const char *name) { struct char_device_struct *cd; - cd = __unregister_chrdev_region(major, 0, 256); + + cd = __unregister_chrdev_region(major, baseminor, count); if (cd && cd->cdev) cdev_del(cd->cdev); kfree(cd); @@ -568,6 +581,6 @@ EXPORT_SYMBOL(cdev_alloc); EXPORT_SYMBOL(cdev_del); EXPORT_SYMBOL(cdev_add); EXPORT_SYMBOL(cdev_index); -EXPORT_SYMBOL(register_chrdev); -EXPORT_SYMBOL(unregister_chrdev); +EXPORT_SYMBOL(__register_chrdev); +EXPORT_SYMBOL(__unregister_chrdev); EXPORT_SYMBOL(directly_mappable_cdev_bdi); diff --git a/include/linux/fs.h b/include/linux/fs.h index a36ffa5a77a4..6c36ab788854 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1998,12 +1998,25 @@ extern void bd_release_from_disk(struct block_device *, struct gendisk *); #define CHRDEV_MAJOR_HASH_SIZE 255 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); -extern int register_chrdev(unsigned int, const char *, - const struct file_operations *); -extern void unregister_chrdev(unsigned int, const char *); +extern int __register_chrdev(unsigned int major, unsigned int baseminor, + unsigned int count, const char *name, + const struct file_operations *fops); +extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, + unsigned int count, const char *name); extern void unregister_chrdev_region(dev_t, unsigned); extern void chrdev_show(struct seq_file *,off_t); +static inline int register_chrdev(unsigned int major, const char *name, + const struct file_operations *fops) +{ + return __register_chrdev(major, 0, 256, name, fops); +} + +static inline void unregister_chrdev(unsigned int major, const char *name) +{ + __unregister_chrdev(major, 0, 256, name); +} + /* fs/block_dev.c */ #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ #define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ -- cgit v1.2.3 From 2fc391112fb6f3424435a3aa2fda887497b5f807 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 10 Aug 2009 12:33:05 +0100 Subject: locking, sched: Give waitqueue spinlocks their own lockdep classes Give waitqueue spinlocks their own lockdep classes when they are initialised from init_waitqueue_head(). This means that struct wait_queue::func functions can operate other waitqueues. This is used by CacheFiles to catch the page from a backing fs being unlocked and to wake up another thread to take a copy of it. 
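A minimal sketch of what the per-call-site key buys (illustrative only; the names are invented, not taken from the patch):

    static wait_queue_head_t monitor_wq;    /* ends up in its own lockdep class ... */
    static wait_queue_head_t copy_wq;       /* ... distinct from this one */

    static int __init example_init(void)
    {
            /* each expansion of init_waitqueue_head() declares its own static key */
            init_waitqueue_head(&monitor_wq);
            init_waitqueue_head(&copy_wq);
            return 0;
    }

A wake-up callback queued on monitor_wq can then take copy_wq.lock (for example to wake a thread waiting there) without lockdep reporting it as the same lock class being acquired recursively.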
Signed-off-by: Peter Zijlstra Signed-off-by: David Howells Tested-by: Takashi Iwai Cc: linux-cachefs@redhat.com Cc: torvalds@osdl.org Cc: akpm@linux-foundation.org LKML-Reference: <20090810113305.17284.81508.stgit@warthog.procyon.org.uk> Signed-off-by: Ingo Molnar --- include/linux/wait.h | 9 ++++++++- kernel/wait.c | 5 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 6788e1a4d4ca..cf3c2f5dba51 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -77,7 +77,14 @@ struct task_struct; #define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ { .flags = word, .bit_nr = bit, } -extern void init_waitqueue_head(wait_queue_head_t *q); +extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *); + +#define init_waitqueue_head(q) \ + do { \ + static struct lock_class_key __key; \ + \ + __init_waitqueue_head((q), &__key); \ + } while (0) #ifdef CONFIG_LOCKDEP # define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ diff --git a/kernel/wait.c b/kernel/wait.c index ea7c3b4275cf..c4bd3d825f35 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -10,13 +10,14 @@ #include #include -void init_waitqueue_head(wait_queue_head_t *q) +void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key) { spin_lock_init(&q->lock); + lockdep_set_class(&q->lock, key); INIT_LIST_HEAD(&q->task_list); } -EXPORT_SYMBOL(init_waitqueue_head); +EXPORT_SYMBOL(__init_waitqueue_head); void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) { -- cgit v1.2.3 From 304703aba31a87903b8c0db8f5e6890cac2d596d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 10 Aug 2009 16:11:32 +0200 Subject: perf_counter: Subtract the buffer size field from the event record size We compute the perf raw sample size by aligning the raw ftrace event size plus the buffer size field itself. We do that instead of aligning only the perf raw sample size, so that we might save some space in some cases. But this buffer size field is not stored in the perf raw sample, so we must subtract its size from the buffer once we have computed the alignment; otherwise we would end up with a useless u32 field in the buffer.
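To make the arithmetic concrete, a worked example with an invented size (not taken from the patch): if sizeof(*entry) plus the dynamic data comes to 45 bytes, then

    ALIGN(45 + sizeof(u32), sizeof(u64)) = ALIGN(49, 8) = 56
    __entry_size = 56 - sizeof(u32) = 52

so the 52 bytes of sample data plus the u32 size field written ahead of them total 56 bytes: the record stays u64-aligned and no stray u32 is left inside the sample itself.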
Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <20090810141129.GA5124@nowhere> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 7167b9b97da2..a05524fa245d 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -637,7 +637,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * pc = preempt_count(); * * __data_size = ftrace_get_offsets_(&__data_offsets, args); - * __entry_size = __data_size + sizeof(*entry); + * + * // Below we want to get the aligned size by taking into account + * // the u32 field that will later store the buffer size + * __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32), + * sizeof(u64)); + * __entry_size -= sizeof(u32); * * do { * char raw_data[__entry_size]; <- allocate our sample in the stack @@ -687,6 +692,7 @@ static void ftrace_profile_##call(proto) \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ sizeof(u64)); \ + __entry_size -= sizeof(u32); \ \ do { \ char raw_data[__entry_size]; \ -- cgit v1.2.3 From 1853db0e02ae4088f102b0d8e59e83dc98f93f03 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 10 Aug 2009 16:38:36 +0200 Subject: perf_counter: Zero dead bytes from ftrace raw samples size alignment After aligning the ftrace raw samples, there are dead bytes storing random data from the stack. We don't want to leak these to userspace, then zero these out. Before: 0x2de88 [0x50]: event: 9 . . ... raw event: size 80 bytes . 0000: 09 00 00 00 01 00 50 00 d0 c7 00 81 ff ff ff ff ......P........ . 0010: 68 01 00 00 68 01 00 00 2c 00 00 00 00 00 00 00 h...h...,...... . 0020: 2c 00 00 00 2b 00 01 02 68 01 00 00 68 01 00 00 ,...+...h...h.. . 0030: 6b 6f 6e 64 65 6d 61 6e 64 2f 30 00 00 00 00 00 kondemand/0.... . 0040: 68 01 00 00 40 7f 46 81 ff ff ff ff 00 10 1b 7f h...@.F........ ^ ^ ^ ^ Leak After: 0x2d318 [0x50]: event: 9 . . ... raw event: size 80 bytes . 0000: 09 00 00 00 01 00 50 00 d0 c7 00 81 ff ff ff ff ......P........ . 0010: 68 01 00 00 68 01 00 00 68 14 00 00 00 00 00 00 h...h...h...... . 0020: 2c 00 00 00 2b 00 01 02 68 01 00 00 68 01 00 00 ,...+...h...h.. . 0030: 6b 6f 6e 64 65 6d 61 6e 64 2f 30 00 00 00 00 00 kondemand/0.... . 0040: 68 01 00 00 a0 80 46 81 ff ff ff ff 00 00 00 00 h.....F........ 
^ ^ ^ ^ Fixed Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1249915116-5210-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith --- include/trace/ftrace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a05524fa245d..f64fbaae781a 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -648,6 +648,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * char raw_data[__entry_size]; <- allocate our sample in the stack * struct trace_entry *ent; * + * zero dead bytes from alignment to avoid stack leak to userspace: + * + * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; * entry = (struct ftrace_raw_ *)raw_data; * ent = &entry->ent; * tracing_generic_entry_update(ent, irq_flags, pc); @@ -698,6 +701,7 @@ static void ftrace_profile_##call(proto) \ char raw_data[__entry_size]; \ struct trace_entry *ent; \ \ + *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ ent = &entry->ent; \ tracing_generic_entry_update(ent, irq_flags, pc); \ -- cgit v1.2.3 From 066e0378c23f0a3db730893f6a041e4a3922a385 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:23 -0400 Subject: tracing: Call arch_init_ftrace_syscalls at boot Call arch_init_ftrace_syscalls at boot, so we can determine early the set of syscalls for the syscall trace events. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ftrace.c | 15 ++++----------- include/trace/syscall.h | 1 - kernel/trace/trace_syscalls.c | 1 - 3 files changed, 4 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index afb31d72618d..0d93d409b8d2 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -516,31 +516,24 @@ int syscall_name_to_nr(char *name) return -1; } -void arch_init_ftrace_syscalls(void) +static int __init arch_init_ftrace_syscalls(void) { int i; struct syscall_metadata *meta; unsigned long **psys_syscall_table = &sys_call_table; - static atomic_t refs; - - if (atomic_inc_return(&refs) != 1) - goto end; syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * FTRACE_SYSCALL_MAX, GFP_KERNEL); if (!syscalls_metadata) { WARN_ON(1); - return; + return -ENOMEM; } for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { meta = find_syscall_meta(psys_syscall_table[i]); syscalls_metadata[i] = meta; } - return; - - /* Paranoid: avoid overflow */ -end: - atomic_dec(&refs); + return 0; } +arch_initcall(arch_init_ftrace_syscalls); #endif diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 8cfe515cbc47..c55fcce4fbb2 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -19,7 +19,6 @@ struct syscall_metadata { }; #ifdef CONFIG_FTRACE_SYSCALLS -extern void arch_init_ftrace_syscalls(void); extern struct syscall_metadata *syscall_nr_to_meta(int nr); extern void start_ftrace_syscalls(void); extern void stop_ftrace_syscalls(void); diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 5e579645ac86..08aed439feaf 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ 
-106,7 +106,6 @@ void start_ftrace_syscalls(void) if (++refcount != 1) goto unlock; - arch_init_ftrace_syscalls(); read_lock_irqsave(&tasklist_lock, flags); do_each_thread(g, t) { -- cgit v1.2.3 From 63fbdab3157b72467013fe4dcf88c85e45280ef7 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:27 -0400 Subject: tracing: Add DECLARE_TRACE_WITH_CALLBACK() macro Introduce a new 'DECLARE_TRACE_WITH_CALLBACK()' macro, so that tracepoints can associate an external register/unregister function. This prepares for the syscalls tracer conversion to trace events. We will need to perform arch level operations once a syscall event is turned on/off, such as TIF flags setting, hence the need of such specific callbacks. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/tracepoint.h | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index b9dc4ca0246f..5984ed04c03b 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -60,8 +60,10 @@ struct tracepoint { * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. + * An optional set of (un)registration functions can be passed to perform any + * additional (un)registration work. */ -#define DECLARE_TRACE(name, proto, args) \ +#define DECLARE_TRACE_WITH_CALLBACK(name, proto, args, reg, unreg) \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ @@ -71,13 +73,30 @@ struct tracepoint { } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_register(#name, (void *)probe); \ + int ret; \ + void (*func)(void) = reg; \ + \ + ret = tracepoint_probe_register(#name, (void *)probe); \ + if (func && !ret) \ + func(); \ + return ret; \ } \ static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - return tracepoint_probe_unregister(#name, (void *)probe);\ + int ret; \ + void (*func)(void) = unreg; \ + \ + ret = tracepoint_probe_unregister(#name, (void *)probe);\ + if (func && !ret) \ + func(); \ + return ret; \ } + +#define DECLARE_TRACE(name, proto, args) \ + DECLARE_TRACE_WITH_CALLBACK(name, TP_PROTO(proto), TP_ARGS(args),\ + NULL, NULL); + #define DEFINE_TRACE(name) \ static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) = #name; \ @@ -94,7 +113,7 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DECLARE_TRACE(name, proto, args) \ +#define DECLARE_TRACE_WITH_CALLBACK(name, proto, args, reg, unreg) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -108,6 +127,10 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, return -ENOSYS; \ } +#define DECLARE_TRACE(name, proto, args) \ + DECLARE_TRACE_WITH_CALLBACK(name, TP_PROTO(proto), TP_ARGS(args),\ + NULL, NULL); + #define DEFINE_TRACE(name) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) -- cgit v1.2.3 From a871bd33a6c0bc86fb47cd02ea2650dd43d3d95f Mon Sep 17 00:00:00 2001 From: Jason Baron 
Date: Mon, 10 Aug 2009 16:52:31 -0400 Subject: tracing: Add syscall tracepoints add two tracepoints in syscall exit and entry path, conditioned on TIF_SYSCALL_FTRACE. Supports the syscall trace event code. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ptrace.c | 7 +++++-- include/trace/syscall.h | 20 ++++++++++++++++++++ kernel/tracepoint.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde91c13..34dd6f15185d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -37,6 +37,9 @@ #include +DEFINE_TRACE(syscall_enter); +DEFINE_TRACE(syscall_exit); + #include "tls.h" enum x86_regset { @@ -1498,7 +1501,7 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) ret = -1L; if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) - ftrace_syscall_enter(regs); + trace_syscall_enter(regs, regs->orig_ax); if (unlikely(current->audit_context)) { if (IS_IA32) @@ -1524,7 +1527,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) - ftrace_syscall_exit(regs); + trace_syscall_exit(regs, regs->ax); if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); diff --git a/include/trace/syscall.h b/include/trace/syscall.h index c55fcce4fbb2..3951d774de18 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -1,8 +1,28 @@ #ifndef _TRACE_SYSCALL_H #define _TRACE_SYSCALL_H +#include + #include + +extern void syscall_regfunc(void); +extern void syscall_unregfunc(void); + +DECLARE_TRACE_WITH_CALLBACK(syscall_enter, + TP_PROTO(struct pt_regs *regs, long id), + TP_ARGS(regs, id), + syscall_regfunc, + syscall_unregfunc +); + +DECLARE_TRACE_WITH_CALLBACK(syscall_exit, + TP_PROTO(struct pt_regs *regs, long ret), + TP_ARGS(regs, ret), + syscall_regfunc, + syscall_unregfunc +); + /* * A syscall entry in the ftrace syscalls array. 
* diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 1ef5d3a601c7..070a42bb8920 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -24,6 +24,7 @@ #include #include #include +#include extern struct tracepoint __start___tracepoints[]; extern struct tracepoint __stop___tracepoints[]; @@ -577,3 +578,40 @@ static int init_tracepoints(void) __initcall(init_tracepoints); #endif /* CONFIG_MODULES */ + +static DEFINE_MUTEX(regfunc_mutex); +static int sys_tracepoint_refcount; + +void syscall_regfunc(void) +{ + unsigned long flags; + struct task_struct *g, *t; + + mutex_lock(®func_mutex); + if (!sys_tracepoint_refcount) { + read_lock_irqsave(&tasklist_lock, flags); + do_each_thread(g, t) { + set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); + } while_each_thread(g, t); + read_unlock_irqrestore(&tasklist_lock, flags); + } + sys_tracepoint_refcount++; + mutex_unlock(®func_mutex); +} + +void syscall_unregfunc(void) +{ + unsigned long flags; + struct task_struct *g, *t; + + mutex_lock(®func_mutex); + sys_tracepoint_refcount--; + if (!sys_tracepoint_refcount) { + read_lock_irqsave(&tasklist_lock, flags); + do_each_thread(g, t) { + clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); + } while_each_thread(g, t); + read_unlock_irqrestore(&tasklist_lock, flags); + } + mutex_unlock(®func_mutex); +} -- cgit v1.2.3 From 69fd4f0eb2ececbf8ade55e31a933e174965745e Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:44 -0400 Subject: tracing: Add ftrace_event_call void * 'data' field add an optional void * pointer to 'ftrace_event_call' that is passed in for regfunc and unregfunc. This prepares for syscall tracepoints creation by passing the name of the syscall we want to trace and then retrieve its number through our arch syscall table. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/ftrace_event.h | 5 +++-- include/trace/ftrace.h | 4 ++-- kernel/trace/trace_events.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ac8c6f8cf242..8544f121d9f1 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -112,8 +112,8 @@ struct ftrace_event_call { struct dentry *dir; struct trace_event *event; int enabled; - int (*regfunc)(void); - void (*unregfunc)(void); + int (*regfunc)(void *); + void (*unregfunc)(void *); int id; int (*raw_init)(void); int (*show_format)(struct trace_seq *s); @@ -122,6 +122,7 @@ struct ftrace_event_call { int filter_active; struct event_filter *filter; void *mod; + void *data; atomic_t profile_count; int (*profile_enable)(struct ftrace_event_call *); diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 25d3b02a06f8..46d81b5e8610 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -568,7 +568,7 @@ static void ftrace_raw_event_##call(proto) \ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ } \ \ -static int ftrace_raw_reg_event_##call(void) \ +static int ftrace_raw_reg_event_##call(void *ptr) \ { \ int ret; \ \ @@ -579,7 +579,7 @@ static int ftrace_raw_reg_event_##call(void) \ return ret; \ } \ \ -static void ftrace_raw_unreg_event_##call(void) \ +static void ftrace_raw_unreg_event_##call(void *ptr) \ { \ unregister_trace_##call(ftrace_raw_event_##call); \ } \ diff --git a/kernel/trace/trace_events.c 
b/kernel/trace/trace_events.c index f95f8470dd38..1d289e2d6693 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -86,14 +86,14 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, if (call->enabled) { call->enabled = 0; tracing_stop_cmdline_record(); - call->unregfunc(); + call->unregfunc(call->data); } break; case 1: if (!call->enabled) { call->enabled = 1; tracing_start_cmdline_record(); - call->regfunc(); + call->regfunc(call->data); } break; } -- cgit v1.2.3 From fb34a08c3469b2be9eae626ccb96476b4687b810 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:47 -0400 Subject: tracing: Add trace events for each syscall entry/exit Layer Frederic's syscall tracer on tracepoints. We create trace events via hooking into the SYSCALL_DEFINE macros. This allows us to individually toggle syscall entry and exit points on/off. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/syscalls.h | 61 +++++++++++++- include/trace/syscall.h | 18 ++--- kernel/trace/trace_syscalls.c | 183 +++++++++++++++++++++--------------------- 3 files changed, 159 insertions(+), 103 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 80de7003d8c2..5e5b4d33a31c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -64,6 +64,7 @@ struct perf_counter_attr; #include #include #include +#include #include #include #include @@ -112,6 +113,59 @@ struct perf_counter_attr; #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) #define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) + +#define SYSCALL_TRACE_ENTER_EVENT(sname) \ + static struct ftrace_event_call event_enter_##sname; \ + static int init_enter_##sname(void) \ + { \ + int num; \ + num = syscall_name_to_nr("sys"#sname); \ + if (num < 0) \ + return -ENOSYS; \ + register_ftrace_event(&event_syscall_enter); \ + INIT_LIST_HEAD(&event_enter_##sname.fields); \ + init_preds(&event_enter_##sname); \ + return 0; \ + } \ + static struct ftrace_event_call __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_events"))) \ + event_enter_##sname = { \ + .name = "sys_enter"#sname, \ + .system = "syscalls", \ + .event = &event_syscall_enter, \ + .raw_init = init_enter_##sname, \ + .regfunc = reg_event_syscall_enter, \ + .unregfunc = unreg_event_syscall_enter, \ + .data = "sys"#sname, \ + } + +#define SYSCALL_TRACE_EXIT_EVENT(sname) \ + static struct ftrace_event_call event_exit_##sname; \ + static int init_exit_##sname(void) \ + { \ + int num; \ + num = syscall_name_to_nr("sys"#sname); \ + if (num < 0) \ + return -ENOSYS; \ + register_ftrace_event(&event_syscall_exit); \ + INIT_LIST_HEAD(&event_exit_##sname.fields); \ + init_preds(&event_exit_##sname); \ + return 0; \ + } \ + static struct ftrace_event_call __used \ + __attribute__((__aligned__(4))) \ + __attribute__((section("_ftrace_events"))) \ + event_exit_##sname = { \ + .name = "sys_exit"#sname, \ + .system = "syscalls", \ + .event = &event_syscall_exit, \ + .raw_init = init_exit_##sname, \ + .regfunc = reg_event_syscall_exit, \ + .unregfunc = unreg_event_syscall_exit, \ + .data = "sys"#sname, \ + } + #define SYSCALL_METADATA(sname, nb) \ static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ @@ -121,7 +175,9 @@ struct perf_counter_attr; .nb_args = 
nb, \ .types = types_##sname, \ .args = args_##sname, \ - } + }; \ + SYSCALL_TRACE_ENTER_EVENT(sname); \ + SYSCALL_TRACE_EXIT_EVENT(sname); #define SYSCALL_DEFINE0(sname) \ static const struct syscall_metadata __used \ @@ -131,8 +187,9 @@ struct perf_counter_attr; .name = "sys_"#sname, \ .nb_args = 0, \ }; \ + SYSCALL_TRACE_ENTER_EVENT(_##sname); \ + SYSCALL_TRACE_EXIT_EVENT(_##sname); \ asmlinkage long sys_##sname(void) - #else #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) #endif diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 3951d774de18..73fb8b4a9955 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -2,6 +2,8 @@ #define _TRACE_SYSCALL_H #include +#include +#include #include @@ -40,15 +42,13 @@ struct syscall_metadata { #ifdef CONFIG_FTRACE_SYSCALLS extern struct syscall_metadata *syscall_nr_to_meta(int nr); -extern void start_ftrace_syscalls(void); -extern void stop_ftrace_syscalls(void); -extern void ftrace_syscall_enter(struct pt_regs *regs); -extern void ftrace_syscall_exit(struct pt_regs *regs); -#else -static inline void start_ftrace_syscalls(void) { } -static inline void stop_ftrace_syscalls(void) { } -static inline void ftrace_syscall_enter(struct pt_regs *regs) { } -static inline void ftrace_syscall_exit(struct pt_regs *regs) { } +extern int syscall_name_to_nr(char *name); +extern struct trace_event event_syscall_enter; +extern struct trace_event event_syscall_exit; +extern int reg_event_syscall_enter(void *ptr); +extern void unreg_event_syscall_enter(void *ptr); +extern int reg_event_syscall_exit(void *ptr); +extern void unreg_event_syscall_exit(void *ptr); #endif #endif /* _TRACE_SYSCALL_H */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 08aed439feaf..c7ae25ee95d8 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -1,15 +1,16 @@ #include #include +#include #include #include "trace_output.h" #include "trace.h" -/* Keep a counter of the syscall tracing users */ -static int refcount; - -/* Prevent from races on thread flags toggling */ static DEFINE_MUTEX(syscall_trace_lock); +static int sys_refcount_enter; +static int sys_refcount_exit; +static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX); +static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX); /* Option to display the parameters types */ enum { @@ -95,53 +96,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } -void start_ftrace_syscalls(void) -{ - unsigned long flags; - struct task_struct *g, *t; - - mutex_lock(&syscall_trace_lock); - - /* Don't enable the flag on the tasks twice */ - if (++refcount != 1) - goto unlock; - - read_lock_irqsave(&tasklist_lock, flags); - - do_each_thread(g, t) { - set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); - } while_each_thread(g, t); - - read_unlock_irqrestore(&tasklist_lock, flags); - -unlock: - mutex_unlock(&syscall_trace_lock); -} - -void stop_ftrace_syscalls(void) -{ - unsigned long flags; - struct task_struct *g, *t; - - mutex_lock(&syscall_trace_lock); - - /* There are perhaps still some users */ - if (--refcount) - goto unlock; - - read_lock_irqsave(&tasklist_lock, flags); - - do_each_thread(g, t) { - clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); - } while_each_thread(g, t); - - read_unlock_irqrestore(&tasklist_lock, flags); - -unlock: - mutex_unlock(&syscall_trace_lock); -} - -void ftrace_syscall_enter(struct pt_regs *regs) +void ftrace_syscall_enter(struct pt_regs *regs, long id) { struct 
syscall_trace_enter *entry; struct syscall_metadata *sys_data; @@ -150,6 +105,8 @@ void ftrace_syscall_enter(struct pt_regs *regs) int syscall_nr; syscall_nr = syscall_get_nr(current, regs); + if (!test_bit(syscall_nr, enabled_enter_syscalls)) + return; sys_data = syscall_nr_to_meta(syscall_nr); if (!sys_data) @@ -170,7 +127,7 @@ void ftrace_syscall_enter(struct pt_regs *regs) trace_wake_up(); } -void ftrace_syscall_exit(struct pt_regs *regs) +void ftrace_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_trace_exit *entry; struct syscall_metadata *sys_data; @@ -178,6 +135,8 @@ void ftrace_syscall_exit(struct pt_regs *regs) int syscall_nr; syscall_nr = syscall_get_nr(current, regs); + if (!test_bit(syscall_nr, enabled_exit_syscalls)) + return; sys_data = syscall_nr_to_meta(syscall_nr); if (!sys_data) @@ -196,54 +155,94 @@ void ftrace_syscall_exit(struct pt_regs *regs) trace_wake_up(); } -static int init_syscall_tracer(struct trace_array *tr) +int reg_event_syscall_enter(void *ptr) { - start_ftrace_syscalls(); - - return 0; + int ret = 0; + int num; + char *name; + + name = (char *)ptr; + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return -ENOSYS; + mutex_lock(&syscall_trace_lock); + if (!sys_refcount_enter) + ret = register_trace_syscall_enter(ftrace_syscall_enter); + if (ret) { + pr_info("event trace: Could not activate" + "syscall entry trace point"); + } else { + set_bit(num, enabled_enter_syscalls); + sys_refcount_enter++; + } + mutex_unlock(&syscall_trace_lock); + return ret; } -static void reset_syscall_tracer(struct trace_array *tr) +void unreg_event_syscall_enter(void *ptr) { - stop_ftrace_syscalls(); - tracing_reset_online_cpus(tr); -} - -static struct trace_event syscall_enter_event = { - .type = TRACE_SYSCALL_ENTER, - .trace = print_syscall_enter, -}; - -static struct trace_event syscall_exit_event = { - .type = TRACE_SYSCALL_EXIT, - .trace = print_syscall_exit, -}; + int num; + char *name; -static struct tracer syscall_tracer __read_mostly = { - .name = "syscall", - .init = init_syscall_tracer, - .reset = reset_syscall_tracer, - .flags = &syscalls_flags, -}; + name = (char *)ptr; + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return; + mutex_lock(&syscall_trace_lock); + sys_refcount_enter--; + clear_bit(num, enabled_enter_syscalls); + if (!sys_refcount_enter) + unregister_trace_syscall_enter(ftrace_syscall_enter); + mutex_unlock(&syscall_trace_lock); +} -__init int register_ftrace_syscalls(void) +int reg_event_syscall_exit(void *ptr) { - int ret; - - ret = register_ftrace_event(&syscall_enter_event); - if (!ret) { - printk(KERN_WARNING "event %d failed to register\n", - syscall_enter_event.type); - WARN_ON_ONCE(1); + int ret = 0; + int num; + char *name; + + name = (char *)ptr; + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return -ENOSYS; + mutex_lock(&syscall_trace_lock); + if (!sys_refcount_exit) + ret = register_trace_syscall_exit(ftrace_syscall_exit); + if (ret) { + pr_info("event trace: Could not activate" + "syscall exit trace point"); + } else { + set_bit(num, enabled_exit_syscalls); + sys_refcount_exit++; } + mutex_unlock(&syscall_trace_lock); + return ret; +} - ret = register_ftrace_event(&syscall_exit_event); - if (!ret) { - printk(KERN_WARNING "event %d failed to register\n", - syscall_exit_event.type); - WARN_ON_ONCE(1); - } +void unreg_event_syscall_exit(void *ptr) +{ + int num; + char *name; - return register_tracer(&syscall_tracer); + name = (char 
*)ptr; + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return; + mutex_lock(&syscall_trace_lock); + sys_refcount_exit--; + clear_bit(num, enabled_exit_syscalls); + if (!sys_refcount_exit) + unregister_trace_syscall_exit(ftrace_syscall_exit); + mutex_unlock(&syscall_trace_lock); } -device_initcall(register_ftrace_syscalls); + +struct trace_event event_syscall_enter = { + .trace = print_syscall_enter, + .type = TRACE_SYSCALL_ENTER +}; + +struct trace_event event_syscall_exit = { + .trace = print_syscall_exit, + .type = TRACE_SYSCALL_EXIT +}; -- cgit v1.2.3 From 64c12e0444fcc6b75eb49144ba46d43dbdc6bc8f Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:52:53 -0400 Subject: tracing: Add individual syscalls tracepoint id support The current state of syscalls tracepoints generates only one event id for every syscall events. This patch associates an id with each syscall trace event, so that we can identify each syscall trace event using the 'perf' tool. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ftrace.c | 10 ++++++++++ include/linux/syscalls.h | 22 ++++++++++++++++++---- include/trace/syscall.h | 8 ++++++++ kernel/trace/trace.h | 6 ------ kernel/trace/trace_syscalls.c | 26 ++++++++++++++++---------- 5 files changed, 52 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0d93d409b8d2..3cff1214e176 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -516,6 +516,16 @@ int syscall_name_to_nr(char *name) return -1; } +void set_syscall_enter_id(int num, int id) +{ + syscalls_metadata[num]->enter_id = id; +} + +void set_syscall_exit_id(int num, int id) +{ + syscalls_metadata[num]->exit_id = id; +} + static int __init arch_init_ftrace_syscalls(void) { int i; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5e5b4d33a31c..ce4b01c658eb 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -116,13 +116,20 @@ struct perf_counter_attr; #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct ftrace_event_call event_enter_##sname; \ + struct trace_event enter_syscall_print_##sname = { \ + .trace = print_syscall_enter, \ + }; \ static int init_enter_##sname(void) \ { \ - int num; \ + int num, id; \ num = syscall_name_to_nr("sys"#sname); \ if (num < 0) \ return -ENOSYS; \ - register_ftrace_event(&event_syscall_enter); \ + id = register_ftrace_event(&enter_syscall_print_##sname);\ + if (!id) \ + return -ENODEV; \ + event_enter_##sname.id = id; \ + set_syscall_enter_id(num, id); \ INIT_LIST_HEAD(&event_enter_##sname.fields); \ init_preds(&event_enter_##sname); \ return 0; \ @@ -142,13 +149,20 @@ struct perf_counter_attr; #define SYSCALL_TRACE_EXIT_EVENT(sname) \ static struct ftrace_event_call event_exit_##sname; \ + struct trace_event exit_syscall_print_##sname = { \ + .trace = print_syscall_exit, \ + }; \ static int init_exit_##sname(void) \ { \ - int num; \ + int num, id; \ num = syscall_name_to_nr("sys"#sname); \ if (num < 0) \ return -ENOSYS; \ - register_ftrace_event(&event_syscall_exit); \ + id = register_ftrace_event(&exit_syscall_print_##sname);\ + if (!id) \ + return -ENODEV; \ + event_exit_##sname.id = id; \ + set_syscall_exit_id(num, id); \ INIT_LIST_HEAD(&event_exit_##sname.fields); \ init_preds(&event_exit_##sname); \ return 0; \ diff 
--git a/include/trace/syscall.h b/include/trace/syscall.h index 73fb8b4a9955..df628404241a 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -32,23 +32,31 @@ DECLARE_TRACE_WITH_CALLBACK(syscall_exit, * @nb_args: number of parameters it takes * @types: list of types as strings * @args: list of args as strings (args[i] matches types[i]) + * @enter_id: associated ftrace enter event id + * @exit_id: associated ftrace exit event id */ struct syscall_metadata { const char *name; int nb_args; const char **types; const char **args; + int enter_id; + int exit_id; }; #ifdef CONFIG_FTRACE_SYSCALLS extern struct syscall_metadata *syscall_nr_to_meta(int nr); extern int syscall_name_to_nr(char *name); +void set_syscall_enter_id(int num, int id); +void set_syscall_exit_id(int num, int id); extern struct trace_event event_syscall_enter; extern struct trace_event event_syscall_exit; extern int reg_event_syscall_enter(void *ptr); extern void unreg_event_syscall_enter(void *ptr); extern int reg_event_syscall_exit(void *ptr); extern void unreg_event_syscall_exit(void *ptr); +enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); +enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif #endif /* _TRACE_SYSCALL_H */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d682357e4b1f..300ef788c976 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -34,8 +34,6 @@ enum trace_type { TRACE_GRAPH_ENT, TRACE_USER_STACK, TRACE_HW_BRANCHES, - TRACE_SYSCALL_ENTER, - TRACE_SYSCALL_EXIT, TRACE_KMEM_ALLOC, TRACE_KMEM_FREE, TRACE_POWER, @@ -319,10 +317,6 @@ extern void __ftrace_bad_type(void); TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ TRACE_KMEM_FREE); \ - IF_ASSIGN(var, ent, struct syscall_trace_enter, \ - TRACE_SYSCALL_ENTER); \ - IF_ASSIGN(var, ent, struct syscall_trace_exit, \ - TRACE_SYSCALL_EXIT); \ __ftrace_bad_type(); \ } while (0) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index c7ae25ee95d8..e58a9c11ba85 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -36,14 +36,18 @@ print_syscall_enter(struct trace_iterator *iter, int flags) struct syscall_metadata *entry; int i, ret, syscall; - trace_assign_type(trace, ent); - + trace = (typeof(trace))ent; syscall = trace->nr; - entry = syscall_nr_to_meta(syscall); + if (!entry) goto end; + if (entry->enter_id != ent->type) { + WARN_ON_ONCE(1); + goto end; + } + ret = trace_seq_printf(s, "%s(", entry->name); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -78,16 +82,20 @@ print_syscall_exit(struct trace_iterator *iter, int flags) struct syscall_metadata *entry; int ret; - trace_assign_type(trace, ent); - + trace = (typeof(trace))ent; syscall = trace->nr; - entry = syscall_nr_to_meta(syscall); + if (!entry) { trace_seq_printf(s, "\n"); return TRACE_TYPE_HANDLED; } + if (entry->exit_id != ent->type) { + WARN_ON_ONCE(1); + return TRACE_TYPE_UNHANDLED; + } + ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, trace->ret); if (!ret) @@ -114,7 +122,7 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; - event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size, + event = trace_current_buffer_lock_reserve(sys_data->enter_id, size, 0, 0); if (!event) return; @@ -142,7 +150,7 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) if (!sys_data) return; - event = 
trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT, + event = trace_current_buffer_lock_reserve(sys_data->exit_id, sizeof(*entry), 0, 0); if (!event) return; @@ -239,10 +247,8 @@ void unreg_event_syscall_exit(void *ptr) struct trace_event event_syscall_enter = { .trace = print_syscall_enter, - .type = TRACE_SYSCALL_ENTER }; struct trace_event event_syscall_exit = { .trace = print_syscall_exit, - .type = TRACE_SYSCALL_EXIT }; -- cgit v1.2.3 From f4b5ffccc83c82947f5d9f15d6f1b6edb1b71cd7 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 10 Aug 2009 16:53:02 -0400 Subject: tracing: Add perf counter support for syscalls tracing The perf counter support is automated for usual trace events. But we have to define specific callbacks for this to handle syscalls trace events Make 'perf stat -e syscalls:sys_enter_blah' work with syscall style tracepoints. Signed-off-by: Jason Baron Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Signed-off-by: Frederic Weisbecker --- include/linux/perf_counter.h | 2 + include/linux/syscalls.h | 52 +++++++++++++++++- include/trace/syscall.h | 7 +++ kernel/trace/trace_syscalls.c | 121 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 181 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a9d823a93fe8..8e6460fb4c02 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -734,6 +734,8 @@ extern int sysctl_perf_counter_mlock; extern int sysctl_perf_counter_sample_rate; extern void perf_counter_init(void); +extern void perf_tpcounter_event(int event_id, u64 addr, u64 count, + void *record, int entry_size); #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ce4b01c658eb..5541e75e140a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -98,6 +98,53 @@ struct perf_counter_attr; #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) #define __SC_TEST6(t6, a6, ...) 
__SC_TEST(t6); __SC_TEST5(__VA_ARGS__) +#ifdef CONFIG_EVENT_PROFILE +#define TRACE_SYS_ENTER_PROFILE(sname) \ +static int prof_sysenter_enable_##sname(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + if (!atomic_inc_return(&event_enter_##sname.profile_count)) \ + ret = reg_prof_syscall_enter("sys"#sname); \ + return ret; \ +} \ + \ +static void prof_sysenter_disable_##sname(struct ftrace_event_call *event_call)\ +{ \ + if (atomic_add_negative(-1, &event_enter_##sname.profile_count)) \ + unreg_prof_syscall_enter("sys"#sname); \ +} + +#define TRACE_SYS_EXIT_PROFILE(sname) \ +static int prof_sysexit_enable_##sname(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + if (!atomic_inc_return(&event_exit_##sname.profile_count)) \ + ret = reg_prof_syscall_exit("sys"#sname); \ + return ret; \ +} \ + \ +static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ +{ \ + if (atomic_add_negative(-1, &event_exit_##sname.profile_count)) \ + unreg_prof_syscall_exit("sys"#sname); \ +} + +#define TRACE_SYS_ENTER_PROFILE_INIT(sname) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = prof_sysenter_enable_##sname, \ + .profile_disable = prof_sysenter_disable_##sname, + +#define TRACE_SYS_EXIT_PROFILE_INIT(sname) \ + .profile_count = ATOMIC_INIT(-1), \ + .profile_enable = prof_sysexit_enable_##sname, \ + .profile_disable = prof_sysexit_disable_##sname, +#else +#define TRACE_SYS_ENTER_PROFILE(sname) +#define TRACE_SYS_ENTER_PROFILE_INIT(sname) +#define TRACE_SYS_EXIT_PROFILE(sname) +#define TRACE_SYS_EXIT_PROFILE_INIT(sname) +#endif + #ifdef CONFIG_FTRACE_SYSCALLS #define __SC_STR_ADECL1(t, a) #a #define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__) @@ -113,7 +160,6 @@ struct perf_counter_attr; #define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) #define __SC_STR_TDECL6(t, a, ...) 
#t, __SC_STR_TDECL5(__VA_ARGS__) - #define SYSCALL_TRACE_ENTER_EVENT(sname) \ static struct ftrace_event_call event_enter_##sname; \ struct trace_event enter_syscall_print_##sname = { \ @@ -134,6 +180,7 @@ struct perf_counter_attr; init_preds(&event_enter_##sname); \ return 0; \ } \ + TRACE_SYS_ENTER_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -145,6 +192,7 @@ struct perf_counter_attr; .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = "sys"#sname, \ + TRACE_SYS_ENTER_PROFILE_INIT(sname) \ } #define SYSCALL_TRACE_EXIT_EVENT(sname) \ @@ -167,6 +215,7 @@ struct perf_counter_attr; init_preds(&event_exit_##sname); \ return 0; \ } \ + TRACE_SYS_EXIT_PROFILE(sname); \ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) \ @@ -178,6 +227,7 @@ struct perf_counter_attr; .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ .data = "sys"#sname, \ + TRACE_SYS_EXIT_PROFILE_INIT(sname) \ } #define SYSCALL_METADATA(sname, nb) \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index df628404241a..3ab6dd18fa3a 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -57,6 +57,13 @@ extern int reg_event_syscall_exit(void *ptr); extern void unreg_event_syscall_exit(void *ptr); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); +#endif +#ifdef CONFIG_EVENT_PROFILE +int reg_prof_syscall_enter(char *name); +void unreg_prof_syscall_enter(char *name); +int reg_prof_syscall_exit(char *name); +void unreg_prof_syscall_exit(char *name); + #endif #endif /* _TRACE_SYSCALL_H */ diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index e58a9c11ba85..f4eaec3d559a 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include "trace_output.h" @@ -252,3 +253,123 @@ struct trace_event event_syscall_enter = { struct trace_event event_syscall_exit = { .trace = print_syscall_exit, }; + +#ifdef CONFIG_EVENT_PROFILE +static DECLARE_BITMAP(enabled_prof_enter_syscalls, FTRACE_SYSCALL_MAX); +static DECLARE_BITMAP(enabled_prof_exit_syscalls, FTRACE_SYSCALL_MAX); +static int sys_prof_refcount_enter; +static int sys_prof_refcount_exit; + +static void prof_syscall_enter(struct pt_regs *regs, long id) +{ + struct syscall_metadata *sys_data; + int syscall_nr; + + syscall_nr = syscall_get_nr(current, regs); + if (!test_bit(syscall_nr, enabled_prof_enter_syscalls)) + return; + + sys_data = syscall_nr_to_meta(syscall_nr); + if (!sys_data) + return; + + perf_tpcounter_event(sys_data->enter_id, 0, 1, NULL, 0); +} + +int reg_prof_syscall_enter(char *name) +{ + int ret = 0; + int num; + + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return -ENOSYS; + + mutex_lock(&syscall_trace_lock); + if (!sys_prof_refcount_enter) + ret = register_trace_syscall_enter(prof_syscall_enter); + if (ret) { + pr_info("event trace: Could not activate" + "syscall entry trace point"); + } else { + set_bit(num, enabled_prof_enter_syscalls); + sys_prof_refcount_enter++; + } + mutex_unlock(&syscall_trace_lock); + return ret; +} + +void unreg_prof_syscall_enter(char *name) +{ + int num; + + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return; + + 
mutex_lock(&syscall_trace_lock); + sys_prof_refcount_enter--; + clear_bit(num, enabled_prof_enter_syscalls); + if (!sys_prof_refcount_enter) + unregister_trace_syscall_enter(prof_syscall_enter); + mutex_unlock(&syscall_trace_lock); +} + +static void prof_syscall_exit(struct pt_regs *regs, long ret) +{ + struct syscall_metadata *sys_data; + int syscall_nr; + + syscall_nr = syscall_get_nr(current, regs); + if (!test_bit(syscall_nr, enabled_prof_exit_syscalls)) + return; + + sys_data = syscall_nr_to_meta(syscall_nr); + if (!sys_data) + return; + + perf_tpcounter_event(sys_data->exit_id, 0, 1, NULL, 0); +} + +int reg_prof_syscall_exit(char *name) +{ + int ret = 0; + int num; + + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return -ENOSYS; + + mutex_lock(&syscall_trace_lock); + if (!sys_prof_refcount_exit) + ret = register_trace_syscall_exit(prof_syscall_exit); + if (ret) { + pr_info("event trace: Could not activate" + "syscall entry trace point"); + } else { + set_bit(num, enabled_prof_exit_syscalls); + sys_prof_refcount_exit++; + } + mutex_unlock(&syscall_trace_lock); + return ret; +} + +void unreg_prof_syscall_exit(char *name) +{ + int num; + + num = syscall_name_to_nr(name); + if (num < 0 || num >= FTRACE_SYSCALL_MAX) + return; + + mutex_lock(&syscall_trace_lock); + sys_prof_refcount_exit--; + clear_bit(num, enabled_prof_exit_syscalls); + if (!sys_prof_refcount_exit) + unregister_trace_syscall_exit(prof_syscall_exit); + mutex_unlock(&syscall_trace_lock); +} + +#endif + + -- cgit v1.2.3 From e8f9f4d79a677f55c8ec3acbe87b33a87e2df0de Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Aug 2009 17:42:52 +0200 Subject: tracing: Add ftrace event call parameter to its field descriptor handler Add the struct ftrace_event_call as a parameter of its show_format() callback. This way we can use it from the syscall trace events to retrieve the syscall name from the ftrace event call parameter and describe its fields using the syscalls metadata. 
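[Editor's illustration, not part of this patch: with the event call now passed through, a syscall-specific format callback can recover which syscall it describes from call->data and walk the arch-provided syscall metadata. The sketch below reuses helpers that appear elsewhere in this series (syscall_name_to_nr, syscall_nr_to_meta, trace_seq_printf); example_show_format is a hypothetical name, and the actual implementation (ftrace_format_syscall) is added by the following patch.]

static int example_show_format(struct ftrace_event_call *call,
			       struct trace_seq *s)
{
	struct syscall_metadata *meta;
	int nr, i;
	int ret = 0;

	/* for syscall events, call->data holds the "sys_foo" name */
	nr = syscall_name_to_nr((char *)call->data);
	if (nr < 0)
		return 0;

	meta = syscall_nr_to_meta(nr);
	if (!meta)
		return 0;

	/* describe each argument from the metadata type/name strings */
	for (i = 0; i < meta->nb_args; i++) {
		ret = trace_seq_printf(s, "\tfield:%s %s;\n",
				       meta->types[i], meta->args[i]);
		if (!ret)
			return 0;
	}

	return ret;
}
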
Signed-off-by: Frederic Weisbecker Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Cc: Jason Baron --- include/linux/ftrace_event.h | 3 ++- include/trace/ftrace.h | 3 ++- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_export.c | 6 ++++-- 4 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 8544f121d9f1..189806b6e69e 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -116,7 +116,8 @@ struct ftrace_event_call { void (*unregfunc)(void *); int id; int (*raw_init)(void); - int (*show_format)(struct trace_seq *s); + int (*show_format)(struct ftrace_event_call *call, + struct trace_seq *s); int (*define_fields)(void); struct list_head fields; int filter_active; diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 46d81b5e8610..b250b0616571 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -151,7 +151,8 @@ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ static int \ -ftrace_format_##call(struct trace_seq *s) \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct ftrace_raw_##call field __attribute__((unused)); \ int ret = 0; \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 1d289e2d6693..b568ade8f453 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -576,7 +576,7 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_printf(s, "format:\n"); trace_write_header(s); - r = call->show_format(s); + r = call->show_format(call, s); if (!r) { /* * ug! The format output is bigger than a PAGE!! diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index d06cf898dc86..956d4bc675e5 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -60,7 +60,8 @@ extern void __bad_type_size(void); #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ static int \ -ftrace_format_##call(struct trace_seq *s) \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct args field; \ int ret; \ @@ -76,7 +77,8 @@ ftrace_format_##call(struct trace_seq *s) \ #define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, \ tpfmt) \ static int \ -ftrace_format_##call(struct trace_seq *s) \ +ftrace_format_##call(struct ftrace_event_call *unused, \ + struct trace_seq *s) \ { \ struct args field; \ int ret; \ -- cgit v1.2.3 From dc4ddb4c0b7348f1c9759ae8a9e7d734dc1cda82 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 11 Aug 2009 19:03:54 +0200 Subject: tracing: Add fields format definition for syscall events Define the format of the syscall trace fields to parse the binary values from a raw trace using the syscall events "format" file. This is defined dynamically using the syscalls metadata. It prepares the export of syscall event raw records to perf counters. 
Example: $ cat /debug/tracing/events/syscalls/sys_enter_sched_getparam/format name: sys_enter_sched_getparam ID: 39 format: field:unsigned short common_type; offset:0; size:2; field:unsigned char common_flags; offset:2; size:1; field:unsigned char common_preempt_count; offset:3; size:1; field:int common_pid; offset:4; size:4; field:int common_tgid; offset:8; size:4; field:pid_t pid; offset:12; size:8; field:struct sched_param * param; offset:20; size:8; print fmt: "pid: 0x%08lx, param: 0x%08lx", ((unsigned long)(REC->pid)), ((unsigned long)(REC->param)) Signed-off-by: Frederic Weisbecker Cc: Lai Jiangshan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Li Zefan Cc: Masami Hiramatsu Cc: Jason Baron --- include/linux/syscalls.h | 1 + include/trace/syscall.h | 2 ++ kernel/trace/trace_syscalls.c | 46 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 5541e75e140a..87d06c173ddc 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -189,6 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .system = "syscalls", \ .event = &event_syscall_enter, \ .raw_init = init_enter_##sname, \ + .show_format = ftrace_format_syscall, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = "sys"#sname, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 3ab6dd18fa3a..0cb03625edd9 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -55,6 +55,8 @@ extern int reg_event_syscall_enter(void *ptr); extern void unreg_event_syscall_enter(void *ptr); extern int reg_event_syscall_exit(void *ptr); extern void unreg_event_syscall_exit(void *ptr); +extern int +ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index f4eaec3d559a..9ee6386cf842 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -105,6 +105,52 @@ print_syscall_exit(struct trace_iterator *iter, int flags) return TRACE_TYPE_HANDLED; } +int ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s) +{ + int i; + int nr; + int ret = 0; + struct syscall_metadata *entry; + int offset = sizeof(struct trace_entry); + + nr = syscall_name_to_nr((char *)call->data); + entry = syscall_nr_to_meta(nr); + + if (!entry) + return ret; + + for (i = 0; i < entry->nb_args; i++) { + ret = trace_seq_printf(s, "\tfield:%s %s;", entry->types[i], + entry->args[i]); + if (!ret) + return 0; + ret = trace_seq_printf(s, "\toffset:%d;\tsize:%lu;\n", offset, + sizeof(unsigned long)); + if (!ret) + return 0; + offset += sizeof(unsigned long); + } + + trace_seq_printf(s, "\nprint fmt: \""); + for (i = 0; i < entry->nb_args; i++) { + ret = trace_seq_printf(s, "%s: 0x%%0%lulx%s", entry->args[i], + sizeof(unsigned long), + i == entry->nb_args - 1 ? "\", " : ", "); + if (!ret) + return 0; + } + + for (i = 0; i < entry->nb_args; i++) { + ret = trace_seq_printf(s, "((unsigned long)(REC->%s))%s", + entry->args[i], + i == entry->nb_args - 1 ? 
"\n" : ", "); + if (!ret) + return 0; + } + + return ret; +} + void ftrace_syscall_enter(struct pt_regs *regs, long id) { struct syscall_trace_enter *entry; -- cgit v1.2.3 From 25a70e38cd57952b09a013bf070e03705ee4f2ff Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Wed, 12 Aug 2009 00:50:09 -0700 Subject: Input: eeti_ts - allow active high irq lines This adds a struct eeti_ts_platform_data which currently holds only one value to specify the interrupt polarity. The driver has a fallback if no platform data is passed in via the i2c_board_info, so no regression is caused. Signed-off-by: Daniel Mack Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/eeti_ts.c | 22 +++++++++++++++++++--- include/linux/input/eeti_ts.h | 9 +++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 include/linux/input/eeti_ts.h (limited to 'include') diff --git a/drivers/input/touchscreen/eeti_ts.c b/drivers/input/touchscreen/eeti_ts.c index 3ab92222a525..9029bd3f34e5 100644 --- a/drivers/input/touchscreen/eeti_ts.c +++ b/drivers/input/touchscreen/eeti_ts.c @@ -32,6 +32,7 @@ #include #include #include +#include static int flip_x; module_param(flip_x, bool, 0644); @@ -46,7 +47,7 @@ struct eeti_ts_priv { struct input_dev *input; struct work_struct work; struct mutex mutex; - int irq; + int irq, irq_active_high; }; #define EETI_TS_BITDEPTH (11) @@ -58,6 +59,11 @@ struct eeti_ts_priv { #define REPORT_BIT_HAS_PRESSURE (1 << 6) #define REPORT_RES_BITS(v) (((v) >> 1) + EETI_TS_BITDEPTH) +static inline int eeti_ts_irq_active(struct eeti_ts_priv *priv) +{ + return gpio_get_value(irq_to_gpio(priv->irq)) == priv->irq_active_high; +} + static void eeti_ts_read(struct work_struct *work) { char buf[6]; @@ -67,7 +73,7 @@ static void eeti_ts_read(struct work_struct *work) mutex_lock(&priv->mutex); - while (!gpio_get_value(irq_to_gpio(priv->irq)) && --to) + while (eeti_ts_irq_active(priv) && --to) i2c_master_recv(priv->client, buf, sizeof(buf)); if (!to) { @@ -140,8 +146,10 @@ static void eeti_ts_close(struct input_dev *dev) static int __devinit eeti_ts_probe(struct i2c_client *client, const struct i2c_device_id *idp) { + struct eeti_ts_platform_data *pdata; struct eeti_ts_priv *priv; struct input_dev *input; + unsigned int irq_flags; int err = -ENOMEM; /* In contrast to what's described in the datasheet, there seems @@ -180,6 +188,14 @@ static int __devinit eeti_ts_probe(struct i2c_client *client, priv->input = input; priv->irq = client->irq; + pdata = client->dev.platform_data; + + if (pdata) + priv->irq_active_high = pdata->irq_active_high; + + irq_flags = priv->irq_active_high ? 
+ IRQF_TRIGGER_RISING : IRQF_TRIGGER_FALLING; + INIT_WORK(&priv->work, eeti_ts_read); i2c_set_clientdata(client, priv); input_set_drvdata(input, priv); @@ -188,7 +204,7 @@ static int __devinit eeti_ts_probe(struct i2c_client *client, if (err) goto err1; - err = request_irq(priv->irq, eeti_ts_isr, IRQF_TRIGGER_FALLING, + err = request_irq(priv->irq, eeti_ts_isr, irq_flags, client->name, priv); if (err) { dev_err(&client->dev, "Unable to request touchscreen IRQ.\n"); diff --git a/include/linux/input/eeti_ts.h b/include/linux/input/eeti_ts.h new file mode 100644 index 000000000000..f875b316249d --- /dev/null +++ b/include/linux/input/eeti_ts.h @@ -0,0 +1,9 @@ +#ifndef LINUX_INPUT_EETI_TS_H +#define LINUX_INPUT_EETI_TS_H + +struct eeti_ts_platform_data { + unsigned int irq_active_high; +}; + +#endif /* LINUX_INPUT_EETI_TS_H */ + -- cgit v1.2.3 From 1ae88b2e446261c038f2c0c3150ffae142b227a2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 12 Aug 2009 09:12:30 -0400 Subject: NFS: Fix an O_DIRECT Oops... We can't call nfs_readdata_release()/nfs_writedata_release() without first initialising and referencing args.context. Doing so inside nfs_direct_read_schedule_segment()/nfs_direct_write_schedule_segment() causes an Oops. We should rather be calling nfs_readdata_free()/nfs_writedata_free() in those cases. Looking at the O_DIRECT code, the "struct nfs_direct_req" is already referencing the nfs_open_context for us. Since the readdata and writedata structures carry a reference to that, we can simplify things by getting rid of the extra nfs_open_context references, so that we can replace all instances of nfs_readdata_release()/nfs_writedata_release(). Reported-by: Catalin Marinas Signed-off-by: Trond Myklebust Tested-by: Catalin Marinas Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/nfs/direct.c | 20 ++++++++++---------- fs/nfs/read.c | 6 ++---- fs/nfs/write.c | 6 ++---- include/linux/nfs_fs.h | 5 ++--- 4 files changed, 16 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 489fc01a3204..e4e089a8f294 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -255,7 +255,7 @@ static void nfs_direct_read_release(void *calldata) if (put_dreq(dreq)) nfs_direct_complete(dreq); - nfs_readdata_release(calldata); + nfs_readdata_free(data); } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -314,14 +314,14 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->npages, 1, 0, data->pagevec, NULL); up_read(¤t->mm->mmap_sem); if (result < 0) { - nfs_readdata_release(data); + nfs_readdata_free(data); break; } if ((unsigned)result < data->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { nfs_direct_release_pages(data->pagevec, result); - nfs_readdata_release(data); + nfs_readdata_free(data); break; } bytes -= pgbase; @@ -334,7 +334,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = get_nfs_open_context(ctx); + data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; @@ -441,7 +441,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); list_del(&data->pages); nfs_direct_release_pages(data->pagevec, data->npages); - nfs_writedata_release(data); + nfs_writedata_free(data); } } @@ -534,7 +534,7 @@ 
static void nfs_direct_commit_release(void *calldata) dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); nfs_direct_write_complete(dreq, data->inode); - nfs_commitdata_release(calldata); + nfs_commit_free(data); } static const struct rpc_call_ops nfs_commit_direct_ops = { @@ -570,7 +570,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->args.fh = NFS_FH(data->inode); data->args.offset = 0; data->args.count = 0; - data->args.context = get_nfs_open_context(dreq->ctx); + data->args.context = dreq->ctx; data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -734,14 +734,14 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->npages, 0, 0, data->pagevec, NULL); up_read(¤t->mm->mmap_sem); if (result < 0) { - nfs_writedata_release(data); + nfs_writedata_free(data); break; } if ((unsigned)result < data->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { nfs_direct_release_pages(data->pagevec, result); - nfs_writedata_release(data); + nfs_writedata_free(data); break; } bytes -= pgbase; @@ -756,7 +756,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = get_nfs_open_context(ctx); + data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 73ea5e8d66ce..12c9e66d3f1d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -60,17 +60,15 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) return p; } -static void nfs_readdata_free(struct nfs_read_data *p) +void nfs_readdata_free(struct nfs_read_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_rdata_mempool); } -void nfs_readdata_release(void *data) +static void nfs_readdata_release(struct nfs_read_data *rdata) { - struct nfs_read_data *rdata = data; - put_nfs_open_context(rdata->args.context); nfs_readdata_free(rdata); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0a0a2ff767c3..a34fae21fe10 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -87,17 +87,15 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) return p; } -static void nfs_writedata_free(struct nfs_write_data *p) +void nfs_writedata_free(struct nfs_write_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_wdata_mempool); } -void nfs_writedata_release(void *data) +static void nfs_writedata_release(struct nfs_write_data *wdata) { - struct nfs_write_data *wdata = data; - put_nfs_open_context(wdata->args.context); nfs_writedata_free(wdata); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fdffb413b192..f6b90240dd41 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -473,7 +473,6 @@ extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); -extern void nfs_writedata_release(void *); /* * Try to write back everything synchronously (but check the @@ -488,7 +487,6 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commitdata_alloc(void); 
extern void nfs_commit_free(struct nfs_write_data *wdata); -extern void nfs_commitdata_release(void *wdata); #else static inline int nfs_commit_inode(struct inode *inode, int how) @@ -507,6 +505,7 @@ nfs_have_writebacks(struct inode *inode) * Allocate nfs_write_data structures */ extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages); +extern void nfs_writedata_free(struct nfs_write_data *); /* * linux/fs/nfs/read.c @@ -515,7 +514,6 @@ extern int nfs_readpage(struct file *, struct page *); extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); -extern void nfs_readdata_release(void *data); extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); @@ -523,6 +521,7 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, * Allocate nfs_read_data structures */ extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages); +extern void nfs_readdata_free(struct nfs_read_data *); /* * linux/fs/nfs3proc.c -- cgit v1.2.3 From a75f0236292a5fca65f26efedca48bd07db1834d Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 12 Aug 2009 02:30:10 +0200 Subject: drm: Add more standard TV properties. Overscan, saturation, hue. Used in the nouveau driver for GPUs with integrated TV encoders. Signed-off-by: Francisco Jerez Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 18 ++++++++++++++++++ include/drm/drm_crtc.h | 3 +++ 2 files changed, 21 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index a8c831134fc3..362a538cdedc 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -736,6 +736,24 @@ int drm_mode_create_tv_properties(struct drm_device *dev, int num_modes, dev->mode_config.tv_flicker_reduction_property->values[0] = 0; dev->mode_config.tv_flicker_reduction_property->values[1] = 100; + dev->mode_config.tv_overscan_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "overscan", 2); + dev->mode_config.tv_overscan_property->values[0] = 0; + dev->mode_config.tv_overscan_property->values[1] = 100; + + dev->mode_config.tv_saturation_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "saturation", 2); + dev->mode_config.tv_saturation_property->values[0] = 0; + dev->mode_config.tv_saturation_property->values[1] = 100; + + dev->mode_config.tv_hue_property = + drm_property_create(dev, DRM_MODE_PROP_RANGE, + "hue", 2); + dev->mode_config.tv_hue_property->values[0] = 0; + dev->mode_config.tv_hue_property->values[1] = 100; + return 0; } EXPORT_SYMBOL(drm_mode_create_tv_properties); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 5f2cc0ca4c7d..db92a83f8ca9 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -575,6 +575,9 @@ struct drm_mode_config { struct drm_property *tv_brightness_property; struct drm_property *tv_contrast_property; struct drm_property *tv_flicker_reduction_property; + struct drm_property *tv_overscan_property; + struct drm_property *tv_saturation_property; + struct drm_property *tv_hue_property; /* Optional properties */ struct drm_property *scaling_mode_property; -- cgit v1.2.3 From 8505091d2a067ad27d4a82df9cff8eae6ee52fca Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Aug 2009 02:58:36 +0000 Subject: af_ieee802154: drop IEEE802154_SIOC_ADD_SLAVE declaration IEEE802154_SIOC_ADD_SLAVE was used to allocate 802.15.4 interfaces on the top of radio. 
It's not used anymore, drop it. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- include/net/af_ieee802154.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/net/af_ieee802154.h b/include/net/af_ieee802154.h index 0d78605fb1a6..e9c70ead23fc 100644 --- a/include/net/af_ieee802154.h +++ b/include/net/af_ieee802154.h @@ -54,7 +54,4 @@ struct sockaddr_ieee802154 { struct ieee802154_addr addr; }; -/* master device */ -#define IEEE802154_SIOC_ADD_SLAVE (SIOCDEVPRIVATE + 0) - #endif -- cgit v1.2.3 From 78090a58c49f2f6213d0bb1b3b4c4df73e26865f Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Aug 2009 02:58:38 +0000 Subject: nl802154: make ieee802154_policy constant Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- include/linux/nl802154.h | 2 +- net/ieee802154/nl_policy.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 2cda00ccfcca..266dd96ced96 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -69,7 +69,7 @@ enum { #define IEEE802154_ATTR_MAX (__IEEE802154_ATTR_MAX - 1) -extern struct nla_policy ieee802154_policy[]; +extern const struct nla_policy ieee802154_policy[]; /* commands */ /* REQ should be responded with CONF diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index c7d71d1adcac..83cb4ccef90d 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -24,7 +24,7 @@ #define NLA_HW_ADDR NLA_U64 -struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { +const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_DEV_NAME] = { .type = NLA_STRING, }, [IEEE802154_ATTR_DEV_INDEX] = { .type = NLA_U32, }, @@ -50,3 +50,4 @@ struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_DURATION] = { .type = NLA_U8, }, [IEEE802154_ATTR_ED_LIST] = { .len = 27 }, }; + -- cgit v1.2.3 From 8e753dd0a82bd266256c20a20b98dfa48f98d21e Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Aug 2009 02:58:40 +0000 Subject: nl802154: add support for dumping WPAN interface information Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. 
Miller --- include/linux/nl802154.h | 2 + net/ieee802154/netlink.c | 106 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 266dd96ced96..9a1af5f871a3 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -111,6 +111,8 @@ enum { IEEE802154_RX_ENABLE_REQ, /* Not supported yet */ IEEE802154_RX_ENABLE_CONF, /* Not supported yet */ + IEEE802154_LIST_IFACE, + __IEEE802154_CMD_MAX, }; diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index a615b9d13212..3fe02b394ad6 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -19,6 +19,7 @@ * Written by: * Sergey Lapin * Dmitry Eremin-Solenikov + * Maxim Osipov */ #include @@ -26,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -73,7 +75,7 @@ static int ieee802154_nl_finish(struct sk_buff *msg) /* XXX: nlh is right at the start of msg */ void *hdr = genlmsg_data(NLMSG_DATA(msg->data)); - if (!genlmsg_end(msg, hdr)) + if (genlmsg_end(msg, hdr) < 0) goto out; return genlmsg_multicast(msg, 0, ieee802154_coord_mcgrp.id, @@ -260,6 +262,35 @@ nla_put_failure: } EXPORT_SYMBOL(ieee802154_nl_scan_confirm); +static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 pid, + u32 seq, int flags, struct net_device *dev) +{ + void *hdr; + + pr_debug("%s\n", __func__); + + hdr = genlmsg_put(msg, 0, seq, &ieee802154_coordinator_family, flags, + IEEE802154_LIST_IFACE); + if (!hdr) + goto out; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + NLA_PUT_U16(msg, IEEE802154_ATTR_SHORT_ADDR, + ieee802154_mlme_ops(dev)->get_short_addr(dev)); + NLA_PUT_U16(msg, IEEE802154_ATTR_PAN_ID, + ieee802154_mlme_ops(dev)->get_pan_id(dev)); + return genlmsg_end(msg, hdr); + +nla_put_failure: + genlmsg_cancel(msg, hdr); +out: + return -EMSGSIZE; +} + /* Requests from userspace */ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) { @@ -272,7 +303,7 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) dev = dev_get_by_name(&init_net, name); } else if (info->attrs[IEEE802154_ATTR_DEV_INDEX]) dev = dev_get_by_index(&init_net, - nla_get_u32(info->attrs[IEEE802154_ATTR_DEV_INDEX])); + nla_get_u32(info->attrs[IEEE802154_ATTR_DEV_INDEX])); else return NULL; @@ -466,6 +497,67 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) return ret; } +static int ieee802154_list_iface(struct sk_buff *skb, + struct genl_info *info) +{ + /* Request for interface name, index, type, IEEE address, + PAN Id, short address */ + struct sk_buff *msg; + struct net_device *dev = NULL; + int rc = -ENOBUFS; + + pr_debug("%s\n", __func__); + + dev = ieee802154_nl_get_dev(info); + if (!dev) + return -ENODEV; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + goto out_dev; + + rc = ieee802154_nl_fill_iface(msg, info->snd_pid, info->snd_seq, + 0, dev); + if (rc < 0) + goto out_free; + + dev_put(dev); + + return genlmsg_unicast(&init_net, msg, info->snd_pid); +out_free: + nlmsg_free(msg); +out_dev: + dev_put(dev); + return rc; + +} + +static int ieee802154_dump_iface(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx; + int s_idx = cb->args[0]; + + pr_debug("%s\n", __func__); + + idx = 0; + 
for_each_netdev(net, dev) { + if (idx < s_idx || (dev->type != ARPHRD_IEEE802154)) + goto cont; + + if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) + break; +cont: + idx++; + } + cb->args[0] = idx; + + return skb->len; +} + #define IEEE802154_OP(_cmd, _func) \ { \ .cmd = _cmd, \ @@ -475,12 +567,22 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) .flags = GENL_ADMIN_PERM, \ } +#define IEEE802154_DUMP(_cmd, _func, _dump) \ + { \ + .cmd = _cmd, \ + .policy = ieee802154_policy, \ + .doit = _func, \ + .dumpit = _dump, \ + } + static struct genl_ops ieee802154_coordinator_ops[] = { IEEE802154_OP(IEEE802154_ASSOCIATE_REQ, ieee802154_associate_req), IEEE802154_OP(IEEE802154_ASSOCIATE_RESP, ieee802154_associate_resp), IEEE802154_OP(IEEE802154_DISASSOCIATE_REQ, ieee802154_disassociate_req), IEEE802154_OP(IEEE802154_SCAN_REQ, ieee802154_scan_req), IEEE802154_OP(IEEE802154_START_REQ, ieee802154_start_req), + IEEE802154_DUMP(IEEE802154_LIST_IFACE, ieee802154_list_iface, + ieee802154_dump_iface), }; static int __init ieee802154_nl_init(void) -- cgit v1.2.3 From 99eb8558642b988055d2b8b16a334475550f78d3 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Aug 2009 02:58:43 +0000 Subject: af_ieee802154: add support for WANT_ACK socket option Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- include/net/af_ieee802154.h | 5 ++++ net/ieee802154/dgram.c | 58 +++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/af_ieee802154.h b/include/net/af_ieee802154.h index e9c70ead23fc..75e64c7a2960 100644 --- a/include/net/af_ieee802154.h +++ b/include/net/af_ieee802154.h @@ -54,4 +54,9 @@ struct sockaddr_ieee802154 { struct ieee802154_addr addr; }; +/* get/setsockopt */ +#define SOL_IEEE802154 0 + +#define WPAN_WANTACK 0 + #endif diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 25018a9a53aa..77ae6852b93d 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -44,6 +44,7 @@ struct dgram_sock { struct ieee802154_addr dst_addr; unsigned bound:1; + unsigned want_ack:1; }; static inline struct dgram_sock *dgram_sk(const struct sock *sk) @@ -51,7 +52,6 @@ static inline struct dgram_sock *dgram_sk(const struct sock *sk) return container_of(sk, struct dgram_sock, sk); } - static void dgram_hash(struct sock *sk) { write_lock_bh(&dgram_lock); @@ -74,6 +74,7 @@ static int dgram_init(struct sock *sk) ro->dst_addr.addr_type = IEEE802154_ADDR_LONG; ro->dst_addr.pan_id = 0xffff; + ro->want_ack = 1; memset(&ro->dst_addr.hwaddr, 0xff, sizeof(ro->dst_addr.hwaddr)); return 0; } @@ -237,7 +238,10 @@ static int dgram_sendmsg(struct kiocb *iocb, struct sock *sk, skb_reset_network_header(skb); - mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA | MAC_CB_FLAG_ACKREQ; + mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA; + if (ro->want_ack) + mac_cb(skb)->flags |= MAC_CB_FLAG_ACKREQ; + mac_cb(skb)->seq = ieee802154_mlme_ops(dev)->get_dsn(dev); err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &ro->dst_addr, ro->bound ? 
&ro->src_addr : NULL, size); @@ -382,13 +386,59 @@ int ieee802154_dgram_deliver(struct net_device *dev, struct sk_buff *skb) static int dgram_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - return -EOPNOTSUPP; + struct dgram_sock *ro = dgram_sk(sk); + + int val, len; + + if (level != SOL_IEEE802154) + return -EOPNOTSUPP; + + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + + switch (optname) { + case WPAN_WANTACK: + val = ro->want_ack; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, &val, len)) + return -EFAULT; + return 0; } static int dgram_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user optlen) { - return -EOPNOTSUPP; + struct dgram_sock *ro = dgram_sk(sk); + int val; + int err = 0; + + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + case WPAN_WANTACK: + ro->want_ack = !!val; + break; + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; } struct proto ieee802154_dgram_prot = { -- cgit v1.2.3 From acb8aacda3f0bc3aeb652f4365c078a2b0adb0bf Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Aug 2009 02:58:44 +0000 Subject: nl802154: support START-CONFIRM primitive Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- include/net/nl802154.h | 9 +++++++++ net/ieee802154/netlink.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'include') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 6096096f6d7d..e554ecd3727a 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -114,4 +114,13 @@ int ieee802154_nl_scan_confirm(struct net_device *dev, int ieee802154_nl_beacon_indic(struct net_device *dev, u16 panid, u16 coord_addr); +/** + * ieee802154_nl_start_confirm - Notify userland of completion of start. + * @dev: The device which was instructed to scan. + * @status: The status of the scan operation. + * + * Note: This is in section 7.1.14 of the IEEE 802.15.4 document. 
+ */ +int ieee802154_nl_start_confirm(struct net_device *dev, u8 status); + #endif diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 3fe02b394ad6..cd0567f06716 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -31,6 +31,7 @@ #include #include #include +#include #include static unsigned int ieee802154_seq_num; @@ -262,6 +263,31 @@ nla_put_failure: } EXPORT_SYMBOL(ieee802154_nl_scan_confirm); +int ieee802154_nl_start_confirm(struct net_device *dev, u8 status) +{ + struct sk_buff *msg; + + pr_debug("%s\n", __func__); + + msg = ieee802154_nl_create(0, IEEE802154_START_CONF); + if (!msg) + return -ENOBUFS; + + NLA_PUT_STRING(msg, IEEE802154_ATTR_DEV_NAME, dev->name); + NLA_PUT_U32(msg, IEEE802154_ATTR_DEV_INDEX, dev->ifindex); + NLA_PUT(msg, IEEE802154_ATTR_HW_ADDR, IEEE802154_ADDR_LEN, + dev->dev_addr); + + NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); + + return ieee802154_nl_finish(msg); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} +EXPORT_SYMBOL(ieee802154_nl_start_confirm); + static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct net_device *dev) { @@ -462,6 +488,12 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) blx = nla_get_u8(info->attrs[IEEE802154_ATTR_BAT_EXT]); coord_realign = nla_get_u8(info->attrs[IEEE802154_ATTR_COORD_REALIGN]); + if (addr.short_addr == IEEE802154_ADDR_BROADCAST) { + ieee802154_nl_start_confirm(dev, IEEE802154_NO_SHORT_ADDRESS); + dev_put(dev); + return -EINVAL; + } + ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, bcn_ord, sf_ord, pan_coord, blx, coord_realign); -- cgit v1.2.3 From aed7df86983ead0af8364fd26d8a9609ef2ed584 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Sun, 9 Aug 2009 03:27:18 +0000 Subject: net: include/linux/icmpv6: includecheck fix for icmpv6.h fix the following 'make includecheck' warning: include/linux/icmpv6.h: linux/skbuff.h is included more than once. Signed-off-by: Jaswinder Singh Rajput Signed-off-by: David S. Miller --- include/linux/icmpv6.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index b6a85183c333..c0d8357917e2 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -171,8 +171,6 @@ struct icmp6_filter { #ifdef __KERNEL__ #include -#include - extern void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, -- cgit v1.2.3 From 2e955856ff1212bd63dbbf403940c72eca5b4a8f Mon Sep 17 00:00:00 2001 From: françois romieu Date: Mon, 10 Aug 2009 19:44:19 +0000 Subject: r8169: phy init for the 8169scd Synced with Realtek's 6.011.00 r8169 driver. Signed-off-by: Francois Romieu Cc: Edward Hsu Signed-off-by: David S. 
Miller --- drivers/net/r8169.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 2 ++ 2 files changed, 70 insertions(+) (limited to 'include') diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 86722886ae47..4470fefbe97e 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -1389,6 +1389,71 @@ static void rtl8169sb_hw_phy_config(void __iomem *ioaddr) rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init)); } +static void rtl8169scd_hw_phy_config_quirk(struct rtl8169_private *tp, + void __iomem *ioaddr) +{ + struct pci_dev *pdev = tp->pci_dev; + u16 vendor_id, device_id; + + pci_read_config_word(pdev, PCI_SUBSYSTEM_VENDOR_ID, &vendor_id); + pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &device_id); + + if ((vendor_id != PCI_VENDOR_ID_GIGABYTE) || (device_id != 0xe000)) + return; + + mdio_write(ioaddr, 0x1f, 0x0001); + mdio_write(ioaddr, 0x10, 0xf01b); + mdio_write(ioaddr, 0x1f, 0x0000); +} + +static void rtl8169scd_hw_phy_config(struct rtl8169_private *tp, + void __iomem *ioaddr) +{ + struct phy_reg phy_reg_init[] = { + { 0x1f, 0x0001 }, + { 0x04, 0x0000 }, + { 0x03, 0x00a1 }, + { 0x02, 0x0008 }, + { 0x01, 0x0120 }, + { 0x00, 0x1000 }, + { 0x04, 0x0800 }, + { 0x04, 0x9000 }, + { 0x03, 0x802f }, + { 0x02, 0x4f02 }, + { 0x01, 0x0409 }, + { 0x00, 0xf099 }, + { 0x04, 0x9800 }, + { 0x04, 0xa000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0xff95 }, + { 0x00, 0xba00 }, + { 0x04, 0xa800 }, + { 0x04, 0xf000 }, + { 0x03, 0xdf01 }, + { 0x02, 0xdf20 }, + { 0x01, 0x101a }, + { 0x00, 0xa0ff }, + { 0x04, 0xf800 }, + { 0x04, 0x0000 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x10, 0xf41b }, + { 0x14, 0xfb54 }, + { 0x18, 0xf5c7 }, + { 0x1f, 0x0000 }, + + { 0x1f, 0x0001 }, + { 0x17, 0x0cc0 }, + { 0x1f, 0x0000 } + }; + + rtl_phy_write(ioaddr, phy_reg_init, ARRAY_SIZE(phy_reg_init)); + + rtl8169scd_hw_phy_config_quirk(tp, ioaddr); +} + static void rtl8169sce_hw_phy_config(void __iomem *ioaddr) { struct phy_reg phy_reg_init[] = { @@ -1681,6 +1746,9 @@ static void rtl_hw_phy_config(struct net_device *dev) case RTL_GIGA_MAC_VER_04: rtl8169sb_hw_phy_config(ioaddr); break; + case RTL_GIGA_MAC_VER_05: + rtl8169scd_hw_phy_config(tp, ioaddr); + break; case RTL_GIGA_MAC_VER_06: rtl8169sce_hw_phy_config(ioaddr); break; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 73b46b6b904f..fdc3110c90c0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1984,6 +1984,8 @@ #define PCI_VENDOR_ID_SAMSUNG 0x144d +#define PCI_VENDOR_ID_GIGABYTE 0x1458 + #define PCI_VENDOR_ID_AMBIT 0x1468 #define PCI_VENDOR_ID_MYRICOM 0x14c1 -- cgit v1.2.3 From 28402971d869e26271b25301011f667d3a5640c3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 13 Aug 2009 10:13:22 +0200 Subject: perf_counter: Provide hw_perf_counter_setup_online() APIs Provide weak aliases for hw_perf_counter_setup_online(). This is used by the BTS patches (for v2.6.32), but it interacts with fixes so propagate this upstream. (it has no effect as of yet) Also export perf_counter_output() to architecture code. 
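As background (not part of this patch): the weak-alias pattern means an architecture overrides the generic no-op simply by providing a strong definition with the same signature. A minimal, hypothetical arch-side sketch, with the body being an assumption about what such an override might do:

void hw_perf_counter_setup_online(int cpu)
{
	/*
	 * Hypothetical arch-specific body: perform per-CPU hardware setup
	 * (e.g. allocate BTS buffers) when @cpu comes online.  Defining a
	 * non-weak symbol like this overrides the __weak no-op in the core.
	 */
}
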
Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ kernel/perf_counter.c | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a9d823a93fe8..2b36afe6ae0f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -694,6 +694,8 @@ struct perf_sample_data { extern int perf_counter_overflow(struct perf_counter *counter, int nmi, struct perf_sample_data *data); +extern void perf_counter_output(struct perf_counter *counter, int nmi, + struct perf_sample_data *data); /* * Return 1 for a software counter, 0 for a hardware counter diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index b0b20a07f394..e26d2fcfa320 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -88,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } void __weak hw_perf_counter_setup(int cpu) { barrier(); } +void __weak hw_perf_counter_setup_online(int cpu) { barrier(); } int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, @@ -2630,7 +2631,7 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) return task_pid_nr_ns(p, counter->ns); } -static void perf_counter_output(struct perf_counter *counter, int nmi, +void perf_counter_output(struct perf_counter *counter, int nmi, struct perf_sample_data *data) { int ret; @@ -4592,6 +4593,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) perf_counter_init_cpu(cpu); break; + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + hw_perf_counter_setup_online(cpu); + break; + case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: perf_counter_exit_cpu(cpu); @@ -4616,6 +4622,8 @@ void __init perf_counter_init(void) { perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); + perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, + (void *)(long)smp_processor_id()); register_cpu_notifier(&perf_cpu_nb); } -- cgit v1.2.3 From 3dab77fb1bf89664bb1c9544607159dcab6f7d57 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 13 Aug 2009 11:47:53 +0200 Subject: perf: Rework/fix the whole read vs group stuff Replace PERF_SAMPLE_GROUP with PERF_SAMPLE_READ and introduce PERF_FORMAT_GROUP to deal with group reads in a more generic way. This allows you to get group reads out of read() as well. 
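To make the new ABI concrete, here is a small userspace sketch that decodes a read() buffer from a counter opened with PERF_FORMAT_GROUP. The constants are copied from the enum this patch introduces; the function name decode_group_read and the buf/read_format arguments are illustrative assumptions, with the buffer presumed to have been filled by a prior read() on a perf counter fd:

#include <inttypes.h>
#include <stdio.h>

/* Values from enum perf_counter_read_format as defined by this patch. */
#define PERF_FORMAT_TOTAL_TIME_ENABLED	(1U << 0)
#define PERF_FORMAT_TOTAL_TIME_RUNNING	(1U << 1)
#define PERF_FORMAT_ID			(1U << 2)
#define PERF_FORMAT_GROUP		(1U << 3)

/* Walk the group layout: nr, optional times, then {value[, id]} per counter. */
static void decode_group_read(const uint64_t *buf, uint64_t read_format)
{
	const uint64_t *p = buf;
	uint64_t nr = *p++;

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		p++;			/* time_enabled */
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		p++;			/* time_running */

	for (uint64_t i = 0; i < nr; i++) {
		uint64_t value = *p++;
		uint64_t id = 0;

		if (read_format & PERF_FORMAT_ID)
			id = *p++;
		printf("counter %" PRIu64 ": id=%" PRIu64 " value=%" PRIu64 "\n",
		       i, id, value);
	}
}
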
Signed-off-by: Peter Zijlstra Cc: Corey J Ashford Cc: Paul Mackerras Cc: stephane eranian LKML-Reference: <20090813103655.117411814@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 47 ++++++-- kernel/perf_counter.c | 274 +++++++++++++++++++++++++++++++------------ 2 files changed, 238 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2b36afe6ae0f..b53f7006cc4e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -115,7 +115,7 @@ enum perf_counter_sample_format { PERF_SAMPLE_TID = 1U << 1, PERF_SAMPLE_TIME = 1U << 2, PERF_SAMPLE_ADDR = 1U << 3, - PERF_SAMPLE_GROUP = 1U << 4, + PERF_SAMPLE_READ = 1U << 4, PERF_SAMPLE_CALLCHAIN = 1U << 5, PERF_SAMPLE_ID = 1U << 6, PERF_SAMPLE_CPU = 1U << 7, @@ -127,16 +127,32 @@ enum perf_counter_sample_format { }; /* - * Bits that can be set in attr.read_format to request that - * reads on the counter should return the indicated quantities, - * in increasing order of bit value, after the counter value. + * The format of the data returned by read() on a perf counter fd, + * as specified by attr.read_format: + * + * struct read_format { + * { u64 value; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 id; } && PERF_FORMAT_ID + * } && !PERF_FORMAT_GROUP + * + * { u64 nr; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 value; + * { u64 id; } && PERF_FORMAT_ID + * } cntr[nr]; + * } && PERF_FORMAT_GROUP + * }; */ enum perf_counter_read_format { PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, + PERF_FORMAT_GROUP = 1U << 3, - PERF_FORMAT_MAX = 1U << 3, /* non-ABI */ + PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ @@ -343,10 +359,8 @@ enum perf_event_type { * struct { * struct perf_event_header header; * u32 pid, tid; - * u64 value; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 parent_id; } && PERF_FORMAT_ID + * + * struct read_format values; * }; */ PERF_EVENT_READ = 8, @@ -364,11 +378,22 @@ enum perf_event_type { * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 period; } && PERF_SAMPLE_PERIOD * - * { u64 nr; - * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP + * { struct read_format values; } && PERF_SAMPLE_READ * * { u64 nr, * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN + * + * # + * # The RAW record below is opaque data wrt the ABI + * # + * # That is, the ABI doesn't make any promises wrt to + * # the stability of its content, it may vary depending + * # on event, hardware, kernel version and phase of + * # the moon. + * # + * # In other words, PERF_SAMPLE_RAW contents are not an ABI. 
+ * # + * * { u32 size; * char data[size];}&& PERF_SAMPLE_RAW * }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 3dd4339589a0..b8c6b97a20a3 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1692,7 +1692,32 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } -static u64 perf_counter_read_tree(struct perf_counter *counter) +static int perf_counter_read_size(struct perf_counter *counter) +{ + int entry = sizeof(u64); /* value */ + int size = 0; + int nr = 1; + + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + size += sizeof(u64); + + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + size += sizeof(u64); + + if (counter->attr.read_format & PERF_FORMAT_ID) + entry += sizeof(u64); + + if (counter->attr.read_format & PERF_FORMAT_GROUP) { + nr += counter->group_leader->nr_siblings; + size += sizeof(u64); + } + + size += entry * nr; + + return size; +} + +static u64 perf_counter_read_value(struct perf_counter *counter) { struct perf_counter *child; u64 total = 0; @@ -1704,14 +1729,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter) return total; } +static int perf_counter_read_entry(struct perf_counter *counter, + u64 read_format, char __user *buf) +{ + int n = 0, count = 0; + u64 values[2]; + + values[n++] = perf_counter_read_value(counter); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(counter); + + count = n * sizeof(u64); + + if (copy_to_user(buf, values, count)) + return -EFAULT; + + return count; +} + +static int perf_counter_read_group(struct perf_counter *counter, + u64 read_format, char __user *buf) +{ + struct perf_counter *leader = counter->group_leader, *sub; + int n = 0, size = 0, err = -EFAULT; + u64 values[3]; + + values[n++] = 1 + leader->nr_siblings; + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = leader->total_time_enabled + + atomic64_read(&leader->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = leader->total_time_running + + atomic64_read(&leader->child_total_time_running); + } + + size = n * sizeof(u64); + + if (copy_to_user(buf, values, size)) + return -EFAULT; + + err = perf_counter_read_entry(leader, read_format, buf + size); + if (err < 0) + return err; + + size += err; + + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + err = perf_counter_read_entry(counter, read_format, + buf + size); + if (err < 0) + return err; + + size += err; + } + + return size; +} + +static int perf_counter_read_one(struct perf_counter *counter, + u64 read_format, char __user *buf) +{ + u64 values[4]; + int n = 0; + + values[n++] = perf_counter_read_value(counter); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = counter->total_time_enabled + + atomic64_read(&counter->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = counter->total_time_running + + atomic64_read(&counter->child_total_time_running); + } + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(counter); + + if (copy_to_user(buf, values, n * sizeof(u64))) + return -EFAULT; + + return n * sizeof(u64); +} + /* * Read the performance counter - simple non blocking version for now */ static ssize_t perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) { - u64 values[4]; - int n; + u64 read_format = counter->attr.read_format; + int ret; /* * Return end-of-file for a read on a counter that is in @@ -1721,28 
+1828,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) if (counter->state == PERF_COUNTER_STATE_ERROR) return 0; + if (count < perf_counter_read_size(counter)) + return -ENOSPC; + WARN_ON_ONCE(counter->ctx->parent_ctx); mutex_lock(&counter->child_mutex); - values[0] = perf_counter_read_tree(counter); - n = 1; - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); - if (counter->attr.read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(counter); + if (read_format & PERF_FORMAT_GROUP) + ret = perf_counter_read_group(counter, read_format, buf); + else + ret = perf_counter_read_one(counter, read_format, buf); mutex_unlock(&counter->child_mutex); - if (count < n * sizeof(u64)) - return -EINVAL; - count = n * sizeof(u64); - - if (copy_to_user(buf, values, count)) - return -EFAULT; - - return count; + return ret; } static ssize_t @@ -2631,6 +2728,79 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) return task_pid_nr_ns(p, counter->ns); } +static void perf_output_read_one(struct perf_output_handle *handle, + struct perf_counter *counter) +{ + u64 read_format = counter->attr.read_format; + u64 values[4]; + int n = 0; + + values[n++] = atomic64_read(&counter->count); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = counter->total_time_enabled + + atomic64_read(&counter->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = counter->total_time_running + + atomic64_read(&counter->child_total_time_running); + } + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(counter); + + perf_output_copy(handle, values, n * sizeof(u64)); +} + +/* + * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult. 
+ */ +static void perf_output_read_group(struct perf_output_handle *handle, + struct perf_counter *counter) +{ + struct perf_counter *leader = counter->group_leader, *sub; + u64 read_format = counter->attr.read_format; + u64 values[5]; + int n = 0; + + values[n++] = 1 + leader->nr_siblings; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = leader->total_time_enabled; + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = leader->total_time_running; + + if (leader != counter) + leader->pmu->read(leader); + + values[n++] = atomic64_read(&leader->count); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(leader); + + perf_output_copy(handle, values, n * sizeof(u64)); + + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + n = 0; + + if (sub != counter) + sub->pmu->read(sub); + + values[n++] = atomic64_read(&sub->count); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(sub); + + perf_output_copy(handle, values, n * sizeof(u64)); + } +} + +static void perf_output_read(struct perf_output_handle *handle, + struct perf_counter *counter) +{ + if (counter->attr.read_format & PERF_FORMAT_GROUP) + perf_output_read_group(handle, counter); + else + perf_output_read_one(handle, counter); +} + void perf_counter_output(struct perf_counter *counter, int nmi, struct perf_sample_data *data) { @@ -2642,10 +2812,6 @@ void perf_counter_output(struct perf_counter *counter, int nmi, struct { u32 pid, tid; } tid_entry; - struct { - u64 id; - u64 counter; - } group_entry; struct perf_callchain_entry *callchain = NULL; int callchain_size = 0; u64 time; @@ -2700,10 +2866,8 @@ void perf_counter_output(struct perf_counter *counter, int nmi, if (sample_type & PERF_SAMPLE_PERIOD) header.size += sizeof(u64); - if (sample_type & PERF_SAMPLE_GROUP) { - header.size += sizeof(u64) + - counter->nr_siblings * sizeof(group_entry); - } + if (sample_type & PERF_SAMPLE_READ) + header.size += perf_counter_read_size(counter); if (sample_type & PERF_SAMPLE_CALLCHAIN) { callchain = perf_callchain(data->regs); @@ -2760,26 +2924,8 @@ void perf_counter_output(struct perf_counter *counter, int nmi, if (sample_type & PERF_SAMPLE_PERIOD) perf_output_put(&handle, data->period); - /* - * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult. 
- */ - if (sample_type & PERF_SAMPLE_GROUP) { - struct perf_counter *leader, *sub; - u64 nr = counter->nr_siblings; - - perf_output_put(&handle, nr); - - leader = counter->group_leader; - list_for_each_entry(sub, &leader->sibling_list, list_entry) { - if (sub != counter) - sub->pmu->read(sub); - - group_entry.id = primary_counter_id(sub); - group_entry.counter = atomic64_read(&sub->count); - - perf_output_put(&handle, group_entry); - } - } + if (sample_type & PERF_SAMPLE_READ) + perf_output_read(&handle, counter); if (sample_type & PERF_SAMPLE_CALLCHAIN) { if (callchain) @@ -2818,8 +2964,6 @@ struct perf_read_event { u32 pid; u32 tid; - u64 value; - u64 format[3]; }; static void @@ -2831,34 +2975,20 @@ perf_counter_read_event(struct perf_counter *counter, .header = { .type = PERF_EVENT_READ, .misc = 0, - .size = sizeof(event) - sizeof(event.format), + .size = sizeof(event) + perf_counter_read_size(counter), }, .pid = perf_counter_pid(counter, task), .tid = perf_counter_tid(counter, task), - .value = atomic64_read(&counter->count), }; - int ret, i = 0; - - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - event.header.size += sizeof(u64); - event.format[i++] = counter->total_time_enabled; - } - - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - event.header.size += sizeof(u64); - event.format[i++] = counter->total_time_running; - } - - if (counter->attr.read_format & PERF_FORMAT_ID) { - event.header.size += sizeof(u64); - event.format[i++] = primary_counter_id(counter); - } + int ret; ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); if (ret) return; - perf_output_copy(&handle, &event, event.header.size); + perf_output_put(&handle, event); + perf_output_read(&handle, counter); + perf_output_end(&handle); } @@ -3921,9 +4051,9 @@ perf_counter_alloc(struct perf_counter_attr *attr, atomic64_set(&hwc->period_left, hwc->sample_period); /* - * we currently do not support PERF_SAMPLE_GROUP on inherited counters + * we currently do not support PERF_FORMAT_GROUP on inherited counters */ - if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP)) + if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) goto done; switch (attr->type) { -- cgit v1.2.3 From e9b3cc1b3779fe10a80de4c3e7404bd308d0eae3 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 13 Aug 2009 05:19:44 +0000 Subject: net: skb ftracer - add tracepoint to skb_copy_datagram_iovec (v3) skb allocation / consumption tracer - Add consumption tracepoint This patch adds a tracepoint to skb_copy_datagram_iovec, which is called each time a userspace process copies a frame from a socket receive queue to a user space buffer. It allows us to hook in and examine each sk_buff that the system receives on a per-socket basis, and can be used to compile a list of which skb's were received by which processes. Signed-off-by: Neil Horman include/trace/events/skb.h | 20 ++++++++++++++++++++ net/core/datagram.c | 3 +++ 2 files changed, 23 insertions(+) Signed-off-by: David S.
Miller --- include/trace/events/skb.h | 20 ++++++++++++++++++++ net/core/datagram.c | 3 +++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index e499863b9669..4b2be6dc76f0 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -5,6 +5,7 @@ #define _TRACE_SKB_H #include +#include #include /* @@ -34,6 +35,25 @@ TRACE_EVENT(kfree_skb, __entry->skbaddr, __entry->protocol, __entry->location) ); +TRACE_EVENT(skb_copy_datagram_iovec, + + TP_PROTO(const struct sk_buff *skb, int len), + + TP_ARGS(skb, len), + + TP_STRUCT__entry( + __field( const void *, skbaddr ) + __field( int, len ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = len; + ), + + TP_printk("skbaddr=%p len=%d", __entry->skbaddr, __entry->len) +); + #endif /* _TRACE_SKB_H */ /* This part must be outside protection */ diff --git a/net/core/datagram.c b/net/core/datagram.c index b0fe69211eef..1c6cf3a1a4f6 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -55,6 +55,7 @@ #include #include #include +#include /* * Is a socket 'connection oriented' ? @@ -284,6 +285,8 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, int i, copy = start - offset; struct sk_buff *frag_iter; + trace_skb_copy_datagram_iovec(skb, len); + /* Copy header. */ if (copy > 0) { if (copy > len) -- cgit v1.2.3 From 9188499cdb117d86a1ea6b04374095b098d56936 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 13 Aug 2009 09:44:57 -0400 Subject: security: introducing security_request_module Calling request_module() will trigger a userspace upcall which will load a new module into the kernel. This can be a dangerous event if the process able to trigger request_module() is able to control either the modprobe binary or the module binary. This patch adds a new security hook to request_module() which can be used by an LSM to control a processes ability to call request_module(). Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/security.h | 10 ++++++++++ kernel/kmod.c | 4 ++++ security/capability.c | 6 ++++++ security/security.c | 5 +++++ 4 files changed, 25 insertions(+) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 57ead99d2593..1e3dd86eea99 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -678,6 +678,9 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @inode points to the inode to use as a reference. * The current task must be the one that nominated @inode. * Return 0 if successful. + * @kernel_module_request: + * Ability to trigger the kernel to automatically upcall to userspace for + * userspace to load a kernel module with the given name. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. 
The @flags parameter indicates @@ -1489,6 +1492,7 @@ struct security_operations { void (*cred_commit)(struct cred *new, const struct cred *old); int (*kernel_act_as)(struct cred *new, u32 secid); int (*kernel_create_files_as)(struct cred *new, struct inode *inode); + int (*kernel_module_request)(void); int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags); int (*task_fix_setuid) (struct cred *new, const struct cred *old, int flags); @@ -1741,6 +1745,7 @@ int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp); void security_commit_creds(struct cred *new, const struct cred *old); int security_kernel_act_as(struct cred *new, u32 secid); int security_kernel_create_files_as(struct cred *new, struct inode *inode); +int security_kernel_module_request(void); int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags); int security_task_fix_setuid(struct cred *new, const struct cred *old, int flags); @@ -2292,6 +2297,11 @@ static inline int security_kernel_create_files_as(struct cred *cred, return 0; } +static inline int security_kernel_module_request(void) +{ + return 0; +} + static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { diff --git a/kernel/kmod.c b/kernel/kmod.c index 385c31a1bdbf..5a7ae57f983f 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -78,6 +78,10 @@ int __request_module(bool wait, const char *fmt, ...) #define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */ static int kmod_loop_msg; + ret = security_kernel_module_request(); + if (ret) + return ret; + va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); va_end(args); diff --git a/security/capability.c b/security/capability.c index ec0573054024..1b943f54b2ea 100644 --- a/security/capability.c +++ b/security/capability.c @@ -396,6 +396,11 @@ static int cap_kernel_create_files_as(struct cred *new, struct inode *inode) return 0; } +static int cap_kernel_module_request(void) +{ + return 0; +} + static int cap_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { return 0; @@ -945,6 +950,7 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, cred_commit); set_to_cap_if_null(ops, kernel_act_as); set_to_cap_if_null(ops, kernel_create_files_as); + set_to_cap_if_null(ops, kernel_module_request); set_to_cap_if_null(ops, task_setuid); set_to_cap_if_null(ops, task_fix_setuid); set_to_cap_if_null(ops, task_setgid); diff --git a/security/security.c b/security/security.c index 4501c5e1f988..0e993f42ce3d 100644 --- a/security/security.c +++ b/security/security.c @@ -709,6 +709,11 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode) return security_ops->kernel_create_files_as(new, inode); } +int security_kernel_module_request(void) +{ + return security_ops->kernel_module_request(); +} + int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags) { return security_ops->task_setuid(id0, id1, id2, flags); -- cgit v1.2.3 From f322abf83feddc3c37c3a91794e0c5aece4af18e Mon Sep 17 00:00:00 2001 From: James Morris Date: Fri, 14 Aug 2009 11:19:29 +1000 Subject: security: update documentation for security_request_module Update documentation for security_request_module to indicate return value, as suggested by Serge Hallyn. 
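As an illustration of the hook's contract (return 0 to allow the request_module() upcall, a negative errno to deny it), a toy LSM could wire it up as follows. This is a hypothetical sketch, not code from either patch; the identifier names and the CAP_SYS_MODULE policy are assumptions for the example:

/* Hypothetical policy: only tasks with CAP_SYS_MODULE may trigger
 * automatic module loading.  Returning -EPERM denies the upcall. */
static int example_kernel_module_request(void)
{
	if (!capable(CAP_SYS_MODULE))
		return -EPERM;
	return 0;
}

static struct security_operations example_security_ops = {
	/* ... other hooks ... */
	.kernel_module_request	= example_kernel_module_request,
};
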
Signed-off-by: James Morris --- include/linux/security.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 1e3dd86eea99..a16d6b7c4ebe 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -681,6 +681,7 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @kernel_module_request: * Ability to trigger the kernel to automatically upcall to userspace for * userspace to load a kernel module with the given name. + * Return 0 if successful. * @task_setuid: * Check permission before setting one or more of the user identity * attributes of the current process. The @flags parameter indicates -- cgit v1.2.3 From 00ae4064b1445524752575dd84df227c0687c99d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:49 +0900 Subject: percpu: rename 4k first chunk allocator to page Page size isn't always 4k depending on arch and configuration. Rename 4k first chunk allocator to page. Signed-off-by: Tejun Heo Cc: David Howells --- Documentation/kernel-parameters.txt | 2 +- arch/x86/kernel/setup_percpu.c | 23 ++++++++++++----------- include/linux/percpu.h | 2 +- mm/percpu.c | 25 ++++++++++++++----------- 4 files changed, 28 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 7936b801fe6a..12e9eb77ee0d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1920,7 +1920,7 @@ and is between 256 and 4096 characters. It is defined in the file See arch/parisc/kernel/pdc_chassis.c percpu_alloc= [X86] Select which percpu first chunk allocator to use. - Allowed values are one of "lpage", "embed" and "4k". + Allowed values are one of "lpage", "embed" and "page". See comments in arch/x86/kernel/setup_percpu.c for details on each allocator. This parameter is primarily for debugging and performance comparison. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index a26ff61e2fb0..1e17711c29d6 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -249,21 +249,22 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) } /* - * 4k allocator + * Page allocator * - * Boring fallback 4k allocator. This allocator puts more pressure on - * PTE TLBs but other than that behaves nicely on both UMA and NUMA. + * Boring fallback 4k page allocator. This allocator puts more + * pressure on PTE TLBs but other than that behaves nicely on both UMA + * and NUMA. 
*/ -static void __init pcpu4k_populate_pte(unsigned long addr) +static void __init pcpup_populate_pte(unsigned long addr) { populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_4k(size_t static_size) +static ssize_t __init setup_pcpu_page(size_t static_size) { - return pcpu_4k_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - pcpu_fc_alloc, pcpu_fc_free, - pcpu4k_populate_pte); + return pcpu_page_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + pcpu_fc_alloc, pcpu_fc_free, + pcpup_populate_pte); } /* for explicit first chunk allocator selection */ @@ -307,7 +308,7 @@ void __init setup_per_cpu_areas(void) */ ret = -EINVAL; if (strlen(pcpu_chosen_alloc)) { - if (strcmp(pcpu_chosen_alloc, "4k")) { + if (strcmp(pcpu_chosen_alloc, "page")) { if (!strcmp(pcpu_chosen_alloc, "lpage")) ret = setup_pcpu_lpage(static_size, true); else if (!strcmp(pcpu_chosen_alloc, "embed")) @@ -317,7 +318,7 @@ void __init setup_per_cpu_areas(void) "specified\n", pcpu_chosen_alloc); if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " - "falling back to 4k\n", + "falling back to page size\n", pcpu_chosen_alloc, ret); } } else { @@ -326,7 +327,7 @@ void __init setup_per_cpu_areas(void) ret = setup_pcpu_embed(static_size, false); } if (ret < 0) - ret = setup_pcpu_4k(static_size); + ret = setup_pcpu_page(static_size); if (ret < 0) panic("cannot allocate static percpu area (%zu bytes, err=%zd)", static_size, ret); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e134c8229631..7989f61b03f3 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -74,7 +74,7 @@ extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size); -extern ssize_t __init pcpu_4k_first_chunk( +extern ssize_t __init pcpu_page_first_chunk( size_t static_size, size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, diff --git a/mm/percpu.c b/mm/percpu.c index cbddcbdab681..6feac7934904 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1497,15 +1497,15 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, } /** - * pcpu_4k_first_chunk - map the first chunk using PAGE_SIZE pages + * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE * @free_fn: funtion to free percpu page, always called with PAGE_SIZE * @populate_pte_fn: function to populate pte * - * This is a helper to ease setting up embedded first percpu chunk and - * can be called where pcpu_setup_first_chunk() is expected. + * This is a helper to ease setting up page-remapped first percpu + * chunk and can be called where pcpu_setup_first_chunk() is expected. * * This is the basic allocator. Static percpu area is allocated * page-by-page into vmalloc area. @@ -1514,12 +1514,13 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. 
*/ -ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_populate_pte_fn_t populate_pte_fn) +ssize_t __init pcpu_page_first_chunk(size_t static_size, size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; + char psize_str[16]; int unit_pages; size_t pages_size; struct page **pages; @@ -1527,6 +1528,8 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, int i, j; ssize_t ret; + snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); + unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, PCPU_MIN_UNIT_SIZE)); @@ -1542,8 +1545,8 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, ptr = alloc_fn(cpu, PAGE_SIZE); if (!ptr) { - pr_warning("PERCPU: failed to allocate " - "4k page for cpu%u\n", cpu); + pr_warning("PERCPU: failed to allocate %s page " + "for cpu%u\n", psize_str, cpu); goto enomem; } pages[j++] = virt_to_page(ptr); @@ -1580,8 +1583,8 @@ ssize_t __init pcpu_4k_first_chunk(size_t static_size, size_t reserved_size, } /* we're ready, commit */ - pr_info("PERCPU: %d 4k pages/cpu @%p s%zu r%zu\n", - unit_pages, vm.addr, static_size, reserved_size); + pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu\n", + unit_pages, psize_str, vm.addr, static_size, reserved_size); ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, unit_pages << PAGE_SHIFT, vm.addr, NULL); -- cgit v1.2.3 From 08fc45806103e59a37418e84719b878f9bb32540 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:49 +0900 Subject: percpu: build first chunk allocators selectively There's no need to build unused first chunk allocators in. Define CONFIG_NEED_PER_CPU_*_FIRST_CHUNK and let archs enable them selectively. 
Signed-off-by: Tejun Heo --- arch/x86/Kconfig | 10 ++++++++++ include/linux/percpu.h | 27 +++++---------------------- mm/percpu.c | 19 +++++++++++-------- 3 files changed, 26 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e06b2eeff9f2..f7ac27215512 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -150,6 +150,16 @@ config ARCH_HAS_CACHE_LINE_SIZE config HAVE_SETUP_PER_CPU_AREA def_bool y +config NEED_PER_CPU_EMBED_FIRST_CHUNK + def_bool y + +config NEED_PER_CPU_PAGE_FIRST_CHUNK + def_bool y + +config NEED_PER_CPU_LPAGE_FIRST_CHUNK + def_bool y + depends on NEED_MULTIPLE_NODES + config HAVE_CPUMASK_OF_CPU_MAP def_bool X86_64_SMP diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 7989f61b03f3..e26788e0da4a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -70,17 +70,21 @@ extern size_t __init pcpu_setup_first_chunk( ssize_t dyn_size, size_t unit_size, void *base_addr, const int *unit_map); +#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern ssize_t __init pcpu_embed_first_chunk( size_t static_size, size_t reserved_size, ssize_t dyn_size); +#endif +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK extern ssize_t __init pcpu_page_first_chunk( size_t static_size, size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); +#endif -#ifdef CONFIG_NEED_MULTIPLE_NODES +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK extern int __init pcpu_lpage_build_unit_map( size_t static_size, size_t reserved_size, ssize_t *dyn_sizep, size_t *unit_sizep, @@ -98,27 +102,6 @@ extern ssize_t __init pcpu_lpage_first_chunk( extern void *pcpu_lpage_remapped(void *kaddr); #else -static inline int pcpu_lpage_build_unit_map( - size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep, size_t *unit_sizep, - size_t lpage_size, int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) -{ - return -EINVAL; -} - -static inline ssize_t __init pcpu_lpage_first_chunk( - size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - size_t lpage_size, const int *unit_map, - int nr_units, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn) -{ - return -EINVAL; -} - static inline void *pcpu_lpage_remapped(void *kaddr) { return NULL; diff --git a/mm/percpu.c b/mm/percpu.c index 6feac7934904..7971997de310 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1414,8 +1414,9 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, return pcpu_unit_size; } -static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep) +static inline size_t pcpu_calc_fc_sizes(size_t static_size, + size_t reserved_size, + ssize_t *dyn_sizep) { size_t size_sum; @@ -1427,6 +1428,8 @@ static size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, return size_sum; } +#if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ + !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @static_size: the size of static percpu area in bytes @@ -1495,7 +1498,10 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, unit_size, base, NULL); } +#endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || + !CONFIG_HAVE_SETUP_PER_CPU_AREA */ +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK /** * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE 
pages * @static_size: the size of static percpu area in bytes @@ -1598,12 +1604,9 @@ out_free_ar: free_bootmem(__pa(pages), pages_size); return ret; } +#endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ -/* - * Large page remapping first chunk setup helper - */ -#ifdef CONFIG_NEED_MULTIPLE_NODES - +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK /** * pcpu_lpage_build_unit_map - build unit_map for large page remapping * @static_size: the size of static percpu area in bytes @@ -1982,7 +1985,7 @@ void *pcpu_lpage_remapped(void *kaddr) return NULL; } -#endif +#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */ /* * Generic percpu area setup. -- cgit v1.2.3 From f58dc01ba2ca9fe3ab2ba4ca43d9c8a735cf62d8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: generalize first chunk allocator selection Now that all first chunk allocators are in mm/percpu.c, it makes sense to make generalize percpu_alloc kernel parameter. Define PCPU_FC_* and set pcpu_chosen_fc using early_param() in mm/percpu.c. Arch code can use the set value to determine which first chunk allocator to use. Signed-off-by: Tejun Heo --- Documentation/kernel-parameters.txt | 11 ++++++----- arch/x86/kernel/setup_percpu.c | 24 ++++++------------------ include/linux/percpu.h | 12 ++++++++++++ mm/percpu.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 12e9eb77ee0d..dee9ce2e6cfa 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1919,11 +1919,12 @@ and is between 256 and 4096 characters. It is defined in the file Format: { 0 | 1 } See arch/parisc/kernel/pdc_chassis.c - percpu_alloc= [X86] Select which percpu first chunk allocator to use. - Allowed values are one of "lpage", "embed" and "page". - See comments in arch/x86/kernel/setup_percpu.c for - details on each allocator. This parameter is primarily - for debugging and performance comparison. + percpu_alloc= Select which percpu first chunk allocator to use. + Currently supported values are "embed", "page" and + "lpage". Archs may support subset or none of the + selections. See comments in mm/percpu.c for details + on each allocator. This parameter is primarily for + debugging and performance comparison. pf. [PARIDE] See Documentation/blockdev/paride.txt. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 1e17711c29d6..b961d99e6416 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -267,16 +267,6 @@ static ssize_t __init setup_pcpu_page(size_t static_size) pcpup_populate_pte); } -/* for explicit first chunk allocator selection */ -static char pcpu_chosen_alloc[16] __initdata; - -static int __init percpu_alloc_setup(char *str) -{ - strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); - return 0; -} -early_param("percpu_alloc", percpu_alloc_setup); - static inline void setup_percpu_segment(int cpu) { #ifdef CONFIG_X86_32 @@ -307,19 +297,17 @@ void __init setup_per_cpu_areas(void) * each allocator for details. 
*/ ret = -EINVAL; - if (strlen(pcpu_chosen_alloc)) { - if (strcmp(pcpu_chosen_alloc, "page")) { - if (!strcmp(pcpu_chosen_alloc, "lpage")) + if (pcpu_chosen_fc != PCPU_FC_AUTO) { + if (pcpu_chosen_fc != PCPU_FC_PAGE) { + if (pcpu_chosen_fc == PCPU_FC_LPAGE) ret = setup_pcpu_lpage(static_size, true); - else if (!strcmp(pcpu_chosen_alloc, "embed")) - ret = setup_pcpu_embed(static_size, true); else - pr_warning("PERCPU: unknown allocator %s " - "specified\n", pcpu_chosen_alloc); + ret = setup_pcpu_embed(static_size, true); + if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " "falling back to page size\n", - pcpu_chosen_alloc, ret); + pcpu_fc_names[pcpu_chosen_fc], ret); } } else { ret = setup_pcpu_lpage(static_size, false); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e26788e0da4a..9be05cbe5ee0 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -59,6 +59,18 @@ extern void *pcpu_base_addr; extern const int *pcpu_unit_map; +enum pcpu_fc { + PCPU_FC_AUTO, + PCPU_FC_EMBED, + PCPU_FC_PAGE, + PCPU_FC_LPAGE, + + PCPU_FC_NR, +}; +extern const char *pcpu_fc_names[PCPU_FC_NR]; + +extern enum pcpu_fc pcpu_chosen_fc; + typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); diff --git a/mm/percpu.c b/mm/percpu.c index 7971997de310..7fb40bb1555a 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1414,6 +1414,38 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, return pcpu_unit_size; } +const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { + [PCPU_FC_AUTO] = "auto", + [PCPU_FC_EMBED] = "embed", + [PCPU_FC_PAGE] = "page", + [PCPU_FC_LPAGE] = "lpage", +}; + +enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO; + +static int __init percpu_alloc_setup(char *str) +{ + if (0) + /* nada */; +#ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK + else if (!strcmp(str, "embed")) + pcpu_chosen_fc = PCPU_FC_EMBED; +#endif +#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK + else if (!strcmp(str, "page")) + pcpu_chosen_fc = PCPU_FC_PAGE; +#endif +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK + else if (!strcmp(str, "lpage")) + pcpu_chosen_fc = PCPU_FC_LPAGE; +#endif + else + pr_warning("PERCPU: unknown allocator %s specified\n", str); + + return 0; +} +early_param("percpu_alloc", percpu_alloc_setup); + static inline size_t pcpu_calc_fc_sizes(size_t static_size, size_t reserved_size, ssize_t *dyn_sizep) -- cgit v1.2.3 From 9a7737691e90d3cce0e5248f91826c50e5aa3fcf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: drop @static_size from first chunk allocators First chunk allocators assume percpu areas have been linked using one of PERCPU_*() macros and depend on __per_cpu_load symbol defined by those macros, so there isn't much point in passing in static area size explicitly when it can be easily calculated from __per_cpu_start and __per_cpu_end. Drop @static_size from all percpu first chunk allocators and helpers. 
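The underlying observation, restated as a tiny sketch that uses only the linker-provided section bounds the patch itself relies on (the helper name percpu_static_size is illustrative, not from the patch):

/* The static percpu area size is always recoverable from the section
 * bounds emitted by the PERCPU_*() linker macros, so it no longer needs
 * to be passed around explicitly. */
extern char __per_cpu_start[], __per_cpu_end[];

static inline size_t percpu_static_size(void)
{
	return __per_cpu_end - __per_cpu_start;
}
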
Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 34 +++++++++++++++------------------- include/linux/percpu.h | 18 ++++++++---------- mm/percpu.c | 29 +++++++++++++---------------- 3 files changed, 36 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index b961d99e6416..8aad486c688f 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -157,7 +157,7 @@ static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; @@ -184,8 +184,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) return -ENOMEM; } - ret = pcpu_lpage_build_unit_map(static_size, - PERCPU_FIRST_CHUNK_RESERVE, + ret = pcpu_lpage_build_unit_map(PERCPU_FIRST_CHUNK_RESERVE, &dyn_size, &unit_size, PMD_SIZE, unit_map, pcpu_lpage_cpu_distance); if (ret < 0) { @@ -208,9 +207,8 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) } } - ret = pcpu_lpage_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, - dyn_size, unit_size, PMD_SIZE, - unit_map, nr_units, + ret = pcpu_lpage_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + unit_size, PMD_SIZE, unit_map, nr_units, pcpu_fc_alloc, pcpu_fc_free, pcpul_map); out_free: if (ret < 0) @@ -218,7 +216,7 @@ out_free: return ret; } #else -static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_lpage(bool chosen) { return -EINVAL; } @@ -232,7 +230,7 @@ static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) * mapping so that it can use PMD mapping without additional TLB * pressure. 
*/ -static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) +static ssize_t __init setup_pcpu_embed(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -244,7 +242,7 @@ static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) if (!chosen && (!cpu_has_pse || pcpu_need_numa())) return -EINVAL; - return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + return pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, reserve - PERCPU_FIRST_CHUNK_RESERVE); } @@ -260,9 +258,9 @@ static void __init pcpup_populate_pte(unsigned long addr) populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_page(size_t static_size) +static ssize_t __init setup_pcpu_page(void) { - return pcpu_page_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, + return pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, pcpu_fc_alloc, pcpu_fc_free, pcpup_populate_pte); } @@ -282,7 +280,6 @@ static inline void setup_percpu_segment(int cpu) void __init setup_per_cpu_areas(void) { - size_t static_size = __per_cpu_end - __per_cpu_start; unsigned int cpu; unsigned long delta; size_t pcpu_unit_size; @@ -300,9 +297,9 @@ void __init setup_per_cpu_areas(void) if (pcpu_chosen_fc != PCPU_FC_AUTO) { if (pcpu_chosen_fc != PCPU_FC_PAGE) { if (pcpu_chosen_fc == PCPU_FC_LPAGE) - ret = setup_pcpu_lpage(static_size, true); + ret = setup_pcpu_lpage(true); else - ret = setup_pcpu_embed(static_size, true); + ret = setup_pcpu_embed(true); if (ret < 0) pr_warning("PERCPU: %s allocator failed (%zd), " @@ -310,15 +307,14 @@ void __init setup_per_cpu_areas(void) pcpu_fc_names[pcpu_chosen_fc], ret); } } else { - ret = setup_pcpu_lpage(static_size, false); + ret = setup_pcpu_lpage(false); if (ret < 0) - ret = setup_pcpu_embed(static_size, false); + ret = setup_pcpu_embed(false); } if (ret < 0) - ret = setup_pcpu_page(static_size); + ret = setup_pcpu_page(); if (ret < 0) - panic("cannot allocate static percpu area (%zu bytes, err=%zd)", - static_size, ret); + panic("cannot initialize percpu area (err=%zd)", ret); pcpu_unit_size = ret; diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 9be05cbe5ee0..be2fc8fb9b6f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -84,13 +84,12 @@ extern size_t __init pcpu_setup_first_chunk( #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern ssize_t __init pcpu_embed_first_chunk( - size_t static_size, size_t reserved_size, - ssize_t dyn_size); + size_t reserved_size, ssize_t dyn_size); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK extern ssize_t __init pcpu_page_first_chunk( - size_t static_size, size_t reserved_size, + size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); @@ -98,16 +97,15 @@ extern ssize_t __init pcpu_page_first_chunk( #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK extern int __init pcpu_lpage_build_unit_map( - size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep, size_t *unit_sizep, - size_t lpage_size, int *unit_map, + size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, pcpu_fc_cpu_distance_fn_t cpu_distance_fn); extern ssize_t __init pcpu_lpage_first_chunk( - size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - size_t lpage_size, const int *unit_map, - int nr_units, + size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, 
pcpu_fc_map_fn_t map_fn); diff --git a/mm/percpu.c b/mm/percpu.c index 7fb40bb1555a..e2ac58a39bb2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1464,7 +1464,6 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) /** * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes, -1 for auto * @@ -1489,9 +1488,9 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size) +ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) { + const size_t static_size = __per_cpu_end - __per_cpu_start; size_t size_sum, unit_size, chunk_size; void *base; unsigned int cpu; @@ -1536,7 +1535,6 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK /** * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE * @free_fn: funtion to free percpu page, always called with PAGE_SIZE @@ -1552,12 +1550,13 @@ ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_page_first_chunk(size_t static_size, size_t reserved_size, +ssize_t __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; + const size_t static_size = __per_cpu_end - __per_cpu_start; char psize_str[16]; int unit_pages; size_t pages_size; @@ -1641,7 +1640,6 @@ out_free_ar: #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK /** * pcpu_lpage_build_unit_map - build unit_map for large page remapping - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_sizep: in/out parameter for dynamic size, -1 for auto * @unit_sizep: out parameter for unit size @@ -1661,13 +1659,14 @@ out_free_ar: * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and * returns the number of units to be allocated. -errno on failure. 
*/ -int __init pcpu_lpage_build_unit_map(size_t static_size, size_t reserved_size, - ssize_t *dyn_sizep, size_t *unit_sizep, - size_t lpage_size, int *unit_map, +int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, pcpu_fc_cpu_distance_fn_t cpu_distance_fn) { static int group_map[NR_CPUS] __initdata; static int group_cnt[NR_CPUS] __initdata; + const size_t static_size = __per_cpu_end - __per_cpu_start; int group_cnt_max = 0; size_t size_sum, min_unit_size, alloc_size; int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ @@ -1819,7 +1818,6 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page - * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes * @unit_size: unit size in bytes @@ -1850,15 +1848,15 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_lpage_first_chunk(size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - size_t lpage_size, const int *unit_map, - int nr_units, +ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) { static struct vm_struct vm; + const size_t static_size = __per_cpu_end - __per_cpu_start; size_t chunk_size = unit_size * nr_units; size_t map_size; unsigned int cpu; @@ -2037,7 +2035,6 @@ EXPORT_SYMBOL(__per_cpu_offset); void __init setup_per_cpu_areas(void) { - size_t static_size = __per_cpu_end - __per_cpu_start; ssize_t unit_size; unsigned long delta; unsigned int cpu; @@ -2046,7 +2043,7 @@ void __init setup_per_cpu_areas(void) * Always reserve area for module percpu variables. That's * what the legacy allocator did. */ - unit_size = pcpu_embed_first_chunk(static_size, PERCPU_MODULE_RESERVE, + unit_size = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, PERCPU_DYNAMIC_RESERVE); if (unit_size < 0) panic("Failed to initialized percpu areas."); -- cgit v1.2.3 From 1d9d32572163b30be81dbe1409dfa7ea9763d0e8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: make @dyn_size mandatory for pcpu_setup_first_chunk() Now that all actual first chunk allocation and copying happen in the first chunk allocators and helpers, there's no reason for pcpu_setup_first_chunk() to try to determine @dyn_size automatically. The only left user is page first chunk allocator. Make it determine dyn_size like other allocators and make @dyn_size mandatory for pcpu_setup_first_chunk(). 
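Condensed from the page-allocator changes below, the calling pattern now looks roughly like this; it is a fragment, with static_size, reserved_size, vm and ret assumed to be set up by the surrounding allocator code:

/* Resolve the "auto" dynamic size up front; pcpu_setup_first_chunk()
 * no longer accepts -1 and guesses it. */
ssize_t dyn_size = -1;
size_t size_sum, unit_size;

size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);

ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size,
			     unit_size, vm.addr, NULL);
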
Signed-off-by: Tejun Heo --- include/linux/percpu.h | 2 +- mm/percpu.c | 39 +++++++++++++++++++-------------------- 2 files changed, 20 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index be2fc8fb9b6f..0cfdd14d096a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -79,7 +79,7 @@ typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t unit_size, + size_t dyn_size, size_t unit_size, void *base_addr, const int *unit_map); #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK diff --git a/mm/percpu.c b/mm/percpu.c index e2ac58a39bb2..287f59cc5fb9 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1235,7 +1235,7 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @static_size: the size of static percpu area in bytes * @reserved_size: the size of reserved percpu area in bytes, 0 for none - * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @dyn_size: free size for dynamic allocation in bytes * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE * @base_addr: mapped address * @unit_map: cpu -> unit map, NULL for sequential mapping @@ -1252,10 +1252,9 @@ EXPORT_SYMBOL_GPL(free_percpu); * limited offset range for symbol relocations to guarantee module * percpu symbols fall inside the relocatable range. * - * @dyn_size, if non-negative, determines the number of bytes - * available for dynamic allocation in the first chunk. Specifying - * non-negative value makes percpu leave alone the area beyond - * @static_size + @reserved_size + @dyn_size. + * @dyn_size determines the number of bytes available for dynamic + * allocation in the first chunk. The area between @static_size + + * @reserved_size + @dyn_size and @unit_size is unused. * * @unit_size specifies unit size and must be aligned to PAGE_SIZE and * equal to or larger than @static_size + @reserved_size + if @@ -1276,13 +1275,12 @@ EXPORT_SYMBOL_GPL(free_percpu); * percpu access. */ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, - ssize_t dyn_size, size_t unit_size, + size_t dyn_size, size_t unit_size, void *base_addr, const int *unit_map) { static struct vm_struct first_vm; static int smap[2], dmap[2]; - size_t size_sum = static_size + reserved_size + - (dyn_size >= 0 ? 
dyn_size : 0); + size_t size_sum = static_size + reserved_size + dyn_size; struct pcpu_chunk *schunk, *dchunk = NULL; unsigned int cpu, tcpu; int i; @@ -1345,9 +1343,6 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + BITS_TO_LONGS(pcpu_unit_pages) * sizeof(unsigned long); - if (dyn_size < 0) - dyn_size = pcpu_unit_size - static_size - reserved_size; - first_vm.flags = VM_ALLOC; first_vm.size = pcpu_chunk_size; first_vm.addr = base_addr; @@ -1557,6 +1552,8 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, { static struct vm_struct vm; const size_t static_size = __per_cpu_end - __per_cpu_start; + ssize_t dyn_size = -1; + size_t size_sum, unit_size; char psize_str[16]; int unit_pages; size_t pages_size; @@ -1567,8 +1564,9 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); - unit_pages = PFN_UP(max_t(size_t, static_size + reserved_size, - PCPU_MIN_UNIT_SIZE)); + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + unit_pages = unit_size >> PAGE_SHIFT; /* unaligned allocations can't be freed, round up to page size */ pages_size = PFN_ALIGN(unit_pages * nr_cpu_ids * sizeof(pages[0])); @@ -1591,12 +1589,12 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; - vm.size = nr_cpu_ids * unit_pages << PAGE_SHIFT; + vm.size = nr_cpu_ids * unit_size; vm_area_register_early(&vm, PAGE_SIZE); for_each_possible_cpu(cpu) { - unsigned long unit_addr = (unsigned long)vm.addr + - (cpu * unit_pages << PAGE_SHIFT); + unsigned long unit_addr = + (unsigned long)vm.addr + cpu * unit_size; for (i = 0; i < unit_pages; i++) populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); @@ -1620,11 +1618,12 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, } /* we're ready, commit */ - pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu\n", - unit_pages, psize_str, vm.addr, static_size, reserved_size); + pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n", + unit_pages, psize_str, vm.addr, static_size, reserved_size, + dyn_size); - ret = pcpu_setup_first_chunk(static_size, reserved_size, -1, - unit_pages << PAGE_SHIFT, vm.addr, NULL); + ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, + unit_size, vm.addr, NULL); goto out_free_ar; enomem: -- cgit v1.2.3 From 3cbc85652767c38b252c8de55f9fd180b29e4c0d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:50 +0900 Subject: percpu: add @align to pcpu_fc_alloc_fn_t pcpu_fc_alloc_fn_t is about to see more interesting usage, add @align parameter. 
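For illustration, a hypothetical arch-side callback under the new signature (the function name is made up; the bootmem call follows the same pattern as the x86 helper updated in the hunk below and needs <linux/bootmem.h>; a NUMA-aware port would additionally pick a node for @cpu):

	static void * __init my_pcpu_fc_alloc(unsigned int cpu, size_t size,
					      size_t align)
	{
		/* honour the requested alignment instead of deriving it from size */
		return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
	}

In this patch both callers still pass size == align (PAGE_SIZE for the page allocator, lpage_size for the lpage allocator); carrying the alignment explicitly is what the upcoming users of the hook need.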
Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 4 ++-- include/linux/percpu.h | 3 ++- mm/percpu.c | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 8aad486c688f..660cde133141 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -126,9 +126,9 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, /* * Helpers for first chunk memory allocation */ -static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size) +static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) { - return pcpu_alloc_bootmem(cpu, size, size); + return pcpu_alloc_bootmem(cpu, size, align); } static void __init pcpu_fc_free(void *ptr, size_t size) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 0cfdd14d096a..d385dbcf190b 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -71,7 +71,8 @@ extern const char *pcpu_fc_names[PCPU_FC_NR]; extern enum pcpu_fc pcpu_chosen_fc; -typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size); +typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, + size_t align); typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); diff --git a/mm/percpu.c b/mm/percpu.c index 287f59cc5fb9..3316e3aac7ee 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1578,7 +1578,7 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, for (i = 0; i < unit_pages; i++) { void *ptr; - ptr = alloc_fn(cpu, PAGE_SIZE); + ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); if (!ptr) { pr_warning("PERCPU: failed to allocate %s page " "for cpu%u\n", psize_str, cpu); @@ -1888,7 +1888,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, goto found; continue; found: - ptr = alloc_fn(cpu, lpage_size); + ptr = alloc_fn(cpu, lpage_size, lpage_size); if (!ptr) { pr_warning("PERCPU: failed to allocate large page " "for cpu%u\n", cpu); -- cgit v1.2.3 From 033e48fb82958053113178264ddb9d5038d5e38b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: move pcpu_lpage_build_unit_map() and pcpul_lpage_dump_cfg() upward Unit map handling will be generalized and extended and used for embedding sparse first chunk and other purposes. Relocate two unit_map related functions upward in preparation. This patch just moves the code without any actual change. 
Signed-off-by: Tejun Heo --- include/linux/percpu.h | 14 +- mm/percpu.c | 339 +++++++++++++++++++++++++------------------------ 2 files changed, 180 insertions(+), 173 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d385dbcf190b..570fb18de2ba 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -78,6 +78,14 @@ typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK +extern int __init pcpu_lpage_build_unit_map( + size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn); +#endif + extern size_t __init pcpu_setup_first_chunk( size_t static_size, size_t reserved_size, size_t dyn_size, size_t unit_size, @@ -97,12 +105,6 @@ extern ssize_t __init pcpu_page_first_chunk( #endif #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern int __init pcpu_lpage_build_unit_map( - size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn); - extern ssize_t __init pcpu_lpage_first_chunk( size_t reserved_size, size_t dyn_size, size_t unit_size, size_t lpage_size, diff --git a/mm/percpu.c b/mm/percpu.c index 3316e3aac7ee..2b9c4b2a2fc0 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1231,6 +1231,178 @@ void free_percpu(void *ptr) } EXPORT_SYMBOL_GPL(free_percpu); +static inline size_t pcpu_calc_fc_sizes(size_t static_size, + size_t reserved_size, + ssize_t *dyn_sizep) +{ + size_t size_sum; + + size_sum = PFN_ALIGN(static_size + reserved_size + + (*dyn_sizep >= 0 ? *dyn_sizep : 0)); + if (*dyn_sizep != 0) + *dyn_sizep = size_sum - static_size - reserved_size; + + return size_sum; +} + +#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK +/** + * pcpu_lpage_build_unit_map - build unit_map for large page remapping + * @reserved_size: the size of reserved percpu area in bytes + * @dyn_sizep: in/out parameter for dynamic size, -1 for auto + * @unit_sizep: out parameter for unit size + * @unit_map: unit_map to be filled + * @cpu_distance_fn: callback to determine distance between cpus + * + * This function builds cpu -> unit map and determine other parameters + * considering needed percpu size, large page size and distances + * between CPUs in NUMA. + * + * CPUs which are of LOCAL_DISTANCE both ways are grouped together and + * may share units in the same large page. The returned configuration + * is guaranteed to have CPUs on different nodes on different large + * pages and >=75% usage of allocated virtual address space. + * + * RETURNS: + * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and + * returns the number of units to be allocated. -errno on failure. 
+ */ +int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, + size_t *unit_sizep, size_t lpage_size, + int *unit_map, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +{ + static int group_map[NR_CPUS] __initdata; + static int group_cnt[NR_CPUS] __initdata; + const size_t static_size = __per_cpu_end - __per_cpu_start; + int group_cnt_max = 0; + size_t size_sum, min_unit_size, alloc_size; + int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ + int last_allocs; + unsigned int cpu, tcpu; + int group, unit; + + /* + * Determine min_unit_size, alloc_size and max_upa such that + * alloc_size is multiple of lpage_size and is the smallest + * which can accomodate 4k aligned segments which are equal to + * or larger than min_unit_size. + */ + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); + min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); + + alloc_size = roundup(min_unit_size, lpage_size); + upa = alloc_size / min_unit_size; + while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + upa--; + max_upa = upa; + + /* group cpus according to their proximity */ + for_each_possible_cpu(cpu) { + group = 0; + next_group: + for_each_possible_cpu(tcpu) { + if (cpu == tcpu) + break; + if (group_map[tcpu] == group && + (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || + cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { + group++; + goto next_group; + } + } + group_map[cpu] = group; + group_cnt[group]++; + group_cnt_max = max(group_cnt_max, group_cnt[group]); + } + + /* + * Expand unit size until address space usage goes over 75% + * and then as much as possible without using more address + * space. + */ + last_allocs = INT_MAX; + for (upa = max_upa; upa; upa--) { + int allocs = 0, wasted = 0; + + if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) + continue; + + for (group = 0; group_cnt[group]; group++) { + int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); + allocs += this_allocs; + wasted += this_allocs * upa - group_cnt[group]; + } + + /* + * Don't accept if wastage is over 25%. The + * greater-than comparison ensures upa==1 always + * passes the following check. 
+ */ + if (wasted > num_possible_cpus() / 3) + continue; + + /* and then don't consume more memory */ + if (allocs > last_allocs) + break; + last_allocs = allocs; + best_upa = upa; + } + *unit_sizep = alloc_size / best_upa; + + /* assign units to cpus accordingly */ + unit = 0; + for (group = 0; group_cnt[group]; group++) { + for_each_possible_cpu(cpu) + if (group_map[cpu] == group) + unit_map[cpu] = unit++; + unit = roundup(unit, best_upa); + } + + return unit; /* unit contains aligned number of units */ +} + +static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, + unsigned int *cpup); + +static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, + size_t reserved_size, size_t dyn_size, + size_t unit_size, size_t lpage_size, + const int *unit_map, int nr_units) +{ + int width = 1, v = nr_units; + char empty_str[] = "--------"; + int upl, lpl; /* units per lpage, lpage per line */ + unsigned int cpu; + int lpage, unit; + + while (v /= 10) + width++; + empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; + + upl = max_t(int, lpage_size / unit_size, 1); + lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); + + printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, + static_size, reserved_size, dyn_size, unit_size, lpage_size); + + for (lpage = 0, unit = 0; unit < nr_units; unit++) { + if (!(unit % upl)) { + if (!(lpage++ % lpl)) { + printk("\n"); + printk("%spcpu-lpage: ", lvl); + } else + printk("| "); + } + if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + printk("%0*d ", width, cpu); + else + printk("%s ", empty_str); + } + printk("\n"); +} +#endif + /** * pcpu_setup_first_chunk - initialize the first percpu chunk * @static_size: the size of static percpu area in bytes @@ -1441,20 +1613,6 @@ static int __init percpu_alloc_setup(char *str) } early_param("percpu_alloc", percpu_alloc_setup); -static inline size_t pcpu_calc_fc_sizes(size_t static_size, - size_t reserved_size, - ssize_t *dyn_sizep) -{ - size_t size_sum; - - size_sum = PFN_ALIGN(static_size + reserved_size + - (*dyn_sizep >= 0 ? *dyn_sizep : 0)); - if (*dyn_sizep != 0) - *dyn_sizep = size_sum - static_size - reserved_size; - - return size_sum; -} - #if defined(CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK) || \ !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) /** @@ -1637,122 +1795,6 @@ out_free_ar: #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -/** - * pcpu_lpage_build_unit_map - build unit_map for large page remapping - * @reserved_size: the size of reserved percpu area in bytes - * @dyn_sizep: in/out parameter for dynamic size, -1 for auto - * @unit_sizep: out parameter for unit size - * @unit_map: unit_map to be filled - * @cpu_distance_fn: callback to determine distance between cpus - * - * This function builds cpu -> unit map and determine other parameters - * considering needed percpu size, large page size and distances - * between CPUs in NUMA. - * - * CPUs which are of LOCAL_DISTANCE both ways are grouped together and - * may share units in the same large page. The returned configuration - * is guaranteed to have CPUs on different nodes on different large - * pages and >=75% usage of allocated virtual address space. - * - * RETURNS: - * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and - * returns the number of units to be allocated. -errno on failure. 
- */ -int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) -{ - static int group_map[NR_CPUS] __initdata; - static int group_cnt[NR_CPUS] __initdata; - const size_t static_size = __per_cpu_end - __per_cpu_start; - int group_cnt_max = 0; - size_t size_sum, min_unit_size, alloc_size; - int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ - int last_allocs; - unsigned int cpu, tcpu; - int group, unit; - - /* - * Determine min_unit_size, alloc_size and max_upa such that - * alloc_size is multiple of lpage_size and is the smallest - * which can accomodate 4k aligned segments which are equal to - * or larger than min_unit_size. - */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); - min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - - alloc_size = roundup(min_unit_size, lpage_size); - upa = alloc_size / min_unit_size; - while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) - upa--; - max_upa = upa; - - /* group cpus according to their proximity */ - for_each_possible_cpu(cpu) { - group = 0; - next_group: - for_each_possible_cpu(tcpu) { - if (cpu == tcpu) - break; - if (group_map[tcpu] == group && - (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || - cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { - group++; - goto next_group; - } - } - group_map[cpu] = group; - group_cnt[group]++; - group_cnt_max = max(group_cnt_max, group_cnt[group]); - } - - /* - * Expand unit size until address space usage goes over 75% - * and then as much as possible without using more address - * space. - */ - last_allocs = INT_MAX; - for (upa = max_upa; upa; upa--) { - int allocs = 0, wasted = 0; - - if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) - continue; - - for (group = 0; group_cnt[group]; group++) { - int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); - allocs += this_allocs; - wasted += this_allocs * upa - group_cnt[group]; - } - - /* - * Don't accept if wastage is over 25%. The - * greater-than comparison ensures upa==1 always - * passes the following check. 
- */ - if (wasted > num_possible_cpus() / 3) - continue; - - /* and then don't consume more memory */ - if (allocs > last_allocs) - break; - last_allocs = allocs; - best_upa = upa; - } - *unit_sizep = alloc_size / best_upa; - - /* assign units to cpus accordingly */ - unit = 0; - for (group = 0; group_cnt[group]; group++) { - for_each_possible_cpu(cpu) - if (group_map[cpu] == group) - unit_map[cpu] = unit++; - unit = roundup(unit, best_upa); - } - - return unit; /* unit contains aligned number of units */ -} - struct pcpul_ent { void *ptr; void *map_addr; @@ -1778,43 +1820,6 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, return false; } -static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, - size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units) -{ - int width = 1, v = nr_units; - char empty_str[] = "--------"; - int upl, lpl; /* units per lpage, lpage per line */ - unsigned int cpu; - int lpage, unit; - - while (v /= 10) - width++; - empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; - - upl = max_t(int, lpage_size / unit_size, 1); - lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); - - printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, - static_size, reserved_size, dyn_size, unit_size, lpage_size); - - for (lpage = 0, unit = 0; unit < nr_units; unit++) { - if (!(unit % upl)) { - if (!(lpage++ % lpl)) { - printk("\n"); - printk("%spcpu-lpage: ", lvl); - } else - printk("| "); - } - if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) - printk("%0*d ", width, cpu); - else - printk("%s ", empty_str); - } - printk("\n"); -} - /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page * @reserved_size: the size of reserved percpu area in bytes -- cgit v1.2.3 From fd1e8a1fe2b54df6c185b4fa65f181f50b9c4d4e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: introduce pcpu_alloc_info and pcpu_group_info Till now, non-linear cpu->unit map was expressed using an integer array which maps each cpu to a unit and used only by lpage allocator. Although how many units have been placed in a single contiguos area (group) is known while building unit_map, the information is lost when the result is recorded into the unit_map array. For lpage allocator, as all allocations are done by lpages and whether two adjacent lpages are in the same group or not is irrelevant, this didn't cause any problem. Non-linear cpu->unit mapping will be used for sparse embedding and this grouping information is necessary for that. This patch introduces pcpu_alloc_info which contains all the information necessary for initializing percpu allocator. pcpu_alloc_info contains array of pcpu_group_info which describes how units are grouped and mapped to cpus. pcpu_group_info also has base_offset field to specify its offset from the chunk's base address. pcpu_build_alloc_info() initializes this field as if all groups are allocated back-to-back as is currently done but this will be used to sparsely place groups. pcpu_alloc_info is a rather complex data structure which contains a flexible array which in turn points to nested cpu_map arrays. * pcpu_alloc_alloc_info() and pcpu_free_alloc_info() are provided to help dealing with pcpu_alloc_info. * pcpu_lpage_build_unit_map() is updated to build pcpu_alloc_info, generalized and renamed to pcpu_build_alloc_info(). 
@cpu_distance_fn may be NULL indicating that all cpus are of LOCAL_DISTANCE. * pcpul_lpage_dump_cfg() is updated to process pcpu_alloc_info, generalized and renamed to pcpu_dump_alloc_info(). It now also prints which group each alloc unit belongs to. * pcpu_setup_first_chunk() now takes pcpu_alloc_info instead of the separate parameters. All first chunk allocators are updated to use pcpu_build_alloc_info() to build alloc_info and call pcpu_setup_first_chunk() with it. This has the side effect of packing units for sparse possible cpus. ie. if cpus 0, 2 and 4 are possible, they'll be assigned unit 0, 1 and 2 instead of 0, 2 and 4. * x86 setup_pcpu_lpage() is updated to deal with alloc_info. * sparc64 setup_per_cpu_areas() is updated to build alloc_info. Although the changes made by this patch are pretty pervasive, it doesn't cause any behavior difference other than packing of sparse cpus. It mostly changes how information is passed among initialization functions and makes room for more flexibility. Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: David Miller --- arch/sparc/kernel/smp_64.c | 24 +- arch/x86/kernel/setup_percpu.c | 38 ++- include/linux/percpu.h | 42 +++- mm/percpu.c | 529 +++++++++++++++++++++++++---------------- 4 files changed, 389 insertions(+), 244 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 9856d866b77b..a42a4a744d14 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1475,17 +1475,29 @@ static void __init pcpu_map_range(unsigned long start, unsigned long end, void __init setup_per_cpu_areas(void) { - size_t dyn_size, static_size = __per_cpu_end - __per_cpu_start; static struct vm_struct vm; + struct pcpu_alloc_info *ai; unsigned long delta, cpu; size_t size_sum, pcpu_unit_size; size_t ptrs_size; void **ptrs; - size_sum = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + + ai = pcpu_alloc_alloc_info(1, nr_cpu_ids); + + ai->static_size = __per_cpu_end - __per_cpu_start; + ai->reserved_size = PERCPU_MODULE_RESERVE; + + size_sum = PFN_ALIGN(ai->static_size + ai->reserved_size + PERCPU_DYNAMIC_RESERVE); - dyn_size = size_sum - static_size - PERCPU_MODULE_RESERVE; + ai->dyn_size = size_sum - ai->static_size - ai->reserved_size; + ai->unit_size = PCPU_CHUNK_SIZE; + ai->atom_size = PCPU_CHUNK_SIZE; + ai->alloc_size = PCPU_CHUNK_SIZE; + ai->groups[0].nr_units = nr_cpu_ids; + + for_each_possible_cpu(cpu) + ai->groups[0].cpu_map[cpu] = cpu; ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(ptrs[0])); ptrs = alloc_bootmem(ptrs_size); @@ -1497,7 +1509,7 @@ void __init setup_per_cpu_areas(void) free_bootmem(__pa(ptrs[cpu] + size_sum), PCPU_CHUNK_SIZE - size_sum); - memcpy(ptrs[cpu], __per_cpu_load, static_size); + memcpy(ptrs[cpu], __per_cpu_load, ai->static_size); } /* allocate address and map */ @@ -1514,9 +1526,7 @@ void __init setup_per_cpu_areas(void) pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); } - pcpu_unit_size = pcpu_setup_first_chunk(static_size, - PERCPU_MODULE_RESERVE, dyn_size, - PCPU_CHUNK_SIZE, vm.addr, NULL); + pcpu_unit_size = pcpu_setup_first_chunk(ai, vm.addr); free_bootmem(__pa(ptrs), ptrs_size); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 660cde133141..db5f9c49fec5 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -161,9 +161,7 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; - 
size_t unit_map_size, unit_size; - int *unit_map; - int nr_units; + struct pcpu_alloc_info *ai; ssize_t ret; /* on non-NUMA, embedding is better */ @@ -177,26 +175,22 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) } /* allocate and build unit_map */ - unit_map_size = nr_cpu_ids * sizeof(int); - unit_map = alloc_bootmem_nopanic(unit_map_size); - if (!unit_map) { - pr_warning("PERCPU: failed to allocate unit_map\n"); - return -ENOMEM; + ai = pcpu_build_alloc_info(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, + PMD_SIZE, pcpu_lpage_cpu_distance); + if (IS_ERR(ai)) { + pr_warning("PERCPU: failed to build unit_map (%ld)\n", + PTR_ERR(ai)); + return PTR_ERR(ai); } - ret = pcpu_lpage_build_unit_map(PERCPU_FIRST_CHUNK_RESERVE, - &dyn_size, &unit_size, PMD_SIZE, - unit_map, pcpu_lpage_cpu_distance); - if (ret < 0) { - pr_warning("PERCPU: failed to build unit_map\n"); - goto out_free; - } - nr_units = ret; - /* do the parameters look okay? */ if (!chosen) { size_t vm_size = VMALLOC_END - VMALLOC_START; - size_t tot_size = nr_units * unit_size; + size_t tot_size = 0; + int group; + + for (group = 0; group < ai->nr_groups; group++) + tot_size += ai->unit_size * ai->groups[group].nr_units; /* don't consume more than 20% of vmalloc area */ if (tot_size > vm_size / 5) { @@ -207,12 +201,10 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) } } - ret = pcpu_lpage_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, dyn_size, - unit_size, PMD_SIZE, unit_map, nr_units, - pcpu_fc_alloc, pcpu_fc_free, pcpul_map); + ret = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, + pcpul_map); out_free: - if (ret < 0) - free_bootmem(__pa(unit_map), unit_map_size); + pcpu_free_alloc_info(ai); return ret; } #else diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 570fb18de2ba..77b86be8ce4f 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -59,6 +59,25 @@ extern void *pcpu_base_addr; extern const int *pcpu_unit_map; +struct pcpu_group_info { + int nr_units; /* aligned # of units */ + unsigned long base_offset; /* base address offset */ + unsigned int *cpu_map; /* unit->cpu map, empty + * entries contain NR_CPUS */ +}; + +struct pcpu_alloc_info { + size_t static_size; + size_t reserved_size; + size_t dyn_size; + size_t unit_size; + size_t atom_size; + size_t alloc_size; + size_t __ai_size; /* internal, don't use */ + int nr_groups; /* 0 if grouping unnecessary */ + struct pcpu_group_info groups[]; +}; + enum pcpu_fc { PCPU_FC_AUTO, PCPU_FC_EMBED, @@ -78,18 +97,17 @@ typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern int __init pcpu_lpage_build_unit_map( - size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, +extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, + int nr_units); +extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai); + +extern struct pcpu_alloc_info * __init pcpu_build_alloc_info( + size_t reserved_size, ssize_t dyn_size, + size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn); -#endif -extern size_t __init pcpu_setup_first_chunk( - size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - void *base_addr, const int *unit_map); +extern size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr); #ifdef 
CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK extern ssize_t __init pcpu_embed_first_chunk( @@ -106,9 +124,7 @@ extern ssize_t __init pcpu_page_first_chunk( #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK extern ssize_t __init pcpu_lpage_first_chunk( - size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units, + const struct pcpu_alloc_info *ai, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn); diff --git a/mm/percpu.c b/mm/percpu.c index 2b9c4b2a2fc0..99f7fa682722 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -58,6 +58,7 @@ #include #include +#include #include #include #include @@ -1245,53 +1246,108 @@ static inline size_t pcpu_calc_fc_sizes(size_t static_size, return size_sum; } -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK /** - * pcpu_lpage_build_unit_map - build unit_map for large page remapping + * pcpu_alloc_alloc_info - allocate percpu allocation info + * @nr_groups: the number of groups + * @nr_units: the number of units + * + * Allocate ai which is large enough for @nr_groups groups containing + * @nr_units units. The returned ai's groups[0].cpu_map points to the + * cpu_map array which is long enough for @nr_units and filled with + * NR_CPUS. It's the caller's responsibility to initialize cpu_map + * pointer of other groups. + * + * RETURNS: + * Pointer to the allocated pcpu_alloc_info on success, NULL on + * failure. + */ +struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, + int nr_units) +{ + struct pcpu_alloc_info *ai; + size_t base_size, ai_size; + void *ptr; + int unit; + + base_size = ALIGN(sizeof(*ai) + nr_groups * sizeof(ai->groups[0]), + __alignof__(ai->groups[0].cpu_map[0])); + ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]); + + ptr = alloc_bootmem_nopanic(PFN_ALIGN(ai_size)); + if (!ptr) + return NULL; + ai = ptr; + ptr += base_size; + + ai->groups[0].cpu_map = ptr; + + for (unit = 0; unit < nr_units; unit++) + ai->groups[0].cpu_map[unit] = NR_CPUS; + + ai->nr_groups = nr_groups; + ai->__ai_size = PFN_ALIGN(ai_size); + + return ai; +} + +/** + * pcpu_free_alloc_info - free percpu allocation info + * @ai: pcpu_alloc_info to free + * + * Free @ai which was allocated by pcpu_alloc_alloc_info(). + */ +void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai) +{ + free_bootmem(__pa(ai), ai->__ai_size); +} + +/** + * pcpu_build_alloc_info - build alloc_info considering distances between CPUs * @reserved_size: the size of reserved percpu area in bytes - * @dyn_sizep: in/out parameter for dynamic size, -1 for auto - * @unit_sizep: out parameter for unit size - * @unit_map: unit_map to be filled - * @cpu_distance_fn: callback to determine distance between cpus + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @atom_size: allocation atom size + * @cpu_distance_fn: callback to determine distance between cpus, optional * - * This function builds cpu -> unit map and determine other parameters - * considering needed percpu size, large page size and distances - * between CPUs in NUMA. + * This function determines grouping of units, their mappings to cpus + * and other parameters considering needed percpu size, allocation + * atom size and distances between CPUs. * - * CPUs which are of LOCAL_DISTANCE both ways are grouped together and - * may share units in the same large page. The returned configuration - * is guaranteed to have CPUs on different nodes on different large - * pages and >=75% usage of allocated virtual address space. 
+ * Groups are always mutliples of atom size and CPUs which are of + * LOCAL_DISTANCE both ways are grouped together and share space for + * units in the same group. The returned configuration is guaranteed + * to have CPUs on different nodes on different groups and >=75% usage + * of allocated virtual address space. * * RETURNS: - * On success, fills in @unit_map, sets *@dyn_sizep, *@unit_sizep and - * returns the number of units to be allocated. -errno on failure. + * On success, pointer to the new allocation_info is returned. On + * failure, ERR_PTR value is returned. */ -int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, - size_t *unit_sizep, size_t lpage_size, - int *unit_map, - pcpu_fc_cpu_distance_fn_t cpu_distance_fn) +struct pcpu_alloc_info * __init pcpu_build_alloc_info( + size_t reserved_size, ssize_t dyn_size, + size_t atom_size, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn) { static int group_map[NR_CPUS] __initdata; static int group_cnt[NR_CPUS] __initdata; const size_t static_size = __per_cpu_end - __per_cpu_start; - int group_cnt_max = 0; + int group_cnt_max = 0, nr_groups = 1, nr_units = 0; size_t size_sum, min_unit_size, alloc_size; int upa, max_upa, uninitialized_var(best_upa); /* units_per_alloc */ - int last_allocs; + int last_allocs, group, unit; unsigned int cpu, tcpu; - int group, unit; + struct pcpu_alloc_info *ai; + unsigned int *cpu_map; /* * Determine min_unit_size, alloc_size and max_upa such that - * alloc_size is multiple of lpage_size and is the smallest + * alloc_size is multiple of atom_size and is the smallest * which can accomodate 4k aligned segments which are equal to * or larger than min_unit_size. */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, dyn_sizep); + size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - alloc_size = roundup(min_unit_size, lpage_size); + alloc_size = roundup(min_unit_size, atom_size); upa = alloc_size / min_unit_size; while (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) upa--; @@ -1304,10 +1360,11 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, for_each_possible_cpu(tcpu) { if (cpu == tcpu) break; - if (group_map[tcpu] == group && + if (group_map[tcpu] == group && cpu_distance_fn && (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE || cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) { group++; + nr_groups = max(nr_groups, group + 1); goto next_group; } } @@ -1328,7 +1385,7 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, if (alloc_size % upa || ((alloc_size / upa) & ~PAGE_MASK)) continue; - for (group = 0; group_cnt[group]; group++) { + for (group = 0; group < nr_groups; group++) { int this_allocs = DIV_ROUND_UP(group_cnt[group], upa); allocs += this_allocs; wasted += this_allocs * upa - group_cnt[group]; @@ -1348,75 +1405,122 @@ int __init pcpu_lpage_build_unit_map(size_t reserved_size, ssize_t *dyn_sizep, last_allocs = allocs; best_upa = upa; } - *unit_sizep = alloc_size / best_upa; + upa = best_upa; + + /* allocate and fill alloc_info */ + for (group = 0; group < nr_groups; group++) + nr_units += roundup(group_cnt[group], upa); + + ai = pcpu_alloc_alloc_info(nr_groups, nr_units); + if (!ai) + return ERR_PTR(-ENOMEM); + cpu_map = ai->groups[0].cpu_map; + + for (group = 0; group < nr_groups; group++) { + ai->groups[group].cpu_map = cpu_map; + cpu_map += roundup(group_cnt[group], upa); + } + + ai->static_size = static_size; + 
ai->reserved_size = reserved_size; + ai->dyn_size = dyn_size; + ai->unit_size = alloc_size / upa; + ai->atom_size = atom_size; + ai->alloc_size = alloc_size; + + for (group = 0, unit = 0; group_cnt[group]; group++) { + struct pcpu_group_info *gi = &ai->groups[group]; + + /* + * Initialize base_offset as if all groups are located + * back-to-back. The caller should update this to + * reflect actual allocation. + */ + gi->base_offset = unit * ai->unit_size; - /* assign units to cpus accordingly */ - unit = 0; - for (group = 0; group_cnt[group]; group++) { for_each_possible_cpu(cpu) if (group_map[cpu] == group) - unit_map[cpu] = unit++; - unit = roundup(unit, best_upa); + gi->cpu_map[gi->nr_units++] = cpu; + gi->nr_units = roundup(gi->nr_units, upa); + unit += gi->nr_units; } + BUG_ON(unit != nr_units); - return unit; /* unit contains aligned number of units */ + return ai; } -static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, - unsigned int *cpup); - -static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, - size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units) +/** + * pcpu_dump_alloc_info - print out information about pcpu_alloc_info + * @lvl: loglevel + * @ai: allocation info to dump + * + * Print out information about @ai using loglevel @lvl. + */ +static void pcpu_dump_alloc_info(const char *lvl, + const struct pcpu_alloc_info *ai) { - int width = 1, v = nr_units; + int group_width = 1, cpu_width = 1, width; char empty_str[] = "--------"; - int upl, lpl; /* units per lpage, lpage per line */ - unsigned int cpu; - int lpage, unit; + int alloc = 0, alloc_end = 0; + int group, v; + int upa, apl; /* units per alloc, allocs per line */ + + v = ai->nr_groups; + while (v /= 10) + group_width++; + v = num_possible_cpus(); while (v /= 10) - width++; - empty_str[min_t(int, width, sizeof(empty_str) - 1)] = '\0'; + cpu_width++; + empty_str[min_t(int, cpu_width, sizeof(empty_str) - 1)] = '\0'; - upl = max_t(int, lpage_size / unit_size, 1); - lpl = rounddown_pow_of_two(max_t(int, 60 / (upl * (width + 1) + 2), 1)); + upa = ai->alloc_size / ai->unit_size; + width = upa * (cpu_width + 1) + group_width + 3; + apl = rounddown_pow_of_two(max(60 / width, 1)); - printk("%spcpu-lpage: sta/res/dyn=%zu/%zu/%zu unit=%zu lpage=%zu", lvl, - static_size, reserved_size, dyn_size, unit_size, lpage_size); + printk("%spcpu-alloc: s%zu r%zu d%zu u%zu alloc=%zu*%zu", + lvl, ai->static_size, ai->reserved_size, ai->dyn_size, + ai->unit_size, ai->alloc_size / ai->atom_size, ai->atom_size); - for (lpage = 0, unit = 0; unit < nr_units; unit++) { - if (!(unit % upl)) { - if (!(lpage++ % lpl)) { + for (group = 0; group < ai->nr_groups; group++) { + const struct pcpu_group_info *gi = &ai->groups[group]; + int unit = 0, unit_end = 0; + + BUG_ON(gi->nr_units % upa); + for (alloc_end += gi->nr_units / upa; + alloc < alloc_end; alloc++) { + if (!(alloc % apl)) { printk("\n"); - printk("%spcpu-lpage: ", lvl); - } else - printk("| "); + printk("%spcpu-alloc: ", lvl); + } + printk("[%0*d] ", group_width, group); + + for (unit_end += upa; unit < unit_end; unit++) + if (gi->cpu_map[unit] != NR_CPUS) + printk("%0*d ", cpu_width, + gi->cpu_map[unit]); + else + printk("%s ", empty_str); } - if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) - printk("%0*d ", width, cpu); - else - printk("%s ", empty_str); } printk("\n"); } -#endif /** * pcpu_setup_first_chunk - initialize the first percpu chunk - * @static_size: the size of static percpu area in 
bytes - * @reserved_size: the size of reserved percpu area in bytes, 0 for none - * @dyn_size: free size for dynamic allocation in bytes - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE + * @ai: pcpu_alloc_info describing how to percpu area is shaped * @base_addr: mapped address - * @unit_map: cpu -> unit map, NULL for sequential mapping * * Initialize the first percpu chunk which contains the kernel static * perpcu area. This function is to be called from arch percpu area * setup path. * - * @reserved_size, if non-zero, specifies the amount of bytes to + * @ai contains all information necessary to initialize the first + * chunk and prime the dynamic percpu allocator. + * + * @ai->static_size is the size of static percpu area. + * + * @ai->reserved_size, if non-zero, specifies the amount of bytes to * reserve after the static area in the first chunk. This reserves * the first chunk such that it's available only through reserved * percpu allocation. This is primarily used to serve module percpu @@ -1424,13 +1528,26 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, * limited offset range for symbol relocations to guarantee module * percpu symbols fall inside the relocatable range. * - * @dyn_size determines the number of bytes available for dynamic - * allocation in the first chunk. The area between @static_size + - * @reserved_size + @dyn_size and @unit_size is unused. + * @ai->dyn_size determines the number of bytes available for dynamic + * allocation in the first chunk. The area between @ai->static_size + + * @ai->reserved_size + @ai->dyn_size and @ai->unit_size is unused. * - * @unit_size specifies unit size and must be aligned to PAGE_SIZE and - * equal to or larger than @static_size + @reserved_size + if - * non-negative, @dyn_size. + * @ai->unit_size specifies unit size and must be aligned to PAGE_SIZE + * and equal to or larger than @ai->static_size + @ai->reserved_size + + * @ai->dyn_size. + * + * @ai->atom_size is the allocation atom size and used as alignment + * for vm areas. + * + * @ai->alloc_size is the allocation size and always multiple of + * @ai->atom_size. This is larger than @ai->atom_size if + * @ai->unit_size is larger than @ai->atom_size. + * + * @ai->nr_groups and @ai->groups describe virtual memory layout of + * percpu areas. Units which should be colocated are put into the + * same group. Dynamic VM areas will be allocated according to these + * groupings. If @ai->nr_groups is zero, a single group containing + * all units is assumed. * * The caller should have mapped the first chunk at @base_addr and * copied static data to each unit. @@ -1446,70 +1563,63 @@ static void __init pcpul_lpage_dump_cfg(const char *lvl, size_t static_size, * The determined pcpu_unit_size which can be used to initialize * percpu access. 
*/ -size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, - size_t dyn_size, size_t unit_size, - void *base_addr, const int *unit_map) +size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr) { static struct vm_struct first_vm; static int smap[2], dmap[2]; - size_t size_sum = static_size + reserved_size + dyn_size; + size_t dyn_size = ai->dyn_size; + size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; struct pcpu_chunk *schunk, *dchunk = NULL; - unsigned int cpu, tcpu; - int i; + unsigned int cpu; + int *unit_map; + int group, unit, i; /* sanity checks */ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); - BUG_ON(!static_size); + BUG_ON(ai->nr_groups <= 0); + BUG_ON(!ai->static_size); BUG_ON(!base_addr); - BUG_ON(unit_size < size_sum); - BUG_ON(unit_size & ~PAGE_MASK); - BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); + BUG_ON(ai->unit_size < size_sum); + BUG_ON(ai->unit_size & ~PAGE_MASK); + BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); + + pcpu_dump_alloc_info(KERN_DEBUG, ai); /* determine number of units and verify and initialize pcpu_unit_map */ - if (unit_map) { - int first_unit = INT_MAX, last_unit = INT_MIN; - - for_each_possible_cpu(cpu) { - int unit = unit_map[cpu]; - - BUG_ON(unit < 0); - for_each_possible_cpu(tcpu) { - if (tcpu == cpu) - break; - /* the mapping should be one-to-one */ - BUG_ON(unit_map[tcpu] == unit); - } + unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0])); - if (unit < first_unit) { - pcpu_first_unit_cpu = cpu; - first_unit = unit; - } - if (unit > last_unit) { - pcpu_last_unit_cpu = cpu; - last_unit = unit; - } - } - pcpu_nr_units = last_unit + 1; - pcpu_unit_map = unit_map; - } else { - int *identity_map; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + unit_map[cpu] = NR_CPUS; + pcpu_first_unit_cpu = NR_CPUS; - /* #units == #cpus, identity mapped */ - identity_map = alloc_bootmem(nr_cpu_ids * - sizeof(identity_map[0])); + for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { + const struct pcpu_group_info *gi = &ai->groups[group]; - for_each_possible_cpu(cpu) - identity_map[cpu] = cpu; + for (i = 0; i < gi->nr_units; i++) { + cpu = gi->cpu_map[i]; + if (cpu == NR_CPUS) + continue; - pcpu_first_unit_cpu = 0; - pcpu_last_unit_cpu = pcpu_nr_units - 1; - pcpu_nr_units = nr_cpu_ids; - pcpu_unit_map = identity_map; + BUG_ON(cpu > nr_cpu_ids || !cpu_possible(cpu)); + BUG_ON(unit_map[cpu] != NR_CPUS); + + unit_map[cpu] = unit + i; + if (pcpu_first_unit_cpu == NR_CPUS) + pcpu_first_unit_cpu = cpu; + } } + pcpu_last_unit_cpu = cpu; + pcpu_nr_units = unit; + + for_each_possible_cpu(cpu) + BUG_ON(unit_map[cpu] == NR_CPUS); + + pcpu_unit_map = unit_map; /* determine basic parameters */ - pcpu_unit_pages = unit_size >> PAGE_SHIFT; + pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = pcpu_nr_units * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + @@ -1543,17 +1653,17 @@ size_t __init pcpu_setup_first_chunk(size_t static_size, size_t reserved_size, schunk->immutable = true; bitmap_fill(schunk->populated, pcpu_unit_pages); - if (reserved_size) { - schunk->free_size = reserved_size; + if (ai->reserved_size) { + schunk->free_size = ai->reserved_size; pcpu_reserved_chunk = schunk; - pcpu_reserved_chunk_limit = static_size + reserved_size; + pcpu_reserved_chunk_limit = ai->static_size + ai->reserved_size; } else { schunk->free_size = dyn_size; dyn_size = 0; /* dynamic 
area covered */ } schunk->contig_hint = schunk->free_size; - schunk->map[schunk->map_used++] = -static_size; + schunk->map[schunk->map_used++] = -ai->static_size; if (schunk->free_size) schunk->map[schunk->map_used++] = schunk->free_size; @@ -1643,44 +1753,47 @@ early_param("percpu_alloc", percpu_alloc_setup); */ ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) { - const size_t static_size = __per_cpu_end - __per_cpu_start; - size_t size_sum, unit_size, chunk_size; + struct pcpu_alloc_info *ai; + size_t size_sum, chunk_size; void *base; - unsigned int cpu; + int unit; + ssize_t ret; - /* determine parameters and allocate */ - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); + ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); + if (IS_ERR(ai)) + return PTR_ERR(ai); + BUG_ON(ai->nr_groups != 1); + BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); - unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - chunk_size = unit_size * nr_cpu_ids; + size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; + chunk_size = ai->unit_size * num_possible_cpus(); base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); if (!base) { pr_warning("PERCPU: failed to allocate %zu bytes for " "embedding\n", chunk_size); - return -ENOMEM; + ret = -ENOMEM; + goto out_free_ai; } /* return the leftover and copy */ - for (cpu = 0; cpu < nr_cpu_ids; cpu++) { - void *ptr = base + cpu * unit_size; - - if (cpu_possible(cpu)) { - free_bootmem(__pa(ptr + size_sum), - unit_size - size_sum); - memcpy(ptr, __per_cpu_load, static_size); - } else - free_bootmem(__pa(ptr), unit_size); + for (unit = 0; unit < num_possible_cpus(); unit++) { + void *ptr = base + unit * ai->unit_size; + + free_bootmem(__pa(ptr + size_sum), ai->unit_size - size_sum); + memcpy(ptr, __per_cpu_load, ai->static_size); } /* we're ready, commit */ pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", - PFN_DOWN(size_sum), base, static_size, reserved_size, dyn_size, - unit_size); + PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, + ai->dyn_size, ai->unit_size); - return pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, base, NULL); + ret = pcpu_setup_first_chunk(ai, base); +out_free_ai: + pcpu_free_alloc_info(ai); + return ret; } #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || !CONFIG_HAVE_SETUP_PER_CPU_AREA */ @@ -1709,31 +1822,34 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; - const size_t static_size = __per_cpu_end - __per_cpu_start; - ssize_t dyn_size = -1; - size_t size_sum, unit_size; + struct pcpu_alloc_info *ai; char psize_str[16]; int unit_pages; size_t pages_size; struct page **pages; - unsigned int cpu; - int i, j; + int unit, i, j; ssize_t ret; snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); - size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size); - unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE); - unit_pages = unit_size >> PAGE_SHIFT; + ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL); + if (IS_ERR(ai)) + return PTR_ERR(ai); + BUG_ON(ai->nr_groups != 1); + BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); + + unit_pages = ai->unit_size >> PAGE_SHIFT; /* unaligned allocations can't be freed, round up to page size */ - pages_size = PFN_ALIGN(unit_pages * nr_cpu_ids * sizeof(pages[0])); + pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() * 
+ sizeof(pages[0])); pages = alloc_bootmem(pages_size); /* allocate pages */ j = 0; - for_each_possible_cpu(cpu) + for (unit = 0; unit < num_possible_cpus(); unit++) for (i = 0; i < unit_pages; i++) { + unsigned int cpu = ai->groups[0].cpu_map[unit]; void *ptr; ptr = alloc_fn(cpu, PAGE_SIZE, PAGE_SIZE); @@ -1747,18 +1863,18 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, /* allocate vm area, map the pages and copy static data */ vm.flags = VM_ALLOC; - vm.size = nr_cpu_ids * unit_size; + vm.size = num_possible_cpus() * ai->unit_size; vm_area_register_early(&vm, PAGE_SIZE); - for_each_possible_cpu(cpu) { + for (unit = 0; unit < num_possible_cpus(); unit++) { unsigned long unit_addr = - (unsigned long)vm.addr + cpu * unit_size; + (unsigned long)vm.addr + unit * ai->unit_size; for (i = 0; i < unit_pages; i++) populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ - ret = __pcpu_map_pages(unit_addr, &pages[cpu * unit_pages], + ret = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], unit_pages); if (ret < 0) panic("failed to map percpu area, err=%zd\n", ret); @@ -1772,16 +1888,15 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, */ /* copy static data */ - memcpy((void *)unit_addr, __per_cpu_load, static_size); + memcpy((void *)unit_addr, __per_cpu_load, ai->static_size); } /* we're ready, commit */ pr_info("PERCPU: %d %s pages/cpu @%p s%zu r%zu d%zu\n", - unit_pages, psize_str, vm.addr, static_size, reserved_size, - dyn_size); + unit_pages, psize_str, vm.addr, ai->static_size, + ai->reserved_size, ai->dyn_size); - ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, vm.addr, NULL); + ret = pcpu_setup_first_chunk(ai, vm.addr); goto out_free_ar; enomem: @@ -1790,6 +1905,7 @@ enomem: ret = -ENOMEM; out_free_ar: free_bootmem(__pa(pages), pages_size); + pcpu_free_alloc_info(ai); return ret; } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ @@ -1805,38 +1921,50 @@ static size_t pcpul_lpage_size; static int pcpul_nr_lpages; static struct pcpul_ent *pcpul_map; -static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, +static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai, unsigned int *cpup) { - unsigned int cpu; + int group, cunit; - for_each_possible_cpu(cpu) - if (unit_map[cpu] == unit) { + for (group = 0, cunit = 0; group < ai->nr_groups; group++) { + const struct pcpu_group_info *gi = &ai->groups[group]; + + if (unit < cunit + gi->nr_units) { if (cpup) - *cpup = cpu; + *cpup = gi->cpu_map[unit - cunit]; return true; } + cunit += gi->nr_units; + } return false; } +static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) +{ + int group, unit, i; + + for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { + const struct pcpu_group_info *gi = &ai->groups[group]; + + for (i = 0; i < gi->nr_units; i++) + if (gi->cpu_map[i] == cpu) + return unit + i; + } + BUG(); +} + /** * pcpu_lpage_first_chunk - remap the first percpu chunk using large page - * @reserved_size: the size of reserved percpu area in bytes - * @dyn_size: free size for dynamic allocation in bytes - * @unit_size: unit size in bytes - * @lpage_size: the size of a large page - * @unit_map: cpu -> unit mapping - * @nr_units: the number of units + * @ai: pcpu_alloc_info * @alloc_fn: function to allocate percpu lpage, always called with lpage_size * @free_fn: function to free percpu memory, @size <= lpage_size * @map_fn: function to map percpu lpage, always called 
with lpage_size * * This allocator uses large page to build and map the first chunk. - * Unlike other helpers, the caller should always specify @dyn_size - * and @unit_size. These parameters along with @unit_map and - * @nr_units can be determined using pcpu_lpage_build_unit_map(). - * This two stage initialization is to allow arch code to evaluate the + * Unlike other helpers, the caller should provide fully initialized + * @ai. This can be done using pcpu_build_alloc_info(). This two + * stage initialization is to allow arch code to evaluate the * parameters before committing to it. * * Large pages are allocated as directed by @unit_map and other @@ -1852,27 +1980,26 @@ static bool __init pcpul_unit_to_cpu(int unit, const int *unit_map, * The determined pcpu_unit_size which can be used to initialize * percpu access on success, -errno on failure. */ -ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, - size_t unit_size, size_t lpage_size, - const int *unit_map, int nr_units, +ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn) { static struct vm_struct vm; - const size_t static_size = __per_cpu_end - __per_cpu_start; - size_t chunk_size = unit_size * nr_units; - size_t map_size; + const size_t lpage_size = ai->atom_size; + size_t chunk_size, map_size; unsigned int cpu; ssize_t ret; - int i, j, unit; + int i, j, unit, nr_units; - pcpul_lpage_dump_cfg(KERN_DEBUG, static_size, reserved_size, dyn_size, - unit_size, lpage_size, unit_map, nr_units); + nr_units = 0; + for (i = 0; i < ai->nr_groups; i++) + nr_units += ai->groups[i].nr_units; + chunk_size = ai->unit_size * nr_units; BUG_ON(chunk_size % lpage_size); - pcpul_size = static_size + reserved_size + dyn_size; + pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size; pcpul_lpage_size = lpage_size; pcpul_nr_lpages = chunk_size / lpage_size; @@ -1883,13 +2010,13 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, /* allocate all pages */ for (i = 0; i < pcpul_nr_lpages; i++) { size_t offset = i * lpage_size; - int first_unit = offset / unit_size; - int last_unit = (offset + lpage_size - 1) / unit_size; + int first_unit = offset / ai->unit_size; + int last_unit = (offset + lpage_size - 1) / ai->unit_size; void *ptr; /* find out which cpu is mapped to this unit */ for (unit = first_unit; unit <= last_unit; unit++) - if (pcpul_unit_to_cpu(unit, unit_map, &cpu)) + if (pcpul_unit_to_cpu(unit, ai, &cpu)) goto found; continue; found: @@ -1905,12 +2032,12 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, /* return unused holes */ for (unit = 0; unit < nr_units; unit++) { - size_t start = unit * unit_size; - size_t end = start + unit_size; + size_t start = unit * ai->unit_size; + size_t end = start + ai->unit_size; size_t off, next; /* don't free used part of occupied unit */ - if (pcpul_unit_to_cpu(unit, unit_map, NULL)) + if (pcpul_unit_to_cpu(unit, ai, NULL)) start += pcpul_size; /* unit can span more than one page, punch the holes */ @@ -1925,7 +2052,7 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t dyn_size, /* allocate address, map and copy */ vm.flags = VM_ALLOC; vm.size = chunk_size; - vm_area_register_early(&vm, unit_size); + vm_area_register_early(&vm, ai->unit_size); for (i = 0; i < pcpul_nr_lpages; i++) { if (!pcpul_map[i].ptr) @@ -1935,15 +2062,15 @@ ssize_t __init pcpu_lpage_first_chunk(size_t reserved_size, size_t 
dyn_size, } for_each_possible_cpu(cpu) - memcpy(vm.addr + unit_map[cpu] * unit_size, __per_cpu_load, - static_size); + memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size, + __per_cpu_load, ai->static_size); /* we're ready, commit */ pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n", - vm.addr, static_size, reserved_size, dyn_size, unit_size); + vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, + ai->unit_size); - ret = pcpu_setup_first_chunk(static_size, reserved_size, dyn_size, - unit_size, vm.addr, unit_map); + ret = pcpu_setup_first_chunk(ai, vm.addr); /* * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped -- cgit v1.2.3 From fb435d5233f8b6f9b93c11d6304d8e98fed03234 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:51 +0900 Subject: percpu: add pcpu_unit_offsets[] Currently units are mapped sequentially into address space. This patch adds pcpu_unit_offsets[] which allows units to be mapped to arbitrary offsets from the chunk base address. This is necessary to allow sparse embedding which might would need to allocate address ranges and memory areas which aren't aligned to unit size but allocation atom size (page or large page size). This also simplifies things a bit by removing the need to calculate offset from unit number. With this change, there's no need for the arch code to know pcpu_unit_size. Update pcpu_setup_first_chunk() and first chunk allocators to return regular 0 or -errno return code instead of unit size or -errno. Signed-off-by: Tejun Heo Cc: David S. Miller --- arch/sparc/kernel/smp_64.c | 12 +++--- arch/x86/kernel/setup_percpu.c | 51 ++++++++++------------- include/linux/percpu.h | 16 ++++--- mm/percpu.c | 95 +++++++++++++++++++++--------------------- 4 files changed, 84 insertions(+), 90 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index a42a4a744d14..b03fd362c629 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -1478,9 +1478,10 @@ void __init setup_per_cpu_areas(void) static struct vm_struct vm; struct pcpu_alloc_info *ai; unsigned long delta, cpu; - size_t size_sum, pcpu_unit_size; + size_t size_sum; size_t ptrs_size; void **ptrs; + int rc; ai = pcpu_alloc_alloc_info(1, nr_cpu_ids); @@ -1526,14 +1527,15 @@ void __init setup_per_cpu_areas(void) pcpu_map_range(start, end, virt_to_page(ptrs[cpu])); } - pcpu_unit_size = pcpu_setup_first_chunk(ai, vm.addr); + rc = pcpu_setup_first_chunk(ai, vm.addr); + if (rc) + panic("failed to setup percpu first chunk (%d)", rc); free_bootmem(__pa(ptrs), ptrs_size); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; - for_each_possible_cpu(cpu) { - __per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; - } + for_each_possible_cpu(cpu) + __per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; /* Setup %g5 for the boot cpu. 
*/ __local_per_cpu_offset = __per_cpu_offset(smp_processor_id()); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index db5f9c49fec5..9becc5d4b518 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -157,12 +157,12 @@ static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to) return REMOTE_DISTANCE; } -static ssize_t __init setup_pcpu_lpage(bool chosen) +static int __init setup_pcpu_lpage(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE; struct pcpu_alloc_info *ai; - ssize_t ret; + int rc; /* on non-NUMA, embedding is better */ if (!chosen && !pcpu_need_numa()) @@ -196,19 +196,18 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) if (tot_size > vm_size / 5) { pr_info("PERCPU: too large chunk size %zuMB for " "large page remap\n", tot_size >> 20); - ret = -EINVAL; + rc = -EINVAL; goto out_free; } } - ret = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, - pcpul_map); + rc = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, pcpul_map); out_free: pcpu_free_alloc_info(ai); - return ret; + return rc; } #else -static ssize_t __init setup_pcpu_lpage(bool chosen) +static int __init setup_pcpu_lpage(bool chosen) { return -EINVAL; } @@ -222,7 +221,7 @@ static ssize_t __init setup_pcpu_lpage(bool chosen) * mapping so that it can use PMD mapping without additional TLB * pressure. */ -static ssize_t __init setup_pcpu_embed(bool chosen) +static int __init setup_pcpu_embed(bool chosen) { size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; @@ -250,7 +249,7 @@ static void __init pcpup_populate_pte(unsigned long addr) populate_extra_pte(addr); } -static ssize_t __init setup_pcpu_page(void) +static int __init setup_pcpu_page(void) { return pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, pcpu_fc_alloc, pcpu_fc_free, @@ -274,8 +273,7 @@ void __init setup_per_cpu_areas(void) { unsigned int cpu; unsigned long delta; - size_t pcpu_unit_size; - ssize_t ret; + int rc; pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); @@ -285,36 +283,33 @@ void __init setup_per_cpu_areas(void) * of large page mappings. Please read comments on top of * each allocator for details. 
*/ - ret = -EINVAL; + rc = -EINVAL; if (pcpu_chosen_fc != PCPU_FC_AUTO) { if (pcpu_chosen_fc != PCPU_FC_PAGE) { if (pcpu_chosen_fc == PCPU_FC_LPAGE) - ret = setup_pcpu_lpage(true); + rc = setup_pcpu_lpage(true); else - ret = setup_pcpu_embed(true); + rc = setup_pcpu_embed(true); - if (ret < 0) - pr_warning("PERCPU: %s allocator failed (%zd), " + if (rc < 0) + pr_warning("PERCPU: %s allocator failed (%d), " "falling back to page size\n", - pcpu_fc_names[pcpu_chosen_fc], ret); + pcpu_fc_names[pcpu_chosen_fc], rc); } } else { - ret = setup_pcpu_lpage(false); - if (ret < 0) - ret = setup_pcpu_embed(false); + rc = setup_pcpu_lpage(false); + if (rc < 0) + rc = setup_pcpu_embed(false); } - if (ret < 0) - ret = setup_pcpu_page(); - if (ret < 0) - panic("cannot initialize percpu area (err=%zd)", ret); - - pcpu_unit_size = ret; + if (rc < 0) + rc = setup_pcpu_page(); + if (rc < 0) + panic("cannot initialize percpu area (err=%d)", rc); /* alrighty, percpu areas up and running */ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { - per_cpu_offset(cpu) = - delta + pcpu_unit_map[cpu] * pcpu_unit_size; + per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); per_cpu(cpu_number, cpu) = cpu; setup_percpu_segment(cpu); diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 77b86be8ce4f..a7ec840f596c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -57,7 +57,7 @@ #endif extern void *pcpu_base_addr; -extern const int *pcpu_unit_map; +extern const unsigned long *pcpu_unit_offsets; struct pcpu_group_info { int nr_units; /* aligned # of units */ @@ -106,25 +106,23 @@ extern struct pcpu_alloc_info * __init pcpu_build_alloc_info( size_t atom_size, pcpu_fc_cpu_distance_fn_t cpu_distance_fn); -extern size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, - void *base_addr); +extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr); #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK -extern ssize_t __init pcpu_embed_first_chunk( - size_t reserved_size, ssize_t dyn_size); +extern int __init pcpu_embed_first_chunk(size_t reserved_size, + ssize_t dyn_size); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK -extern ssize_t __init pcpu_page_first_chunk( - size_t reserved_size, +extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif #ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern ssize_t __init pcpu_lpage_first_chunk( - const struct pcpu_alloc_info *ai, +extern int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, pcpu_fc_alloc_fn_t alloc_fn, pcpu_fc_free_fn_t free_fn, pcpu_fc_map_fn_t map_fn); diff --git a/mm/percpu.c b/mm/percpu.c index 99f7fa682722..653b02c40200 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -117,8 +117,8 @@ static unsigned int pcpu_last_unit_cpu __read_mostly; void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); -/* cpu -> unit map */ -const int *pcpu_unit_map __read_mostly; +static const int *pcpu_unit_map __read_mostly; /* cpu -> unit */ +const unsigned long *pcpu_unit_offsets __read_mostly; /* cpu -> unit offset */ /* * The first chunk which always exists. 
Note that unlike other @@ -196,8 +196,8 @@ static int pcpu_page_idx(unsigned int cpu, int page_idx) static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, unsigned int cpu, int page_idx) { - return (unsigned long)chunk->vm->addr + - (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); + return (unsigned long)chunk->vm->addr + pcpu_unit_offsets[cpu] + + (page_idx << PAGE_SHIFT); } static struct page *pcpu_chunk_page(struct pcpu_chunk *chunk, @@ -341,7 +341,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * space. Note that any possible cpu id can be used here, so * there's no need to worry about preemption or cpu hotplug. */ - addr += pcpu_unit_map[smp_processor_id()] * pcpu_unit_size; + addr += pcpu_unit_offsets[smp_processor_id()]; return pcpu_get_page_chunk(vmalloc_to_page(addr)); } @@ -1560,17 +1560,17 @@ static void pcpu_dump_alloc_info(const char *lvl, * and available for dynamic allocation like any other chunks. * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access. + * 0 on success, -errno on failure. */ -size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, - void *base_addr) +int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, + void *base_addr) { static struct vm_struct first_vm; static int smap[2], dmap[2]; size_t dyn_size = ai->dyn_size; size_t size_sum = ai->static_size + ai->reserved_size + dyn_size; struct pcpu_chunk *schunk, *dchunk = NULL; + unsigned long *unit_off; unsigned int cpu; int *unit_map; int group, unit, i; @@ -1587,8 +1587,9 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, pcpu_dump_alloc_info(KERN_DEBUG, ai); - /* determine number of units and verify and initialize pcpu_unit_map */ + /* determine number of units and initialize unit_map and base */ unit_map = alloc_bootmem(nr_cpu_ids * sizeof(unit_map[0])); + unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0])); for (cpu = 0; cpu < nr_cpu_ids; cpu++) unit_map[cpu] = NR_CPUS; @@ -1606,6 +1607,8 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, BUG_ON(unit_map[cpu] != NR_CPUS); unit_map[cpu] = unit + i; + unit_off[cpu] = gi->base_offset + i * ai->unit_size; + if (pcpu_first_unit_cpu == NR_CPUS) pcpu_first_unit_cpu = cpu; } @@ -1617,6 +1620,7 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, BUG_ON(unit_map[cpu] == NR_CPUS); pcpu_unit_map = unit_map; + pcpu_unit_offsets = unit_off; /* determine basic parameters */ pcpu_unit_pages = ai->unit_size >> PAGE_SHIFT; @@ -1688,7 +1692,7 @@ size_t __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, /* we're done */ pcpu_base_addr = schunk->vm->addr; - return pcpu_unit_size; + return 0; } const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { @@ -1748,16 +1752,15 @@ early_param("percpu_alloc", percpu_alloc_setup); * size, the leftover is returned to the bootmem allocator. * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access on success, -errno on failure. + * 0 on success, -errno on failure. 
*/ -ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) +int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) { struct pcpu_alloc_info *ai; size_t size_sum, chunk_size; void *base; int unit; - ssize_t ret; + int rc; ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); if (IS_ERR(ai)) @@ -1773,7 +1776,7 @@ ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) if (!base) { pr_warning("PERCPU: failed to allocate %zu bytes for " "embedding\n", chunk_size); - ret = -ENOMEM; + rc = -ENOMEM; goto out_free_ai; } @@ -1790,10 +1793,10 @@ ssize_t __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); - ret = pcpu_setup_first_chunk(ai, base); + rc = pcpu_setup_first_chunk(ai, base); out_free_ai: pcpu_free_alloc_info(ai); - return ret; + return rc; } #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || !CONFIG_HAVE_SETUP_PER_CPU_AREA */ @@ -1813,13 +1816,12 @@ out_free_ai: * page-by-page into vmalloc area. * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access on success, -errno on failure. + * 0 on success, -errno on failure. */ -ssize_t __init pcpu_page_first_chunk(size_t reserved_size, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_populate_pte_fn_t populate_pte_fn) +int __init pcpu_page_first_chunk(size_t reserved_size, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_populate_pte_fn_t populate_pte_fn) { static struct vm_struct vm; struct pcpu_alloc_info *ai; @@ -1827,8 +1829,7 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, int unit_pages; size_t pages_size; struct page **pages; - int unit, i, j; - ssize_t ret; + int unit, i, j, rc; snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10); @@ -1874,10 +1875,10 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, populate_pte_fn(unit_addr + (i << PAGE_SHIFT)); /* pte already populated, the following shouldn't fail */ - ret = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], - unit_pages); - if (ret < 0) - panic("failed to map percpu area, err=%zd\n", ret); + rc = __pcpu_map_pages(unit_addr, &pages[unit * unit_pages], + unit_pages); + if (rc < 0) + panic("failed to map percpu area, err=%d\n", rc); /* * FIXME: Archs with virtual cache should flush local @@ -1896,17 +1897,17 @@ ssize_t __init pcpu_page_first_chunk(size_t reserved_size, unit_pages, psize_str, vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size); - ret = pcpu_setup_first_chunk(ai, vm.addr); + rc = pcpu_setup_first_chunk(ai, vm.addr); goto out_free_ar; enomem: while (--j >= 0) free_fn(page_address(pages[j]), PAGE_SIZE); - ret = -ENOMEM; + rc = -ENOMEM; out_free_ar: free_bootmem(__pa(pages), pages_size); pcpu_free_alloc_info(ai); - return ret; + return rc; } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ @@ -1977,20 +1978,18 @@ static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) * pcpu_lpage_remapped(). * * RETURNS: - * The determined pcpu_unit_size which can be used to initialize - * percpu access on success, -errno on failure. + * 0 on success, -errno on failure. 
*/ -ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn) +int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn, + pcpu_fc_map_fn_t map_fn) { static struct vm_struct vm; const size_t lpage_size = ai->atom_size; size_t chunk_size, map_size; unsigned int cpu; - ssize_t ret; - int i, j, unit, nr_units; + int i, j, unit, nr_units, rc; nr_units = 0; for (i = 0; i < ai->nr_groups; i++) @@ -2070,7 +2069,7 @@ ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); - ret = pcpu_setup_first_chunk(ai, vm.addr); + rc = pcpu_setup_first_chunk(ai, vm.addr); /* * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped @@ -2094,7 +2093,7 @@ ssize_t __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr) pcpul_nr_lpages--; - return ret; + return rc; enomem: for (i = 0; i < pcpul_nr_lpages; i++) @@ -2166,21 +2165,21 @@ EXPORT_SYMBOL(__per_cpu_offset); void __init setup_per_cpu_areas(void) { - ssize_t unit_size; unsigned long delta; unsigned int cpu; + int rc; /* * Always reserve area for module percpu variables. That's * what the legacy allocator did. */ - unit_size = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE); - if (unit_size < 0) + rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, + PERCPU_DYNAMIC_RESERVE); + if (rc < 0) panic("Failed to initialized percpu areas."); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) - __per_cpu_offset[cpu] = delta + cpu * unit_size; + __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; } #endif /* CONFIG_HAVE_SETUP_PER_CPU_AREA */ -- cgit v1.2.3 From ca23e405e06d5fffb005df004c72781f76062f51 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:52 +0900 Subject: vmalloc: implement pcpu_get_vm_areas() To directly use spread NUMA memories for percpu units, percpu allocator will be updated to allow sparsely mapping units in a chunk. As the distances between units can be very large, this makes allocating single vmap area for each chunk undesirable. This patch implements pcpu_get_vm_areas() and pcpu_free_vm_areas() which allocates and frees sparse congruent vmap areas. pcpu_get_vm_areas() take @offsets and @sizes array which define distances and sizes of vmap areas. It scans down from the top of vmalloc area looking for the top-most address which can accomodate all the areas. The top-down scan is to avoid interacting with regular vmallocs which can push up these congruent areas up little by little ending up wasting address space and page table. To speed up top-down scan, the highest possible address hint is maintained. Although the scan is linear from the hint, given the usual large holes between memory addresses between NUMA nodes, the scanning is highly likely to finish after finding the first hole for the last unit which is scanned first. 
Signed-off-by: Tejun Heo Cc: Nick Piggin --- include/linux/vmalloc.h | 6 + mm/vmalloc.c | 293 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 299 insertions(+) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index a43ebec3a7b9..227c2a585e4f 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -115,4 +115,10 @@ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, + const size_t *sizes, int nr_vms, + size_t align, gfp_t gfp_mask); + +void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); + #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 2eb461c3a46e..204b8243d8ab 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -265,6 +265,7 @@ struct vmap_area { static DEFINE_SPINLOCK(vmap_area_lock); static struct rb_root vmap_area_root = RB_ROOT; static LIST_HEAD(vmap_area_list); +static unsigned long vmap_area_pcpu_hole; static struct vmap_area *__find_vmap_area(unsigned long addr) { @@ -431,6 +432,15 @@ static void __free_vmap_area(struct vmap_area *va) RB_CLEAR_NODE(&va->rb_node); list_del_rcu(&va->list); + /* + * Track the highest possible candidate for pcpu area + * allocation. Areas outside of vmalloc area can be returned + * here too, consider only end addresses which fall inside + * vmalloc area proper. + */ + if (va->va_end > VMALLOC_START && va->va_end <= VMALLOC_END) + vmap_area_pcpu_hole = max(vmap_area_pcpu_hole, va->va_end); + call_rcu(&va->rcu_head, rcu_free_va); } @@ -1038,6 +1048,9 @@ void __init vmalloc_init(void) va->va_end = va->va_start + tmp->size; __insert_vmap_area(va); } + + vmap_area_pcpu_hole = VMALLOC_END; + vmap_initialized = true; } @@ -1821,6 +1834,286 @@ void free_vm_area(struct vm_struct *area) } EXPORT_SYMBOL_GPL(free_vm_area); +static struct vmap_area *node_to_va(struct rb_node *n) +{ + return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; +} + +/** + * pvm_find_next_prev - find the next and prev vmap_area surrounding @end + * @end: target address + * @pnext: out arg for the next vmap_area + * @pprev: out arg for the previous vmap_area + * + * Returns: %true if either or both of next and prev are found, + * %false if no vmap_area exists + * + * Find vmap_areas end addresses of which enclose @end. ie. if not + * NULL, *pnext->va_end > @end and *pprev->va_end <= @end. + */ +static bool pvm_find_next_prev(unsigned long end, + struct vmap_area **pnext, + struct vmap_area **pprev) +{ + struct rb_node *n = vmap_area_root.rb_node; + struct vmap_area *va = NULL; + + while (n) { + va = rb_entry(n, struct vmap_area, rb_node); + if (end < va->va_end) + n = n->rb_left; + else if (end > va->va_end) + n = n->rb_right; + else + break; + } + + if (!va) + return false; + + if (va->va_end > end) { + *pnext = va; + *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); + } else { + *pprev = va; + *pnext = node_to_va(rb_next(&(*pprev)->rb_node)); + } + return true; +} + +/** + * pvm_determine_end - find the highest aligned address between two vmap_areas + * @pnext: in/out arg for the next vmap_area + * @pprev: in/out arg for the previous vmap_area + * @align: alignment + * + * Returns: determined end address + * + * Find the highest aligned address between *@pnext and *@pprev below + * VMALLOC_END. *@pnext and *@pprev are adjusted so that the aligned + * down address is between the end addresses of the two vmap_areas. 
+ * + * Please note that the address returned by this function may fall + * inside *@pnext vmap_area. The caller is responsible for checking + * that. + */ +static unsigned long pvm_determine_end(struct vmap_area **pnext, + struct vmap_area **pprev, + unsigned long align) +{ + const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); + unsigned long addr; + + if (*pnext) + addr = min((*pnext)->va_start & ~(align - 1), vmalloc_end); + else + addr = vmalloc_end; + + while (*pprev && (*pprev)->va_end > addr) { + *pnext = *pprev; + *pprev = node_to_va(rb_prev(&(*pnext)->rb_node)); + } + + return addr; +} + +/** + * pcpu_get_vm_areas - allocate vmalloc areas for percpu allocator + * @offsets: array containing offset of each area + * @sizes: array containing size of each area + * @nr_vms: the number of areas to allocate + * @align: alignment, all entries in @offsets and @sizes must be aligned to this + * @gfp_mask: allocation mask + * + * Returns: kmalloc'd vm_struct pointer array pointing to allocated + * vm_structs on success, %NULL on failure + * + * Percpu allocator wants to use congruent vm areas so that it can + * maintain the offsets among percpu areas. This function allocates + * congruent vmalloc areas for it. These areas tend to be scattered + * pretty far, distance between two areas easily going up to + * gigabytes. To avoid interacting with regular vmallocs, these areas + * are allocated from top. + * + * Despite its complicated look, this allocator is rather simple. It + * does everything top-down and scans areas from the end looking for + * matching slot. While scanning, if any of the areas overlaps with + * existing vmap_area, the base address is pulled down to fit the + * area. Scanning is repeated till all the areas fit and then all + * necessary data structres are inserted and the result is returned. + */ +struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, + const size_t *sizes, int nr_vms, + size_t align, gfp_t gfp_mask) +{ + const unsigned long vmalloc_start = ALIGN(VMALLOC_START, align); + const unsigned long vmalloc_end = VMALLOC_END & ~(align - 1); + struct vmap_area **vas, *prev, *next; + struct vm_struct **vms; + int area, area2, last_area, term_area; + unsigned long base, start, end, last_end; + bool purged = false; + + gfp_mask &= GFP_RECLAIM_MASK; + + /* verify parameters and allocate data structures */ + BUG_ON(align & ~PAGE_MASK || !is_power_of_2(align)); + for (last_area = 0, area = 0; area < nr_vms; area++) { + start = offsets[area]; + end = start + sizes[area]; + + /* is everything aligned properly? 
*/ + BUG_ON(!IS_ALIGNED(offsets[area], align)); + BUG_ON(!IS_ALIGNED(sizes[area], align)); + + /* detect the area with the highest address */ + if (start > offsets[last_area]) + last_area = area; + + for (area2 = 0; area2 < nr_vms; area2++) { + unsigned long start2 = offsets[area2]; + unsigned long end2 = start2 + sizes[area2]; + + if (area2 == area) + continue; + + BUG_ON(start2 >= start && start2 < end); + BUG_ON(end2 <= end && end2 > start); + } + } + last_end = offsets[last_area] + sizes[last_area]; + + if (vmalloc_end - vmalloc_start < last_end) { + WARN_ON(true); + return NULL; + } + + vms = kzalloc(sizeof(vms[0]) * nr_vms, gfp_mask); + vas = kzalloc(sizeof(vas[0]) * nr_vms, gfp_mask); + if (!vas || !vms) + goto err_free; + + for (area = 0; area < nr_vms; area++) { + vas[area] = kzalloc(sizeof(struct vmap_area), gfp_mask); + vms[area] = kzalloc(sizeof(struct vm_struct), gfp_mask); + if (!vas[area] || !vms[area]) + goto err_free; + } +retry: + spin_lock(&vmap_area_lock); + + /* start scanning - we scan from the top, begin with the last area */ + area = term_area = last_area; + start = offsets[area]; + end = start + sizes[area]; + + if (!pvm_find_next_prev(vmap_area_pcpu_hole, &next, &prev)) { + base = vmalloc_end - last_end; + goto found; + } + base = pvm_determine_end(&next, &prev, align) - end; + + while (true) { + BUG_ON(next && next->va_end <= base + end); + BUG_ON(prev && prev->va_end > base + end); + + /* + * base might have underflowed, add last_end before + * comparing. + */ + if (base + last_end < vmalloc_start + last_end) { + spin_unlock(&vmap_area_lock); + if (!purged) { + purge_vmap_area_lazy(); + purged = true; + goto retry; + } + goto err_free; + } + + /* + * If next overlaps, move base downwards so that it's + * right below next and then recheck. + */ + if (next && next->va_start < base + end) { + base = pvm_determine_end(&next, &prev, align) - end; + term_area = area; + continue; + } + + /* + * If prev overlaps, shift down next and prev and move + * base so that it's right below new next and then + * recheck. + */ + if (prev && prev->va_end > base + start) { + next = prev; + prev = node_to_va(rb_prev(&next->rb_node)); + base = pvm_determine_end(&next, &prev, align) - end; + term_area = area; + continue; + } + + /* + * This area fits, move on to the previous one. If + * the previous one is the terminal one, we're done. + */ + area = (area + nr_vms - 1) % nr_vms; + if (area == term_area) + break; + start = offsets[area]; + end = start + sizes[area]; + pvm_find_next_prev(base + end, &next, &prev); + } +found: + /* we've found a fitting base, insert all va's */ + for (area = 0; area < nr_vms; area++) { + struct vmap_area *va = vas[area]; + + va->va_start = base + offsets[area]; + va->va_end = va->va_start + sizes[area]; + __insert_vmap_area(va); + } + + vmap_area_pcpu_hole = base + offsets[last_area]; + + spin_unlock(&vmap_area_lock); + + /* insert all vm's */ + for (area = 0; area < nr_vms; area++) + insert_vmalloc_vm(vms[area], vas[area], VM_ALLOC, + pcpu_get_vm_areas); + + kfree(vas); + return vms; + +err_free: + for (area = 0; area < nr_vms; area++) { + if (vas) + kfree(vas[area]); + if (vms) + kfree(vms[area]); + } + kfree(vas); + kfree(vms); + return NULL; +} + +/** + * pcpu_free_vm_areas - free vmalloc areas for percpu allocator + * @vms: vm_struct pointer array returned by pcpu_get_vm_areas() + * @nr_vms: the number of allocated areas + * + * Free vm_structs and the array allocated by pcpu_get_vm_areas(). 
+ */ +void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) +{ + int i; + + for (i = 0; i < nr_vms; i++) + free_vm_area(vms[i]); + kfree(vms); +} #ifdef CONFIG_PROC_FS static void *s_start(struct seq_file *m, loff_t *pos) -- cgit v1.2.3 From c8826dd538602d730ed2c18c6753f1bbfa6c4933 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:52 +0900 Subject: percpu: update embedding first chunk allocator to handle sparse units Now that percpu core can handle very sparse units, given that vmalloc space is large enough, embedding first chunk allocator can use any memory to build the first chunk. This patch teaches pcpu_embed_first_chunk() about distances between cpus and to use alloc/free callbacks to allocate node specific areas for each group and use them for the first chunk. This brings the benefits of embedding allocator to NUMA configurations - no extra TLB pressure with the flexibility of unified dynamic allocator and no need to restructure arch code to build memory layout suitable for percpu. With units put into atom_size aligned groups according to cpu distances, using large page for dynamic chunks is also easily possible with falling back to reuglar pages if large allocation fails. Embedding allocator users are converted to specify NULL cpu_distance_fn, so this patch doesn't cause any visible behavior difference. Following patches will convert them. Signed-off-by: Tejun Heo --- arch/x86/kernel/setup_percpu.c | 4 +- include/linux/percpu.h | 7 ++- mm/percpu.c | 113 +++++++++++++++++++++++++++++++---------- 3 files changed, 93 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 9becc5d4b518..67f6314de9f1 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -234,7 +234,9 @@ static int __init setup_pcpu_embed(bool chosen) return -EINVAL; return pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, - reserve - PERCPU_FIRST_CHUNK_RESERVE); + reserve - PERCPU_FIRST_CHUNK_RESERVE, + PAGE_SIZE, NULL, pcpu_fc_alloc, + pcpu_fc_free); } /* diff --git a/include/linux/percpu.h b/include/linux/percpu.h index a7ec840f596c..25359932740e 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -110,8 +110,11 @@ extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, void *base_addr); #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK -extern int __init pcpu_embed_first_chunk(size_t reserved_size, - ssize_t dyn_size); +extern int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, + size_t atom_size, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn); #endif #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK diff --git a/mm/percpu.c b/mm/percpu.c index cc9c4c64606d..c2826d05505c 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1747,15 +1747,25 @@ early_param("percpu_alloc", percpu_alloc_setup); * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem * @reserved_size: the size of reserved percpu area in bytes * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @atom_size: allocation atom size + * @cpu_distance_fn: callback to determine distance between cpus, optional + * @alloc_fn: function to allocate percpu page + * @free_fn: funtion to free percpu page * * This is a helper to ease setting up embedded first percpu chunk and * can be called where pcpu_setup_first_chunk() is expected. 
* * If this function is used to setup the first chunk, it is allocated - * as a contiguous area using bootmem allocator and used as-is without - * being mapped into vmalloc area. This enables the first chunk to - * piggy back on the linear physical mapping which often uses larger - * page size. + * by calling @alloc_fn and used as-is without being mapped into + * vmalloc area. Allocations are always whole multiples of @atom_size + * aligned to @atom_size. + * + * This enables the first chunk to piggy back on the linear physical + * mapping which often uses larger page size. Please note that this + * can result in very sparse cpu->unit mapping on NUMA machines thus + * requiring large vmalloc address space. Don't use this allocator if + * vmalloc space is not orders of magnitude larger than distances + * between node memory addresses (ie. 32bit NUMA machines). * * When @dyn_size is positive, dynamic area might be larger than * specified to fill page alignment. When @dyn_size is auto, @@ -1763,53 +1773,88 @@ early_param("percpu_alloc", percpu_alloc_setup); * and reserved areas. * * If the needed size is smaller than the minimum or specified unit - * size, the leftover is returned to the bootmem allocator. + * size, the leftover is returned using @free_fn. * * RETURNS: * 0 on success, -errno on failure. */ -int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size) +int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size, + size_t atom_size, + pcpu_fc_cpu_distance_fn_t cpu_distance_fn, + pcpu_fc_alloc_fn_t alloc_fn, + pcpu_fc_free_fn_t free_fn) { + void *base = (void *)ULONG_MAX; + void **areas = NULL; struct pcpu_alloc_info *ai; - size_t size_sum, chunk_size; - void *base; - int unit; - int rc; + size_t size_sum, areas_size; + int group, i, rc; - ai = pcpu_build_alloc_info(reserved_size, dyn_size, PAGE_SIZE, NULL); + ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size, + cpu_distance_fn); if (IS_ERR(ai)) return PTR_ERR(ai); - BUG_ON(ai->nr_groups != 1); - BUG_ON(ai->groups[0].nr_units != num_possible_cpus()); size_sum = ai->static_size + ai->reserved_size + ai->dyn_size; - chunk_size = ai->unit_size * num_possible_cpus(); + areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *)); - base = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE, - __pa(MAX_DMA_ADDRESS)); - if (!base) { - pr_warning("PERCPU: failed to allocate %zu bytes for " - "embedding\n", chunk_size); + areas = alloc_bootmem_nopanic(areas_size); + if (!areas) { rc = -ENOMEM; - goto out_free_ai; + goto out_free; } - /* return the leftover and copy */ - for (unit = 0; unit < num_possible_cpus(); unit++) { - void *ptr = base + unit * ai->unit_size; + /* allocate, copy and determine base address */ + for (group = 0; group < ai->nr_groups; group++) { + struct pcpu_group_info *gi = &ai->groups[group]; + unsigned int cpu = NR_CPUS; + void *ptr; + + for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++) + cpu = gi->cpu_map[i]; + BUG_ON(cpu == NR_CPUS); + + /* allocate space for the whole group */ + ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size); + if (!ptr) { + rc = -ENOMEM; + goto out_free_areas; + } + areas[group] = ptr; - free_bootmem(__pa(ptr + size_sum), ai->unit_size - size_sum); - memcpy(ptr, __per_cpu_load, ai->static_size); + base = min(ptr, base); + + for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) { + if (gi->cpu_map[i] == NR_CPUS) { + /* unused unit, free whole */ + free_fn(ptr, ai->unit_size); + continue; + } + /* copy and return the unused part */ + memcpy(ptr, 
__per_cpu_load, ai->static_size); + free_fn(ptr + size_sum, ai->unit_size - size_sum); + } } - /* we're ready, commit */ + /* base address is now known, determine group base offsets */ + for (group = 0; group < ai->nr_groups; group++) + ai->groups[group].base_offset = areas[group] - base; + pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n", PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size, ai->dyn_size, ai->unit_size); rc = pcpu_setup_first_chunk(ai, base); -out_free_ai: + goto out_free; + +out_free_areas: + for (group = 0; group < ai->nr_groups; group++) + free_fn(areas[group], + ai->groups[group].nr_units * ai->unit_size); +out_free: pcpu_free_alloc_info(ai); + if (areas) + free_bootmem(__pa(areas), areas_size); return rc; } #endif /* CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK || @@ -2177,6 +2222,17 @@ void *pcpu_lpage_remapped(void *kaddr) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size, + size_t align) +{ + return __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); +} + +static void __init pcpu_dfl_fc_free(void *ptr, size_t size) +{ + free_bootmem(__pa(ptr), size); +} + void __init setup_per_cpu_areas(void) { unsigned long delta; @@ -2188,7 +2244,8 @@ void __init setup_per_cpu_areas(void) * what the legacy allocator did. */ rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE, - PERCPU_DYNAMIC_RESERVE); + PERCPU_DYNAMIC_RESERVE, PAGE_SIZE, NULL, + pcpu_dfl_fc_alloc, pcpu_dfl_fc_free); if (rc < 0) panic("Failed to initialized percpu areas."); -- cgit v1.2.3 From e933a73f48e3b2d40cfa56d81e2646f194b5a66a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 14 Aug 2009 15:00:53 +0900 Subject: percpu: kill lpage first chunk allocator With x86 converted to embedding allocator, lpage doesn't have any user left. Kill it along with cpa handling code. Signed-off-by: Tejun Heo Cc: Jan Beulich --- Documentation/kernel-parameters.txt | 10 +- arch/x86/mm/pageattr.c | 20 +-- include/linux/percpu.h | 16 --- mm/percpu.c | 241 ------------------------------------ 4 files changed, 6 insertions(+), 281 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index dee9ce2e6cfa..e710093e3d32 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1920,11 +1920,11 @@ and is between 256 and 4096 characters. It is defined in the file See arch/parisc/kernel/pdc_chassis.c percpu_alloc= Select which percpu first chunk allocator to use. - Currently supported values are "embed", "page" and - "lpage". Archs may support subset or none of the - selections. See comments in mm/percpu.c for details - on each allocator. This parameter is primarily for - debugging and performance comparison. + Currently supported values are "embed" and "page". + Archs may support subset or none of the selections. + See comments in mm/percpu.c for details on each + allocator. This parameter is primarily for debugging + and performance comparison. pf. [PARIDE] See Documentation/blockdev/paride.txt. 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index dce282f65700..f53cfc7f963d 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -687,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa) { struct cpa_data alias_cpa; unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT); - unsigned long vaddr, remapped; + unsigned long vaddr; int ret; if (cpa->pfn >= max_pfn_mapped) @@ -745,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa) } #endif - /* - * If the PMD page was partially used for per-cpu remapping, - * the recycled area needs to be split and modified. Because - * the area is always proper subset of a PMD page - * cpa->numpages is guaranteed to be 1 for these areas, so - * there's no need to loop over and check for further remaps. - */ - remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr); - if (remapped) { - WARN_ON(cpa->numpages > 1); - alias_cpa = *cpa; - alias_cpa.vaddr = &remapped; - alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); - ret = __change_page_attr_set_clr(&alias_cpa, 0); - if (ret) - return ret; - } - return 0; } diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 25359932740e..878836ca999c 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -82,7 +82,6 @@ enum pcpu_fc { PCPU_FC_AUTO, PCPU_FC_EMBED, PCPU_FC_PAGE, - PCPU_FC_LPAGE, PCPU_FC_NR, }; @@ -95,7 +94,6 @@ typedef void * (*pcpu_fc_alloc_fn_t)(unsigned int cpu, size_t size, typedef void (*pcpu_fc_free_fn_t)(void *ptr, size_t size); typedef void (*pcpu_fc_populate_pte_fn_t)(unsigned long addr); typedef int (pcpu_fc_cpu_distance_fn_t)(unsigned int from, unsigned int to); -typedef void (*pcpu_fc_map_fn_t)(void *ptr, size_t size, void *addr); extern struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups, int nr_units); @@ -124,20 +122,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -extern int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn); - -extern void *pcpu_lpage_remapped(void *kaddr); -#else -static inline void *pcpu_lpage_remapped(void *kaddr) -{ - return NULL; -} -#endif - /* * Use this to get to a cpu's version of the per-cpu object * dynamically allocated. 
Non-atomic access to the current CPU's diff --git a/mm/percpu.c b/mm/percpu.c index c2826d05505c..77933928107d 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1713,7 +1713,6 @@ const char *pcpu_fc_names[PCPU_FC_NR] __initdata = { [PCPU_FC_AUTO] = "auto", [PCPU_FC_EMBED] = "embed", [PCPU_FC_PAGE] = "page", - [PCPU_FC_LPAGE] = "lpage", }; enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO; @@ -1729,10 +1728,6 @@ static int __init percpu_alloc_setup(char *str) #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK else if (!strcmp(str, "page")) pcpu_chosen_fc = PCPU_FC_PAGE; -#endif -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK - else if (!strcmp(str, "lpage")) - pcpu_chosen_fc = PCPU_FC_LPAGE; #endif else pr_warning("PERCPU: unknown allocator %s specified\n", str); @@ -1970,242 +1965,6 @@ out_free_ar: } #endif /* CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK */ -#ifdef CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK -struct pcpul_ent { - void *ptr; - void *map_addr; -}; - -static size_t pcpul_size; -static size_t pcpul_lpage_size; -static int pcpul_nr_lpages; -static struct pcpul_ent *pcpul_map; - -static bool __init pcpul_unit_to_cpu(int unit, const struct pcpu_alloc_info *ai, - unsigned int *cpup) -{ - int group, cunit; - - for (group = 0, cunit = 0; group < ai->nr_groups; group++) { - const struct pcpu_group_info *gi = &ai->groups[group]; - - if (unit < cunit + gi->nr_units) { - if (cpup) - *cpup = gi->cpu_map[unit - cunit]; - return true; - } - cunit += gi->nr_units; - } - - return false; -} - -static int __init pcpul_cpu_to_unit(int cpu, const struct pcpu_alloc_info *ai) -{ - int group, unit, i; - - for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) { - const struct pcpu_group_info *gi = &ai->groups[group]; - - for (i = 0; i < gi->nr_units; i++) - if (gi->cpu_map[i] == cpu) - return unit + i; - } - BUG(); -} - -/** - * pcpu_lpage_first_chunk - remap the first percpu chunk using large page - * @ai: pcpu_alloc_info - * @alloc_fn: function to allocate percpu lpage, always called with lpage_size - * @free_fn: function to free percpu memory, @size <= lpage_size - * @map_fn: function to map percpu lpage, always called with lpage_size - * - * This allocator uses large page to build and map the first chunk. - * Unlike other helpers, the caller should provide fully initialized - * @ai. This can be done using pcpu_build_alloc_info(). This two - * stage initialization is to allow arch code to evaluate the - * parameters before committing to it. - * - * Large pages are allocated as directed by @unit_map and other - * parameters and mapped to vmalloc space. Unused holes are returned - * to the page allocator. Note that these holes end up being actively - * mapped twice - once to the physical mapping and to the vmalloc area - * for the first percpu chunk. Depending on architecture, this might - * cause problem when changing page attributes of the returned area. - * These double mapped areas can be detected using - * pcpu_lpage_remapped(). - * - * RETURNS: - * 0 on success, -errno on failure. 
- */ -int __init pcpu_lpage_first_chunk(const struct pcpu_alloc_info *ai, - pcpu_fc_alloc_fn_t alloc_fn, - pcpu_fc_free_fn_t free_fn, - pcpu_fc_map_fn_t map_fn) -{ - static struct vm_struct vm; - const size_t lpage_size = ai->atom_size; - size_t chunk_size, map_size; - unsigned int cpu; - int i, j, unit, nr_units, rc; - - nr_units = 0; - for (i = 0; i < ai->nr_groups; i++) - nr_units += ai->groups[i].nr_units; - - chunk_size = ai->unit_size * nr_units; - BUG_ON(chunk_size % lpage_size); - - pcpul_size = ai->static_size + ai->reserved_size + ai->dyn_size; - pcpul_lpage_size = lpage_size; - pcpul_nr_lpages = chunk_size / lpage_size; - - /* allocate pointer array and alloc large pages */ - map_size = pcpul_nr_lpages * sizeof(pcpul_map[0]); - pcpul_map = alloc_bootmem(map_size); - - /* allocate all pages */ - for (i = 0; i < pcpul_nr_lpages; i++) { - size_t offset = i * lpage_size; - int first_unit = offset / ai->unit_size; - int last_unit = (offset + lpage_size - 1) / ai->unit_size; - void *ptr; - - /* find out which cpu is mapped to this unit */ - for (unit = first_unit; unit <= last_unit; unit++) - if (pcpul_unit_to_cpu(unit, ai, &cpu)) - goto found; - continue; - found: - ptr = alloc_fn(cpu, lpage_size, lpage_size); - if (!ptr) { - pr_warning("PERCPU: failed to allocate large page " - "for cpu%u\n", cpu); - goto enomem; - } - - pcpul_map[i].ptr = ptr; - } - - /* return unused holes */ - for (unit = 0; unit < nr_units; unit++) { - size_t start = unit * ai->unit_size; - size_t end = start + ai->unit_size; - size_t off, next; - - /* don't free used part of occupied unit */ - if (pcpul_unit_to_cpu(unit, ai, NULL)) - start += pcpul_size; - - /* unit can span more than one page, punch the holes */ - for (off = start; off < end; off = next) { - void *ptr = pcpul_map[off / lpage_size].ptr; - next = min(roundup(off + 1, lpage_size), end); - if (ptr) - free_fn(ptr + off % lpage_size, next - off); - } - } - - /* allocate address, map and copy */ - vm.flags = VM_ALLOC; - vm.size = chunk_size; - vm_area_register_early(&vm, ai->unit_size); - - for (i = 0; i < pcpul_nr_lpages; i++) { - if (!pcpul_map[i].ptr) - continue; - pcpul_map[i].map_addr = vm.addr + i * lpage_size; - map_fn(pcpul_map[i].ptr, lpage_size, pcpul_map[i].map_addr); - } - - for_each_possible_cpu(cpu) - memcpy(vm.addr + pcpul_cpu_to_unit(cpu, ai) * ai->unit_size, - __per_cpu_load, ai->static_size); - - /* we're ready, commit */ - pr_info("PERCPU: large pages @%p s%zu r%zu d%zu u%zu\n", - vm.addr, ai->static_size, ai->reserved_size, ai->dyn_size, - ai->unit_size); - - rc = pcpu_setup_first_chunk(ai, vm.addr); - - /* - * Sort pcpul_map array for pcpu_lpage_remapped(). Unmapped - * lpages are pushed to the end and trimmed. - */ - for (i = 0; i < pcpul_nr_lpages - 1; i++) - for (j = i + 1; j < pcpul_nr_lpages; j++) { - struct pcpul_ent tmp; - - if (!pcpul_map[j].ptr) - continue; - if (pcpul_map[i].ptr && - pcpul_map[i].ptr < pcpul_map[j].ptr) - continue; - - tmp = pcpul_map[i]; - pcpul_map[i] = pcpul_map[j]; - pcpul_map[j] = tmp; - } - - while (pcpul_nr_lpages && !pcpul_map[pcpul_nr_lpages - 1].ptr) - pcpul_nr_lpages--; - - return rc; - -enomem: - for (i = 0; i < pcpul_nr_lpages; i++) - if (pcpul_map[i].ptr) - free_fn(pcpul_map[i].ptr, lpage_size); - free_bootmem(__pa(pcpul_map), map_size); - return -ENOMEM; -} - -/** - * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area - * @kaddr: the kernel address in question - * - * Determine whether @kaddr falls in the pcpul recycled area. 
This is - * used by pageattr to detect VM aliases and break up the pcpu large - * page mapping such that the same physical page is not mapped under - * different attributes. - * - * The recycled area is always at the tail of a partially used large - * page. - * - * RETURNS: - * Address of corresponding remapped pcpu address if match is found; - * otherwise, NULL. - */ -void *pcpu_lpage_remapped(void *kaddr) -{ - unsigned long lpage_mask = pcpul_lpage_size - 1; - void *lpage_addr = (void *)((unsigned long)kaddr & ~lpage_mask); - unsigned long offset = (unsigned long)kaddr & lpage_mask; - int left = 0, right = pcpul_nr_lpages - 1; - int pos; - - /* pcpul in use at all? */ - if (!pcpul_map) - return NULL; - - /* okay, perform binary search */ - while (left <= right) { - pos = (left + right) / 2; - - if (pcpul_map[pos].ptr < lpage_addr) - left = pos + 1; - else if (pcpul_map[pos].ptr > lpage_addr) - right = pos - 1; - else - return pcpul_map[pos].map_addr + offset; - } - - return NULL; -} -#endif /* CONFIG_NEED_PER_CPU_LPAGE_FIRST_CHUNK */ - /* * Generic percpu area setup. * -- cgit v1.2.3 From f401a6f7ede753e56b84025e7d2db0d5ef560ce6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Aug 2009 14:51:05 +0200 Subject: cfg80211: use reassociation when possible With the move of everything related to the SME from mac80211 to cfg80211, we lost the ability to send reassociation frames. This adds them back, but only for wireless extensions. With the userspace SME, it shall control assoc vs. reassoc (it already can do so with the nl80211 interface). I haven't touched the connect() implementation, so it is not possible to reassociate with the nl80211 connect primitive. I think that should be done with the NL80211_CMD_ROAM command, but we'll have to see how that can be handled in the future, especially with fullmac chips. This patch addresses only the immediate regression we had in mac80211, which previously sent reassoc. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 4 ++-- net/wireless/core.h | 4 +++- net/wireless/mlme.c | 11 ++++++++++ net/wireless/sme.c | 54 ++++++++++++++++++++++++++++++++++++++----------- net/wireless/wext-sme.c | 8 +++++++- 5 files changed, 65 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 64df51d9a89f..1ee30fcd6fdc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1335,10 +1335,10 @@ struct wireless_dev { struct cfg80211_cached_keys *keys; u8 *ie; size_t ie_len; - u8 bssid[ETH_ALEN]; + u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; s8 default_key, default_mgmt_key; - bool ps; + bool ps, prev_bssid_valid; int ps_timeout; } wext; #endif diff --git a/net/wireless/core.h b/net/wireless/core.h index 5696b95af9be..92e0492b0e4d 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -335,7 +335,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, int __cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys); + struct cfg80211_cached_keys *connkeys, + const u8 *prev_bssid); int cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, @@ -353,6 +354,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_conn_work(struct work_struct *work); +bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev); /* internal helpers */ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 51d5df67c632..da64071ceb84 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -67,6 +67,16 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); + /* + * This is a bit of a hack, we don't notify userspace of + * a (re-)association reply if we tried to send a reassoc + * and got a reject -- we only try again with an assoc + * frame instead of reassoc. 
+ */ + if (status_code != WLAN_STATUS_SUCCESS && wdev->conn && + cfg80211_sme_failed_reassoc(wdev)) + goto out; + nl80211_send_rx_assoc(rdev, dev, buf, len, GFP_KERNEL); if (status_code == WLAN_STATUS_SUCCESS) { @@ -97,6 +107,7 @@ void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) cfg80211_put_bss(&bss->pub); } + out: wdev_unlock(wdev); } EXPORT_SYMBOL(cfg80211_send_rx_assoc); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 219c3bc2c37d..104b33e34d22 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -27,10 +27,10 @@ struct cfg80211_conn { CFG80211_CONN_ASSOCIATE_NEXT, CFG80211_CONN_ASSOCIATING, } state; - u8 bssid[ETH_ALEN]; + u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; u8 *ie; size_t ie_len; - bool auto_auth; + bool auto_auth, prev_bssid_valid; }; @@ -110,6 +110,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct cfg80211_connect_params *params; + const u8 *prev_bssid = NULL; int err; ASSERT_WDEV_LOCK(wdev); @@ -135,15 +136,11 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev) case CFG80211_CONN_ASSOCIATE_NEXT: BUG_ON(!rdev->ops->assoc); wdev->conn->state = CFG80211_CONN_ASSOCIATING; - /* - * We could, later, implement roaming here and then actually - * set prev_bssid to non-NULL. But then we need to be aware - * that some APs don't like that -- so we'd need to retry - * the association. - */ + if (wdev->conn->prev_bssid_valid) + prev_bssid = wdev->conn->prev_bssid; err = __cfg80211_mlme_assoc(rdev, wdev->netdev, params->channel, params->bssid, - NULL, + prev_bssid, params->ssid, params->ssid_len, params->ie, params->ie_len, false, ¶ms->crypto); @@ -316,6 +313,28 @@ void cfg80211_sme_rx_auth(struct net_device *dev, } } +bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + + if (WARN_ON(!wdev->conn)) + return false; + + if (!wdev->conn->prev_bssid_valid) + return false; + + /* + * Some stupid APs don't accept reassoc, so we + * need to fall back to trying regular assoc. 
+ */ + wdev->conn->prev_bssid_valid = false; + wdev->conn->state = CFG80211_CONN_ASSOCIATE_NEXT; + schedule_work(&rdev->conn_work); + + return true; +} + void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *req_ie, size_t req_ie_len, const u8 *resp_ie, size_t resp_ie_len, @@ -359,8 +378,11 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; - if (bssid && status == WLAN_STATUS_SUCCESS) + if (bssid && status == WLAN_STATUS_SUCCESS) { memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); + memcpy(wdev->wext.prev_bssid, bssid, ETH_ALEN); + wdev->wext.prev_bssid_valid = true; + } wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); } #endif @@ -511,6 +533,8 @@ void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid, memset(&wrqu, 0, sizeof(wrqu)); wrqu.ap_addr.sa_family = ARPHRD_ETHER; memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); + memcpy(wdev->wext.prev_bssid, bssid, ETH_ALEN); + wdev->wext.prev_bssid_valid = true; wireless_send_event(wdev->netdev, SIOCGIWAP, &wrqu, NULL); #endif } @@ -643,7 +667,8 @@ EXPORT_SYMBOL(cfg80211_disconnected); int __cfg80211_connect(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_connect_params *connect, - struct cfg80211_cached_keys *connkeys) + struct cfg80211_cached_keys *connkeys, + const u8 *prev_bssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct ieee80211_channel *chan; @@ -742,6 +767,11 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev, wdev->sme_state = CFG80211_SME_CONNECTING; wdev->connect_keys = connkeys; + if (prev_bssid) { + memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN); + wdev->conn->prev_bssid_valid = true; + } + /* we're good if we have both BSSID and channel */ if (wdev->conn->params.bssid && wdev->conn->params.channel) { wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT; @@ -794,7 +824,7 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, mutex_lock(&rdev->devlist_mtx); wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_connect(rdev, dev, connect, connkeys); + err = __cfg80211_connect(rdev, dev, connect, connkeys, NULL); wdev_unlock(dev->ieee80211_ptr); mutex_unlock(&rdev->devlist_mtx); diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index fe1a53639122..907470063f22 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -15,6 +15,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct cfg80211_cached_keys *ck = NULL; + const u8 *prev_bssid = NULL; int err, i; ASSERT_RDEV_LOCK(rdev); @@ -42,8 +43,12 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, for (i = 0; i < 6; i++) ck->params[i].key = ck->data[i]; } + + if (wdev->wext.prev_bssid_valid) + prev_bssid = wdev->wext.prev_bssid; + err = __cfg80211_connect(rdev, wdev->netdev, - &wdev->wext.connect, ck); + &wdev->wext.connect, ck, prev_bssid); if (err) kfree(ck); @@ -184,6 +189,7 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, goto out; } + wdev->wext.prev_bssid_valid = false; wdev->wext.connect.ssid = wdev->wext.ssid; memcpy(wdev->wext.ssid, ssid, len); wdev->wext.connect.ssid_len = len; -- cgit v1.2.3 From f5ea9120be2e5d5c846243416cfdce01d02f5836 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Aug 2009 16:17:38 +0200 Subject: nl80211: add generation number to all dumps In order for userspace to be able to figure out whether it obtained a consistent snapshot of data 
or not when using netlink dumps, we need to have a generation number in each dump message that indicates whether the list has changed or not -- its value is arbitrary. This patch adds such a number to all dumps, this needs some mac80211 involvement to keep track of a generation number to start with when adding/removing mesh paths or stations. The wiphy and netdev lists can be fully handled within cfg80211, of course, but generation numbers need to be stored there as well. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 17 ++++++++++++----- include/net/cfg80211.h | 12 ++++++++++++ net/mac80211/cfg.c | 4 ++++ net/mac80211/ieee80211_i.h | 1 + net/mac80211/mesh.h | 2 ++ net/mac80211/mesh_pathtbl.c | 5 +++++ net/mac80211/sta_info.c | 2 ++ net/wireless/core.c | 5 +++++ net/wireless/core.h | 2 ++ net/wireless/nl80211.c | 31 +++++++++++++++++++++---------- net/wireless/scan.c | 1 + 11 files changed, 67 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index cb3dc6027fd9..a8d71ed43a0e 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -480,10 +480,6 @@ enum nl80211_commands { * @NL80211_ATTR_SCAN_FREQUENCIES: nested attribute with frequencies (in MHz) * @NL80211_ATTR_SCAN_SSIDS: nested attribute with SSIDs, leave out for passive * scanning and include a zero-length SSID (wildcard) for wildcard scan - * @NL80211_ATTR_SCAN_GENERATION: the scan generation increases whenever the - * scan result list changes (BSS expired or added) so that applications - * can verify that they got a single, consistent snapshot (when all dump - * messages carried the same generation number) * @NL80211_ATTR_BSS: scan result BSS * * @NL80211_ATTR_REG_INITIATOR: indicates who requested the regulatory domain @@ -580,6 +576,14 @@ enum nl80211_commands { * * @NL80211_ATTR_PID: Process ID of a network namespace. * + * @NL80211_ATTR_GENERATION: Used to indicate consistent snapshots for + * dumps. This number increases whenever the object list being + * dumped changes, and as such userspace can verify that it has + * obtained a complete and consistent snapshot by verifying that + * all dump messages contain the same generation number. If it + * changed then the list changed and the dump should be repeated + * completely from scratch. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -651,7 +655,7 @@ enum nl80211_attrs { NL80211_ATTR_SCAN_FREQUENCIES, NL80211_ATTR_SCAN_SSIDS, - NL80211_ATTR_SCAN_GENERATION, + NL80211_ATTR_GENERATION, /* replaces old SCAN_GENERATION */ NL80211_ATTR_BSS, NL80211_ATTR_REG_INITIATOR, @@ -716,6 +720,9 @@ enum nl80211_attrs { NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1 }; +/* source-level API compatibility */ +#define NL80211_ATTR_SCAN_GENERATION NL80211_ATTR_GENERATION + /* * Allow user space programs to use #ifdef on new attributes by defining them * here diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1ee30fcd6fdc..de7d116acc3d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -372,6 +372,10 @@ struct rate_info { * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station + * @generation: generation number for nl80211 dumps. + * This number should increase every time the list of stations + * changes, i.e. 
when a station is added or removed, so that + * userspace can tell whether it got a consistent snapshot. */ struct station_info { u32 filled; @@ -385,6 +389,8 @@ struct station_info { struct rate_info txrate; u32 rx_packets; u32 tx_packets; + + int generation; }; /** @@ -444,6 +450,10 @@ enum mpath_info_flags { * @flags: mesh path flags * @discovery_timeout: total mesh path discovery timeout, in msecs * @discovery_retries: mesh path discovery retries + * @generation: generation number for nl80211 dumps. + * This number should increase every time the list of mesh paths + * changes, i.e. when a station is added or removed, so that + * userspace can tell whether it got a consistent snapshot. */ struct mpath_info { u32 filled; @@ -454,6 +464,8 @@ struct mpath_info { u32 discovery_timeout; u8 discovery_retries; u8 flags; + + int generation; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4bbf5007799b..5608f6c68413 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -323,6 +323,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; + sinfo->generation = sdata->local->sta_generation; + sinfo->filled = STATION_INFO_INACTIVE_TIME | STATION_INFO_RX_BYTES | STATION_INFO_TX_BYTES | @@ -909,6 +911,8 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, else memset(next_hop, 0, ETH_ALEN); + pinfo->generation = mesh_paths_generation; + pinfo->filled = MPATH_INFO_FRAME_QLEN | MPATH_INFO_DSN | MPATH_INFO_METRIC | diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 989591787aee..99433222bc5c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -678,6 +678,7 @@ struct ieee80211_local { struct list_head sta_list; struct sta_info *sta_hash[STA_HASH_SIZE]; struct timer_list sta_cleanup; + int sta_generation; struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 2a2ed182cb7e..ce538814b9bd 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -265,6 +265,8 @@ void mesh_path_discard_frame(struct sk_buff *skb, void mesh_path_quiesce(struct ieee80211_sub_if_data *sdata); void mesh_path_restart(struct ieee80211_sub_if_data *sdata); +extern int mesh_paths_generation; + #ifdef CONFIG_MAC80211_MESH extern int mesh_allocated; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 04b9e4d61b8e..431865a58622 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -38,6 +38,8 @@ struct mpath_node { static struct mesh_table *mesh_paths; static struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */ +int mesh_paths_generation; + /* This lock will have the grow table function as writer and add / delete nodes * as readers. When reading the table (i.e. 
doing lookups) we are well protected * by RCU @@ -243,6 +245,8 @@ int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) mesh_paths->mean_chain_len * (mesh_paths->hash_mask + 1)) grow = 1; + mesh_paths_generation++; + spin_unlock(&mesh_paths->hashwlock[hash_idx]); read_unlock(&pathtbl_resize_lock); if (grow) { @@ -484,6 +488,7 @@ int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) err = -ENXIO; enddel: + mesh_paths_generation++; spin_unlock(&mesh_paths->hashwlock[hash_idx]); read_unlock(&pathtbl_resize_lock); return err; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index a360bceeba59..eec001491e66 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -349,6 +349,7 @@ int sta_info_insert(struct sta_info *sta) goto out_free; } list_add(&sta->list, &local->sta_list); + local->sta_generation++; local->num_sta++; sta_info_hash_add(local, sta); @@ -485,6 +486,7 @@ static void __sta_info_unlink(struct sta_info **sta) } local->num_sta--; + local->sta_generation++; if (local->ops->sta_notify) { if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) diff --git a/net/wireless/core.c b/net/wireless/core.c index 1e189306560d..62e1ac00879b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -32,6 +32,7 @@ MODULE_DESCRIPTION("wireless configuration support"); * only read the list, and that can happen quite * often because we need to do it for each command */ LIST_HEAD(cfg80211_rdev_list); +int cfg80211_rdev_list_generation; /* * This is used to protect the cfg80211_rdev_list @@ -511,6 +512,7 @@ int wiphy_register(struct wiphy *wiphy) wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); list_add(&rdev->list, &cfg80211_rdev_list); + cfg80211_rdev_list_generation++; mutex_unlock(&cfg80211_mutex); @@ -593,6 +595,7 @@ void wiphy_unregister(struct wiphy *wiphy) reg_device_remove(wiphy); list_del(&rdev->list); + cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); debugfs_remove(rdev->wiphy.debugfsdir); @@ -653,6 +656,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, spin_lock_init(&wdev->event_lock); mutex_lock(&rdev->devlist_mtx); list_add(&wdev->list, &rdev->netdev_list); + rdev->devlist_generation++; /* can only change netns with wiphy */ dev->features |= NETIF_F_NETNS_LOCAL; @@ -733,6 +737,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, if (!list_empty(&wdev->list)) { sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_init(&wdev->list); + rdev->devlist_generation++; mutex_destroy(&wdev->mtx); #ifdef CONFIG_WIRELESS_EXT kfree(wdev->wext.keys); diff --git a/net/wireless/core.h b/net/wireless/core.h index 92e0492b0e4d..639db52eeff7 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -49,6 +49,7 @@ struct cfg80211_registered_device { /* associate netdev list */ struct mutex devlist_mtx; struct list_head netdev_list; + int devlist_generation; /* BSSes/scanning */ spinlock_t bss_lock; @@ -101,6 +102,7 @@ bool wiphy_idx_valid(int wiphy_idx) extern struct mutex cfg80211_mutex; extern struct list_head cfg80211_rdev_list; +extern int cfg80211_rdev_list_generation; #define assert_cfg80211_lock() WARN_ON(!mutex_is_locked(&cfg80211_mutex)) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2ff7376f35a3..b3d5c1df08dd 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -408,6 +408,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx); NLA_PUT_STRING(msg, 
NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); + NLA_PUT_U32(msg, NL80211_ATTR_GENERATION, + cfg80211_rdev_list_generation); + NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, dev->wiphy.retry_short); NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, @@ -825,6 +828,11 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, dev->name); NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, dev->ieee80211_ptr->iftype); + + NLA_PUT_U32(msg, NL80211_ATTR_GENERATION, + rdev->devlist_generation ^ + (cfg80211_rdev_list_generation << 2)); + return genlmsg_end(msg, hdr); nla_put_failure: @@ -838,12 +846,12 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * int if_idx = 0; int wp_start = cb->args[0]; int if_start = cb->args[1]; - struct cfg80211_registered_device *dev; + struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; mutex_lock(&cfg80211_mutex); - list_for_each_entry(dev, &cfg80211_rdev_list, list) { - if (!net_eq(wiphy_net(&dev->wiphy), sock_net(skb->sk))) + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; if (wp_idx < wp_start) { wp_idx++; @@ -851,21 +859,21 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * } if_idx = 0; - mutex_lock(&dev->devlist_mtx); - list_for_each_entry(wdev, &dev->netdev_list, list) { + mutex_lock(&rdev->devlist_mtx); + list_for_each_entry(wdev, &rdev->netdev_list, list) { if (if_idx < if_start) { if_idx++; continue; } if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - dev, wdev->netdev) < 0) { - mutex_unlock(&dev->devlist_mtx); + rdev, wdev->netdev) < 0) { + mutex_unlock(&rdev->devlist_mtx); goto out; } if_idx++; } - mutex_unlock(&dev->devlist_mtx); + mutex_unlock(&rdev->devlist_mtx); wp_idx++; } @@ -1616,6 +1624,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr); + NLA_PUT_U32(msg, NL80211_ATTR_GENERATION, sinfo->generation); + sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO); if (!sinfoattr) goto nla_put_failure; @@ -2101,6 +2111,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, dst); NLA_PUT(msg, NL80211_ATTR_MPATH_NEXT_HOP, ETH_ALEN, next_hop); + NLA_PUT_U32(msg, NL80211_ATTR_GENERATION, pinfo->generation); + pinfoattr = nla_nest_start(msg, NL80211_ATTR_MPATH_INFO); if (!pinfoattr) goto nla_put_failure; @@ -3090,8 +3102,7 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (!hdr) return -1; - NLA_PUT_U32(msg, NL80211_ATTR_SCAN_GENERATION, - rdev->bss_generation); + NLA_PUT_U32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation); NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex); bss = nla_nest_start(msg, NL80211_ATTR_BSS); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0ccf3a07dc02..1bcb1312bd94 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -562,6 +562,7 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) spin_lock_bh(&dev->bss_lock); list_del(&bss->list); + dev->bss_generation++; rb_erase(&bss->rbn, &dev->bss_tree); spin_unlock_bh(&dev->bss_lock); -- cgit v1.2.3 From c555b9b3713e05586fabe85f4e46f28859e72930 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Aug 2009 
16:23:43 +0200 Subject: mac80211: explain TX retry and status Add some more documentation including an example so that it's clearer what should be done for TX retries. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e2fb5767e1fa..467eed71be22 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -338,6 +338,21 @@ enum mac80211_rate_control_flags { * * When used for transmit status reporting, the driver should * always report the rate along with the flags it used. + * + * &struct ieee80211_tx_info contains an array of these structs + * in the control information, and it will be filled by the rate + * control algorithm according to what should be sent. For example, + * if this array contains, in the format { <idx>, <count> } the + * information + * { 3, 2 }, { 2, 2 }, { 1, 4 }, { -1, 0 }, { -1, 0 } + * then this means that the frame should be transmitted + * up to twice at rate 3, up to twice at rate 2, and up to four + * times at rate 1 if it doesn't get acknowledged. Say it gets + * acknowledged by the peer after the fifth attempt, the status + * information should then contain + * { 3, 2 }, { 2, 2 }, { 1, 1 }, { -1, 0 } ... + * since it was transmitted twice at rate 3, twice at rate 2 + * and once at rate 1 after which we received an acknowledgement. */ struct ieee80211_tx_rate { s8 idx; -- cgit v1.2.3 From ab5b5342fd0ba5b9a2f58a94c5d41dd074b7c48e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Aug 2009 16:28:09 +0200 Subject: mac80211: document TX powersave filter requirements This documents what's required to implement that TX powersave filter properly wrt. handling hardware queues. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 467eed71be22..cd4eb20f98f5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -239,7 +239,14 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_AMPDU: this frame should be sent as part of an A-MPDU * @IEEE80211_TX_CTL_INJECTED: Frame was injected, internal to mac80211. * @IEEE80211_TX_STAT_TX_FILTERED: The frame was not transmitted - * because the destination STA was in powersave mode. + * because the destination STA was in powersave mode. Note that to + * avoid race conditions, the filter must be set by the hardware or + * firmware upon receiving a frame that indicates that the station + * went to sleep (must be done on device to filter frames already on + * the queue) and may only be unset after mac80211 gives the OK for + * that by setting the IEEE80211_TX_CTL_CLEAR_PS_FILT (see above), + * since only then is it guaranteed that no more frames are in the + * hardware queue. * @IEEE80211_TX_STAT_ACK: Frame was acknowledged * @IEEE80211_TX_STAT_AMPDU: The frame was aggregated, so status * is for the whole aggregation. -- cgit v1.2.3 From ad5351db89681515681c5d5659ddf4c69e3cc6f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Aug 2009 16:42:15 +0200 Subject: mac80211: allow DMA optimisation If we have a lot of frames to transmit at once, for instance with fragmentation, it can be an optimisation to only tell the DMA engine about them on the last fragment/frame to avoid banging the IO too much.
This patch allows implementing such an optimisation by telling the driver when more frames can be expected. Currently, this is used by mac80211 only on fragmented frames, but could also be used in the future on other frames when the queue was full and there are multiple frames pending. Note that drivers need to be careful when using this flag, they need to kick their DMA engines not just when this flag is clear, but also when the queue gets full so that progress can be made. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 5 +++++ net/mac80211/tx.c | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cd4eb20f98f5..76d43e12cc29 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -268,6 +268,10 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_PSPOLL_RESPONSE: (internal?) * This frame is a response to a PS-poll frame and should be sent * although the station is in powersave mode. + * @IEEE80211_TX_CTL_MORE_FRAMES: More frames will be passed to the + * transmit function after the current frame, this can be used + * by drivers to kick the DMA queue only if unset or when the + * queue gets full. */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -288,6 +292,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_INTFL_RETRIED = BIT(15), IEEE80211_TX_INTFL_DONT_ENCRYPT = BIT(16), IEEE80211_TX_CTL_PSPOLL_RESPONSE = BIT(17), + IEEE80211_TX_CTL_MORE_FRAMES = BIT(18), }; /** diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7cffaa046b33..7f2e4cdb8904 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1154,6 +1154,9 @@ static int __ieee80211_tx(struct ieee80211_local *local, next = skb->next; len = skb->len; + if (next) + info->flags |= IEEE80211_TX_CTL_MORE_FRAMES; + sdata = vif_to_sdata(info->control.vif); switch (sdata->vif.type) { -- cgit v1.2.3 From 5ba63533bbf653631faab60f6988506160ec6ba4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Aug 2009 17:54:07 +0200 Subject: cfg80211: fix alignment problem in scan request The memory layout for scan requests was rather wrong, we put the scan SSIDs before the channels which could lead to the channel pointers being unaligned in memory. It turns out that using a pointer to the channel array isn't necessary anyway since we can embed a zero-length array into the struct. Signed-off-by: Johannes Berg Signed-off-by: John W.
Linville --- include/net/cfg80211.h | 4 +++- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/main.c | 16 ++++++++-------- net/mac80211/scan.c | 10 +++++----- net/wireless/nl80211.c | 3 +-- net/wireless/scan.c | 4 ++-- net/wireless/sme.c | 3 +-- 7 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index de7d116acc3d..d5756c9fe3d3 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -559,7 +559,6 @@ struct cfg80211_ssid { struct cfg80211_scan_request { struct cfg80211_ssid *ssids; int n_ssids; - struct ieee80211_channel **channels; u32 n_channels; const u8 *ie; size_t ie_len; @@ -568,6 +567,9 @@ struct cfg80211_scan_request { struct wiphy *wiphy; struct net_device *dev; bool aborted; + + /* keep last */ + struct ieee80211_channel *channels[0]; }; /** diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 99433222bc5c..d6bd7dd77960 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -715,7 +715,7 @@ struct ieee80211_local { struct mutex scan_mtx; unsigned long scanning; struct cfg80211_ssid scan_ssid; - struct cfg80211_scan_request int_scan_req; + struct cfg80211_scan_request *int_scan_req; struct cfg80211_scan_request *scan_req; struct ieee80211_channel *scan_channel; const u8 *orig_ies; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0c4f8e122ed6..b03fd84777fa 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -765,9 +765,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) supp_ht = supp_ht || sband->ht_cap.ht_supported; } - local->int_scan_req.n_channels = channels; - local->int_scan_req.channels = kzalloc(sizeof(void *) * channels, GFP_KERNEL); - if (!local->int_scan_req.channels) + local->int_scan_req = kzalloc(sizeof(*local->int_scan_req) + + sizeof(void *) * channels, GFP_KERNEL); + if (!local->int_scan_req) return -ENOMEM; /* if low-level driver supports AP, we also support VLAN */ @@ -882,13 +882,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* alloc internal scan request */ i = 0; - local->int_scan_req.ssids = &local->scan_ssid; - local->int_scan_req.n_ssids = 1; + local->int_scan_req->ssids = &local->scan_ssid; + local->int_scan_req->n_ssids = 1; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (!hw->wiphy->bands[band]) continue; for (j = 0; j < hw->wiphy->bands[band]->n_channels; j++) { - local->int_scan_req.channels[i] = + local->int_scan_req->channels[i] = &hw->wiphy->bands[band]->channels[j]; i++; } @@ -920,7 +920,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) fail_workqueue: wiphy_unregister(local->hw.wiphy); fail_wiphy_register: - kfree(local->int_scan_req.channels); + kfree(local->int_scan_req->channels); return result; } EXPORT_SYMBOL(ieee80211_register_hw); @@ -962,7 +962,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) wiphy_unregister(local->hw.wiphy); ieee80211_wep_free(local); ieee80211_led_exit(local); - kfree(local->int_scan_req.channels); + kfree(local->int_scan_req); } EXPORT_SYMBOL(ieee80211_unregister_hw); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 244f53f3c8b4..e091cbc3434f 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -277,7 +277,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) if (test_bit(SCAN_HW_SCANNING, &local->scanning)) ieee80211_restore_scan_ies(local); - if (local->scan_req != &local->int_scan_req) + if (local->scan_req != local->int_scan_req) cfg80211_scan_done(local->scan_req, aborted); 
local->scan_req = NULL; @@ -423,7 +423,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata, local->scan_req = req; local->scan_sdata = sdata; - if (req != &local->int_scan_req && + if (req != local->int_scan_req && sdata->vif.type == NL80211_IFTYPE_STATION && !list_empty(&ifmgd->work_list)) { /* actually wait for the work it's doing to finish/time out */ @@ -743,10 +743,10 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata, if (local->scan_req) goto unlock; - memcpy(local->int_scan_req.ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN); - local->int_scan_req.ssids[0].ssid_len = ssid_len; + memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN); + local->int_scan_req->ssids[0].ssid_len = ssid_len; - ret = __ieee80211_start_scan(sdata, &sdata->local->int_scan_req); + ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req); unlock: mutex_unlock(&local->scan_mtx); return ret; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b3d5c1df08dd..667a87d307da 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3002,10 +3002,9 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) goto out; } - request->channels = (void *)((char *)request + sizeof(*request)); request->n_channels = n_channels; if (n_ssids) - request->ssids = (void *)(request->channels + n_channels); + request->ssids = (void *)&request->channels[n_channels]; request->n_ssids = n_ssids; if (ie_len) { if (request->ssids) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 1bcb1312bd94..e6c1f11595da 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -612,8 +612,8 @@ int cfg80211_wext_siwscan(struct net_device *dev, creq->wiphy = wiphy; creq->dev = dev; - creq->ssids = (void *)(creq + 1); - creq->channels = (void *)(creq->ssids + 1); + /* SSIDs come after channels */ + creq->ssids = (void *)&creq->channels[n_channels]; creq->n_channels = n_channels; creq->n_ssids = 1; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 104b33e34d22..8e2ef54ea714 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -65,7 +65,6 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) if (!request) return -ENOMEM; - request->channels = (void *)((char *)request + sizeof(*request)); if (wdev->conn->params.channel) request->channels[0] = wdev->conn->params.channel; else { @@ -82,7 +81,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) } } request->n_channels = n_channels; - request->ssids = (void *)(request->channels + n_channels); + request->ssids = (void *)&request->channels[n_channels]; request->n_ssids = 1; memcpy(request->ssids[0].ssid, wdev->conn->params.ssid, -- cgit v1.2.3 From d5b96a6f39a8aaa7534069b3db71048df44f023b Mon Sep 17 00:00:00 2001 From: Pat Erley Date: Sat, 8 Aug 2009 17:53:19 -0400 Subject: mac80211: remove max_bandwidth This removes the max_bandwidth attribute. It is only ever written to, and is duplicated by max_bandwidth_khz in the regulatory code. Signed-off-by: Pat Erley Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 -- net/wireless/reg.c | 3 --- 2 files changed, 5 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d5756c9fe3d3..223913434e51 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -80,7 +80,6 @@ enum ieee80211_channel_flags { * with cfg80211. 
* * @center_freq: center frequency in MHz - * @max_bandwidth: maximum allowed bandwidth for this channel, in MHz * @hw_value: hardware-specific value for the channel * @flags: channel flags from &enum ieee80211_channel_flags. * @orig_flags: channel flags at registration time, used by regulatory @@ -97,7 +96,6 @@ enum ieee80211_channel_flags { struct ieee80211_channel { enum ieee80211_band band; u16 center_freq; - u8 max_bandwidth; u16 hw_value; u32 flags; int max_antenna_gain; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0f61ae613f3b..fc7a4849c990 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1018,7 +1018,6 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, map_regdom_flags(reg_rule->flags) | bw_flags; chan->max_antenna_gain = chan->orig_mag = (int) MBI_TO_DBI(power_rule->max_antenna_gain); - chan->max_bandwidth = KHZ_TO_MHZ(desired_bw_khz); chan->max_power = chan->orig_mpwr = (int) MBM_TO_DBM(power_rule->max_eirp); return; @@ -1027,7 +1026,6 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, chan->flags = flags | bw_flags | map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = min(chan->orig_mag, (int) MBI_TO_DBI(power_rule->max_antenna_gain)); - chan->max_bandwidth = KHZ_TO_MHZ(desired_bw_khz); if (chan->orig_mpwr) chan->max_power = min(chan->orig_mpwr, (int) MBM_TO_DBM(power_rule->max_eirp)); @@ -1329,7 +1327,6 @@ static void handle_channel_custom(struct wiphy *wiphy, chan->flags |= map_regdom_flags(reg_rule->flags) | bw_flags; chan->max_antenna_gain = (int) MBI_TO_DBI(power_rule->max_antenna_gain); - chan->max_bandwidth = KHZ_TO_MHZ(desired_bw_khz); chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); } -- cgit v1.2.3 From f679056b2fdd4e9b7c8eb42ba447cd9646236305 Mon Sep 17 00:00:00 2001 From: Gábor Stefanik Date: Mon, 10 Aug 2009 21:23:08 +0200 Subject: ssb: Implement the remaining rev.8 SPROM vars needed for LP-PHY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also add a "SPEX32" macro for extracting 32-bit SPROM variables. Signed-off-by: Gábor Stefanik Signed-off-by: John W. Linville --- drivers/ssb/pci.c | 53 ++++++++++++++++++++++++++++++++++- include/linux/ssb/ssb.h | 44 ++++++++++++++++++++++++----- include/linux/ssb/ssb_regs.h | 66 +++++++++++++++++++++++++++++++++++++++----- 3 files changed, 148 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c index 40ea41762247..593fc618a2ea 100644 --- a/drivers/ssb/pci.c +++ b/drivers/ssb/pci.c @@ -169,8 +169,14 @@ err_pci: /* Get the word-offset for a SSB_SPROM_XXX define. */ #define SPOFF(offset) (((offset) - SSB_SPROM_BASE) / sizeof(u16)) /* Helper to extract some _offset, which is one of the SSB_SPROM_XXX defines. 
*/ -#define SPEX(_outvar, _offset, _mask, _shift) \ +#define SPEX16(_outvar, _offset, _mask, _shift) \ out->_outvar = ((in[SPOFF(_offset)] & (_mask)) >> (_shift)) +#define SPEX32(_outvar, _offset, _mask, _shift) \ + out->_outvar = ((((u32)in[SPOFF((_offset)+2)] << 16 | \ + in[SPOFF(_offset)]) & (_mask)) >> (_shift)) +#define SPEX(_outvar, _offset, _mask, _shift) \ + SPEX16(_outvar, _offset, _mask, _shift) + static inline u8 ssb_crc8(u8 crc, u8 data) { @@ -480,6 +486,8 @@ static void sprom_extract_r8(struct ssb_sprom *out, const u16 *in) SPEX(country_code, SSB_SPROM8_CCODE, 0xFFFF, 0); SPEX(boardflags_lo, SSB_SPROM8_BFLLO, 0xFFFF, 0); SPEX(boardflags_hi, SSB_SPROM8_BFLHI, 0xFFFF, 0); + SPEX(boardflags2_lo, SSB_SPROM8_BFL2LO, 0xFFFF, 0); + SPEX(boardflags2_hi, SSB_SPROM8_BFL2HI, 0xFFFF, 0); SPEX(ant_available_a, SSB_SPROM8_ANTAVAIL, SSB_SPROM8_ANTAVAIL_A, SSB_SPROM8_ANTAVAIL_A_SHIFT); SPEX(ant_available_bg, SSB_SPROM8_ANTAVAIL, SSB_SPROM8_ANTAVAIL_BG, @@ -490,12 +498,55 @@ static void sprom_extract_r8(struct ssb_sprom *out, const u16 *in) SPEX(maxpwr_a, SSB_SPROM8_MAXP_A, SSB_SPROM8_MAXP_A_MASK, 0); SPEX(itssi_a, SSB_SPROM8_MAXP_A, SSB_SPROM8_ITSSI_A, SSB_SPROM8_ITSSI_A_SHIFT); + SPEX(maxpwr_ah, SSB_SPROM8_MAXP_AHL, SSB_SPROM8_MAXP_AH_MASK, 0); + SPEX(maxpwr_al, SSB_SPROM8_MAXP_AHL, SSB_SPROM8_MAXP_AL_MASK, + SSB_SPROM8_MAXP_AL_SHIFT); SPEX(gpio0, SSB_SPROM8_GPIOA, SSB_SPROM8_GPIOA_P0, 0); SPEX(gpio1, SSB_SPROM8_GPIOA, SSB_SPROM8_GPIOA_P1, SSB_SPROM8_GPIOA_P1_SHIFT); SPEX(gpio2, SSB_SPROM8_GPIOB, SSB_SPROM8_GPIOB_P2, 0); SPEX(gpio3, SSB_SPROM8_GPIOB, SSB_SPROM8_GPIOB_P3, SSB_SPROM8_GPIOB_P3_SHIFT); + SPEX(tri2g, SSB_SPROM8_TRI25G, SSB_SPROM8_TRI2G, 0); + SPEX(tri5g, SSB_SPROM8_TRI25G, SSB_SPROM8_TRI5G, + SSB_SPROM8_TRI5G_SHIFT); + SPEX(tri5gl, SSB_SPROM8_TRI5GHL, SSB_SPROM8_TRI5GL, 0); + SPEX(tri5gh, SSB_SPROM8_TRI5GHL, SSB_SPROM8_TRI5GH, + SSB_SPROM8_TRI5GH_SHIFT); + SPEX(rxpo2g, SSB_SPROM8_RXPO, SSB_SPROM8_RXPO2G, 0); + SPEX(rxpo5g, SSB_SPROM8_RXPO, SSB_SPROM8_RXPO5G, + SSB_SPROM8_RXPO5G_SHIFT); + SPEX(rssismf2g, SSB_SPROM8_RSSIPARM2G, SSB_SPROM8_RSSISMF2G, 0); + SPEX(rssismc2g, SSB_SPROM8_RSSIPARM2G, SSB_SPROM8_RSSISMC2G, + SSB_SPROM8_RSSISMC2G_SHIFT); + SPEX(rssisav2g, SSB_SPROM8_RSSIPARM2G, SSB_SPROM8_RSSISAV2G, + SSB_SPROM8_RSSISAV2G_SHIFT); + SPEX(bxa2g, SSB_SPROM8_RSSIPARM2G, SSB_SPROM8_BXA2G, + SSB_SPROM8_BXA2G_SHIFT); + SPEX(rssismf5g, SSB_SPROM8_RSSIPARM5G, SSB_SPROM8_RSSISMF5G, 0); + SPEX(rssismc5g, SSB_SPROM8_RSSIPARM5G, SSB_SPROM8_RSSISMC5G, + SSB_SPROM8_RSSISMC5G_SHIFT); + SPEX(rssisav5g, SSB_SPROM8_RSSIPARM5G, SSB_SPROM8_RSSISAV5G, + SSB_SPROM8_RSSISAV5G_SHIFT); + SPEX(bxa5g, SSB_SPROM8_RSSIPARM5G, SSB_SPROM8_BXA5G, + SSB_SPROM8_BXA5G_SHIFT); + SPEX(pa0b0, SSB_SPROM8_PA0B0, 0xFFFF, 0); + SPEX(pa0b1, SSB_SPROM8_PA0B1, 0xFFFF, 0); + SPEX(pa0b2, SSB_SPROM8_PA0B2, 0xFFFF, 0); + SPEX(pa1b0, SSB_SPROM8_PA1B0, 0xFFFF, 0); + SPEX(pa1b1, SSB_SPROM8_PA1B1, 0xFFFF, 0); + SPEX(pa1b2, SSB_SPROM8_PA1B2, 0xFFFF, 0); + SPEX(pa1lob0, SSB_SPROM8_PA1LOB0, 0xFFFF, 0); + SPEX(pa1lob1, SSB_SPROM8_PA1LOB1, 0xFFFF, 0); + SPEX(pa1lob2, SSB_SPROM8_PA1LOB2, 0xFFFF, 0); + SPEX(pa1hib0, SSB_SPROM8_PA1HIB0, 0xFFFF, 0); + SPEX(pa1hib1, SSB_SPROM8_PA1HIB1, 0xFFFF, 0); + SPEX(pa1hib2, SSB_SPROM8_PA1HIB2, 0xFFFF, 0); + SPEX(cck2gpo, SSB_SPROM8_CCK2GPO, 0xFFFF, 0); + SPEX32(ofdm2gpo, SSB_SPROM8_OFDM2GPO, 0xFFFFFFFF, 0); + SPEX32(ofdm5glpo, SSB_SPROM8_OFDM5GLPO, 0xFFFFFFFF, 0); + SPEX32(ofdm5gpo, SSB_SPROM8_OFDM5GPO, 0xFFFFFFFF, 0); + SPEX32(ofdm5ghpo, SSB_SPROM8_OFDM5GHPO, 0xFFFFFFFF, 0); /* Extract the 
antenna gain values. */ SPEX(antenna_gain.ghz24.a0, SSB_SPROM8_AGAIN01, diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h index 5ae8fa22d331..17ffc1f84d76 100644 --- a/include/linux/ssb/ssb.h +++ b/include/linux/ssb/ssb.h @@ -27,24 +27,54 @@ struct ssb_sprom { u8 et1mdcport; /* MDIO for enet1 */ u8 board_rev; /* Board revision number from SPROM. */ u8 country_code; /* Country Code */ - u8 ant_available_a; /* A-PHY antenna available bits (up to 4) */ - u8 ant_available_bg; /* B/G-PHY antenna available bits (up to 4) */ + u8 ant_available_a; /* 2GHz antenna available bits (up to 4) */ + u8 ant_available_bg; /* 5GHz antenna available bits (up to 4) */ u16 pa0b0; u16 pa0b1; u16 pa0b2; u16 pa1b0; u16 pa1b1; u16 pa1b2; + u16 pa1lob0; + u16 pa1lob1; + u16 pa1lob2; + u16 pa1hib0; + u16 pa1hib1; + u16 pa1hib2; u8 gpio0; /* GPIO pin 0 */ u8 gpio1; /* GPIO pin 1 */ u8 gpio2; /* GPIO pin 2 */ u8 gpio3; /* GPIO pin 3 */ - u16 maxpwr_a; /* A-PHY Amplifier Max Power (in dBm Q5.2) */ - u16 maxpwr_bg; /* B/G-PHY Amplifier Max Power (in dBm Q5.2) */ + u16 maxpwr_bg; /* 2.4GHz Amplifier Max Power (in dBm Q5.2) */ + u16 maxpwr_al; /* 5.2GHz Amplifier Max Power (in dBm Q5.2) */ + u16 maxpwr_a; /* 5.3GHz Amplifier Max Power (in dBm Q5.2) */ + u16 maxpwr_ah; /* 5.8GHz Amplifier Max Power (in dBm Q5.2) */ u8 itssi_a; /* Idle TSSI Target for A-PHY */ u8 itssi_bg; /* Idle TSSI Target for B/G-PHY */ - u16 boardflags_lo; /* Boardflags (low 16 bits) */ - u16 boardflags_hi; /* Boardflags (high 16 bits) */ + u8 tri2g; /* 2.4GHz TX isolation */ + u8 tri5gl; /* 5.2GHz TX isolation */ + u8 tri5g; /* 5.3GHz TX isolation */ + u8 tri5gh; /* 5.8GHz TX isolation */ + u8 rxpo2g; /* 2GHz RX power offset */ + u8 rxpo5g; /* 5GHz RX power offset */ + u8 rssisav2g; /* 2GHz RSSI params */ + u8 rssismc2g; + u8 rssismf2g; + u8 bxa2g; /* 2GHz BX arch */ + u8 rssisav5g; /* 5GHz RSSI params */ + u8 rssismc5g; + u8 rssismf5g; + u8 bxa5g; /* 5GHz BX arch */ + u16 cck2gpo; /* CCK power offset */ + u32 ofdm2gpo; /* 2.4GHz OFDM power offset */ + u32 ofdm5glpo; /* 5.2GHz OFDM power offset */ + u32 ofdm5gpo; /* 5.3GHz OFDM power offset */ + u32 ofdm5ghpo; /* 5.8GHz OFDM power offset */ + u16 boardflags_lo; /* Board flags (bits 0-15) */ + u16 boardflags_hi; /* Board flags (bits 16-31) */ + u16 boardflags2_lo; /* Board flags (bits 32-47) */ + u16 boardflags2_hi; /* Board flags (bits 48-63) */ + /* TODO store board flags in a single u64 */ /* Antenna gain values for up to 4 antennas * on each band. Values in dBm/4 (Q5.2). Negative gain means the @@ -58,7 +88,7 @@ struct ssb_sprom { } ghz5; /* 5GHz band */ } antenna_gain; - /* TODO - add any parameters needed from rev 2, 3, or 4 SPROMs */ + /* TODO - add any parameters needed from rev 2, 3, 4, 5 or 8 SPROMs */ }; /* Information about the PCB the circuitry is soldered on. */ diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index a01b982b5783..9ae9082eaeb4 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -162,7 +162,7 @@ /* SPROM shadow area. If not otherwise noted, fields are * two bytes wide. Note that the SPROM can _only_ be read - * in two-byte quantinies. + * in two-byte quantities. 
*/ #define SSB_SPROMSIZE_WORDS 64 #define SSB_SPROMSIZE_BYTES (SSB_SPROMSIZE_WORDS * sizeof(u16)) @@ -327,8 +327,11 @@ #define SSB_SPROM5_GPIOB_P3_SHIFT 8 /* SPROM Revision 8 */ -#define SSB_SPROM8_BFLLO 0x1084 /* Boardflags (low 16 bits) */ -#define SSB_SPROM8_BFLHI 0x1086 /* Boardflags Hi */ +#define SSB_SPROM8_BOARDREV 0x1082 /* Board revision */ +#define SSB_SPROM8_BFLLO 0x1084 /* Board flags (bits 0-15) */ +#define SSB_SPROM8_BFLHI 0x1086 /* Board flags (bits 16-31) */ +#define SSB_SPROM8_BFL2LO 0x1088 /* Board flags (bits 32-47) */ +#define SSB_SPROM8_BFL2HI 0x108A /* Board flags (bits 48-63) */ #define SSB_SPROM8_IL0MAC 0x108C /* 6 byte MAC address */ #define SSB_SPROM8_CCODE 0x1092 /* 2 byte country code */ #define SSB_SPROM8_ANTAVAIL 0x109C /* Antenna available bitfields*/ @@ -354,14 +357,63 @@ #define SSB_SPROM8_GPIOB_P2 0x00FF /* Pin 2 */ #define SSB_SPROM8_GPIOB_P3 0xFF00 /* Pin 3 */ #define SSB_SPROM8_GPIOB_P3_SHIFT 8 -#define SSB_SPROM8_MAXP_BG 0x10C0 /* Max Power BG in path 1 */ -#define SSB_SPROM8_MAXP_BG_MASK 0x00FF /* Mask for Max Power BG */ +#define SSB_SPROM8_RSSIPARM2G 0x10A4 /* RSSI params for 2GHz */ +#define SSB_SPROM8_RSSISMF2G 0x000F +#define SSB_SPROM8_RSSISMC2G 0x00F0 +#define SSB_SPROM8_RSSISMC2G_SHIFT 4 +#define SSB_SPROM8_RSSISAV2G 0x0700 +#define SSB_SPROM8_RSSISAV2G_SHIFT 8 +#define SSB_SPROM8_BXA2G 0x1800 +#define SSB_SPROM8_BXA2G_SHIFT 11 +#define SSB_SPROM8_RSSIPARM5G 0x10A6 /* RSSI params for 5GHz */ +#define SSB_SPROM8_RSSISMF5G 0x000F +#define SSB_SPROM8_RSSISMC5G 0x00F0 +#define SSB_SPROM8_RSSISMC5G_SHIFT 4 +#define SSB_SPROM8_RSSISAV5G 0x0700 +#define SSB_SPROM8_RSSISAV5G_SHIFT 8 +#define SSB_SPROM8_BXA5G 0x1800 +#define SSB_SPROM8_BXA5G_SHIFT 11 +#define SSB_SPROM8_TRI25G 0x10A8 /* TX isolation 2.4&5.3GHz */ +#define SSB_SPROM8_TRI2G 0x00FF /* TX isolation 2.4GHz */ +#define SSB_SPROM8_TRI5G 0xFF00 /* TX isolation 5.3GHz */ +#define SSB_SPROM8_TRI5G_SHIFT 8 +#define SSB_SPROM8_TRI5GHL 0x10AA /* TX isolation 5.2/5.8GHz */ +#define SSB_SPROM8_TRI5GL 0x00FF /* TX isolation 5.2GHz */ +#define SSB_SPROM8_TRI5GH 0xFF00 /* TX isolation 5.8GHz */ +#define SSB_SPROM8_TRI5GH_SHIFT 8 +#define SSB_SPROM8_RXPO 0x10AC /* RX power offsets */ +#define SSB_SPROM8_RXPO2G 0x00FF /* 2GHz RX power offset */ +#define SSB_SPROM8_RXPO5G 0xFF00 /* 5GHz RX power offset */ +#define SSB_SPROM8_RXPO5G_SHIFT 8 +#define SSB_SPROM8_MAXP_BG 0x10C0 /* Max Power 2GHz in path 1 */ +#define SSB_SPROM8_MAXP_BG_MASK 0x00FF /* Mask for Max Power 2GHz */ #define SSB_SPROM8_ITSSI_BG 0xFF00 /* Mask for path 1 itssi_bg */ #define SSB_SPROM8_ITSSI_BG_SHIFT 8 -#define SSB_SPROM8_MAXP_A 0x10C8 /* Max Power A in path 1 */ -#define SSB_SPROM8_MAXP_A_MASK 0x00FF /* Mask for Max Power A */ +#define SSB_SPROM8_PA0B0 0x10C2 /* 2GHz power amp settings */ +#define SSB_SPROM8_PA0B1 0x10C4 +#define SSB_SPROM8_PA0B2 0x10C6 +#define SSB_SPROM8_MAXP_A 0x10C8 /* Max Power 5.3GHz */ +#define SSB_SPROM8_MAXP_A_MASK 0x00FF /* Mask for Max Power 5.3GHz */ #define SSB_SPROM8_ITSSI_A 0xFF00 /* Mask for path 1 itssi_a */ #define SSB_SPROM8_ITSSI_A_SHIFT 8 +#define SSB_SPROM8_MAXP_AHL 0x10CA /* Max Power 5.2/5.8GHz */ +#define SSB_SPROM8_MAXP_AH_MASK 0x00FF /* Mask for Max Power 5.8GHz */ +#define SSB_SPROM8_MAXP_AL_MASK 0xFF00 /* Mask for Max Power 5.2GHz */ +#define SSB_SPROM8_MAXP_AL_SHIFT 8 +#define SSB_SPROM8_PA1B0 0x10CC /* 5.3GHz power amp settings */ +#define SSB_SPROM8_PA1B1 0x10CE +#define SSB_SPROM8_PA1B2 0x10D0 +#define SSB_SPROM8_PA1LOB0 0x10D2 /* 5.2GHz power amp settings */ +#define 
SSB_SPROM8_PA1LOB1 0x10D4 +#define SSB_SPROM8_PA1LOB2 0x10D6 +#define SSB_SPROM8_PA1HIB0 0x10D8 /* 5.8GHz power amp settings */ +#define SSB_SPROM8_PA1HIB1 0x10DA +#define SSB_SPROM8_PA1HIB2 0x10DC +#define SSB_SPROM8_CCK2GPO 0x1140 /* CCK power offset */ +#define SSB_SPROM8_OFDM2GPO 0x1142 /* 2.4GHz OFDM power offset */ +#define SSB_SPROM8_OFDM5GPO 0x1146 /* 5.3GHz OFDM power offset */ +#define SSB_SPROM8_OFDM5GLPO 0x114A /* 5.2GHz OFDM power offset */ +#define SSB_SPROM8_OFDM5GHPO 0x114E /* 5.8GHz OFDM power offset */ /* Values for SSB_SPROM1_BINF_CCODE */ enum { -- cgit v1.2.3 From 7834ddbcc7a097443761b0722e8c9fb8511b95b1 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Tue, 11 Aug 2009 22:57:16 +0300 Subject: usbnet: add rx queue pausing Add rx queue pausing to usbnet. This is needed by rndis_wlan so that it can control rx queue and prevent received packets from being send forward before rndis_wlan receives and handles 'media connect'-indication. Without this establishing WPA connections is hard and fail often. [v2] - removed unneeded use of skb_clone Cc: David Brownell Signed-off-by: Jussi Kivilinna Signed-off-by: John W. Linville --- drivers/net/usb/usbnet.c | 44 ++++++++++++++++++++++++++++++++++++++- drivers/net/wireless/rndis_wlan.c | 13 +++++++++++- include/linux/usb/usbnet.h | 6 ++++++ 3 files changed, 61 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index af1fe4696509..7d471fca2743 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -233,6 +233,11 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) { int status; + if (test_bit(EVENT_RX_PAUSED, &dev->flags)) { + skb_queue_tail(&dev->rxq_pause, skb); + return; + } + skb->protocol = eth_type_trans (skb, dev->net); dev->net->stats.rx_packets++; dev->net->stats.rx_bytes += skb->len; @@ -525,6 +530,41 @@ static void intr_complete (struct urb *urb) deverr(dev, "intr resubmit --> %d", status); } +/*-------------------------------------------------------------------------*/ +void usbnet_pause_rx(struct usbnet *dev) +{ + set_bit(EVENT_RX_PAUSED, &dev->flags); + + if (netif_msg_rx_status(dev)) + devdbg(dev, "paused rx queue enabled"); +} +EXPORT_SYMBOL_GPL(usbnet_pause_rx); + +void usbnet_resume_rx(struct usbnet *dev) +{ + struct sk_buff *skb; + int num = 0; + + clear_bit(EVENT_RX_PAUSED, &dev->flags); + + while ((skb = skb_dequeue(&dev->rxq_pause)) != NULL) { + usbnet_skb_return(dev, skb); + num++; + } + + tasklet_schedule(&dev->bh); + + if (netif_msg_rx_status(dev)) + devdbg(dev, "paused rx queue disabled, %d skbs requeued", num); +} +EXPORT_SYMBOL_GPL(usbnet_resume_rx); + +void usbnet_purge_paused_rxq(struct usbnet *dev) +{ + skb_queue_purge(&dev->rxq_pause); +} +EXPORT_SYMBOL_GPL(usbnet_purge_paused_rxq); + /*-------------------------------------------------------------------------*/ // unlink pending rx/tx; completion handlers do all other cleanup @@ -623,6 +663,8 @@ int usbnet_stop (struct net_device *net) usb_kill_urb(dev->interrupt); + usbnet_purge_paused_rxq(dev); + /* deferred work (task, timer, softirq) must also stop. * can't flush_scheduled_work() until we drop rtnl (later), * else workers could deadlock; so make workers a NOP. 
@@ -1113,7 +1155,6 @@ static void usbnet_bh (unsigned long param) } - /*------------------------------------------------------------------------- * * USB Device Driver support @@ -1210,6 +1251,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) skb_queue_head_init (&dev->rxq); skb_queue_head_init (&dev->txq); skb_queue_head_init (&dev->done); + skb_queue_head_init(&dev->rxq_pause); dev->bh.func = usbnet_bh; dev->bh.data = (unsigned long) dev; INIT_WORK (&dev->kevent, kevent); diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 828dc1825bba..d42692dfbc67 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -1764,8 +1764,15 @@ static int rndis_iw_set_essid(struct net_device *dev, if (!wrqu->essid.flags || length == 0) return disassociate(usbdev, 1); - else + else { + /* Pause and purge rx queue, so we don't pass packets before + * 'media connect'-indication. + */ + usbnet_pause_rx(usbdev); + usbnet_purge_paused_rxq(usbdev); + return set_essid(usbdev, &ssid); + } } @@ -2328,6 +2335,8 @@ get_bssid: memcpy(evt.ap_addr.sa_data, bssid, ETH_ALEN); wireless_send_event(usbdev->net, SIOCGIWAP, &evt, NULL); } + + usbnet_resume_rx(usbdev); } if (test_and_clear_bit(WORK_LINK_DOWN, &priv->work_pending)) { @@ -2541,6 +2550,8 @@ static void rndis_wlan_indication(struct usbnet *usbdev, void *ind, int buflen) switch (msg->status) { case RNDIS_STATUS_MEDIA_CONNECT: + usbnet_pause_rx(usbdev); + devinfo(usbdev, "media connect"); /* queue work to avoid recursive calls into rndis_command */ diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index de8b4b18961b..09514252d84e 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -53,6 +53,7 @@ struct usbnet { struct sk_buff_head rxq; struct sk_buff_head txq; struct sk_buff_head done; + struct sk_buff_head rxq_pause; struct urb *interrupt; struct tasklet_struct bh; @@ -63,6 +64,7 @@ struct usbnet { # define EVENT_RX_MEMORY 2 # define EVENT_STS_SPLIT 3 # define EVENT_LINK_RESET 4 +# define EVENT_RX_PAUSED 5 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) @@ -190,6 +192,10 @@ extern void usbnet_defer_kevent (struct usbnet *, int); extern void usbnet_skb_return (struct usbnet *, struct sk_buff *); extern void usbnet_unlink_rx_urbs(struct usbnet *); +extern void usbnet_pause_rx(struct usbnet *); +extern void usbnet_resume_rx(struct usbnet *); +extern void usbnet_purge_paused_rxq(struct usbnet *); + extern int usbnet_get_settings (struct net_device *net, struct ethtool_cmd *cmd); extern int usbnet_set_settings (struct net_device *net, struct ethtool_cmd *cmd); extern u32 usbnet_get_link (struct net_device *net); -- cgit v1.2.3 From 16cb9d42b68b339852e8914f2538ca9a2aec616c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Aug 2009 23:33:20 +0200 Subject: cfg80211: allow driver to override PS default Sometimes drivers might have a good reason to override the PS default, like iwlwifi right now where it affects RX performance significantly at this point. This will allow them to override the default, if desired, in a way that users can still change it according to their trade-off choices, not the driver's, like would happen if the driver just disabled PS completely then. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 4 ++++ net/wireless/core.c | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 223913434e51..0b146bb2dd14 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1108,6 +1108,9 @@ struct cfg80211_ops { * @net: the network namespace this wiphy currently lives in * @netnsok: if set to false, do not allow changing the netns of this * wiphy at all + * @ps_default: default for powersave, will be set depending on the + * kernel's default on wiphy_new(), but can be changed by the + * driver if it has a good reason to override the default */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1123,6 +1126,7 @@ struct wiphy { bool disable_beacon_hints; bool netnsok; + bool ps_default; enum cfg80211_signal_type signal_type; diff --git a/net/wireless/core.c b/net/wireless/core.c index 35d83bedfe5b..bc99e4ec7463 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -412,6 +412,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.dev.class = &ieee80211_class; rdev->wiphy.dev.platform_data = rdev; + rdev->wiphy.ps_default = CONFIG_CFG80211_DEFAULT_PS_VALUE; + wiphy_net_set(&rdev->wiphy, &init_net); rdev->rfkill_ops.set_block = cfg80211_rfkill_set_block; @@ -674,7 +676,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, wdev->wext.default_key = -1; wdev->wext.default_mgmt_key = -1; wdev->wext.connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC; - wdev->wext.ps = CONFIG_CFG80211_DEFAULT_PS_VALUE; + wdev->wext.ps = wdev->wiphy->ps_default; wdev->wext.ps_timeout = 100; if (rdev->ops->set_power_mgmt) if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, -- cgit v1.2.3 From 9f162d2a810b4db48f7b8d7e734d0932c81ec2a1 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 14 Aug 2009 17:18:44 +0300 Subject: sunrpc: hton -> cpu_to_be* htonl is already defined as cpu_to_be32. cpu_to_be64 has architecture specific optimized implementations. 
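As an aside for readers of this series (not part of the patch; the helper names below are invented): htonl() and cpu_to_be32() are the same operation, and a single cpu_to_be64() store replaces the old pair of 32-bit stores with an architecture-optimized one. A minimal sketch showing that both forms emit identical big-endian XDR bytes:

#include <linux/types.h>
#include <asm/byteorder.h>

static inline __be32 *encode_hyper_two_words(__be32 *p, __u64 val)
{
	*p++ = cpu_to_be32(val >> 32);		/* old style: htonl(val >> 32) */
	*p++ = cpu_to_be32(val & 0xFFFFFFFF);	/* old style: htonl(val & 0xFFFFFFFF) */
	return p;
}

static inline __be32 *encode_hyper_single(__be32 *p, __u64 val)
{
	*(__be64 *)p = cpu_to_be64(val);	/* one arch-optimized 64-bit store */
	return p + 2;
}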
Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 5 ++--- net/sunrpc/xdr.c | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b99c625fddfe..f94bbdc75c4b 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -117,9 +117,8 @@ static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int le static inline __be32 * xdr_encode_hyper(__be32 *p, __u64 val) { - *p++ = htonl(val >> 32); - *p++ = htonl(val & 0xFFFFFFFF); - return p; + *(__be64 *)p = cpu_to_be64(val); + return p + 2; } static inline __be32 * diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 406e26de584e..0d05d2554b42 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -24,7 +24,7 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj) unsigned int quadlen = XDR_QUADLEN(obj->len); p[quadlen] = 0; /* zero trailing bytes */ - *p++ = htonl(obj->len); + *p++ = cpu_to_be32(obj->len); memcpy(p, obj->data, obj->len); return p + XDR_QUADLEN(obj->len); } @@ -83,7 +83,7 @@ EXPORT_SYMBOL_GPL(xdr_encode_opaque_fixed); */ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes) { - *p++ = htonl(nbytes); + *p++ = cpu_to_be32(nbytes); return xdr_encode_opaque_fixed(p, ptr, nbytes); } EXPORT_SYMBOL_GPL(xdr_encode_opaque); @@ -779,7 +779,7 @@ EXPORT_SYMBOL_GPL(xdr_decode_word); int xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) { - __be32 raw = htonl(obj); + __be32 raw = cpu_to_be32(obj); return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); } -- cgit v1.2.3 From 98866b5abe1513cdacc011874ca045d40002eccd Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 14 Aug 2009 17:18:49 +0300 Subject: sunrpc: ntoh -> be*_to_cpu ntohl is already defined as be32_to_cpu. be64_to_cpu has architecture specific optimized implementations. 
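A matching sketch for the decode side (again not part of the patch; names invented): ntohl() is be32_to_cpu(), and be64_to_cpup() performs in one load what the two 32-bit loads used to assemble by hand:

#include <linux/types.h>
#include <asm/byteorder.h>

static inline __be32 *decode_hyper_two_words(__be32 *p, __u64 *valp)
{
	*valp = ((__u64)be32_to_cpu(*p++)) << 32;	/* old style: ntohl(*p++) */
	*valp |= be32_to_cpu(*p++);
	return p;
}

static inline __be32 *decode_hyper_single(__be32 *p, __u64 *valp)
{
	*valp = be64_to_cpup((__be64 *)p);	/* one arch-optimized 64-bit load */
	return p + 2;
}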
Signed-off-by: Benny Halevy Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 5 ++--- net/sunrpc/xdr.c | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f94bbdc75c4b..7da466ba4b0d 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -124,9 +124,8 @@ xdr_encode_hyper(__be32 *p, __u64 val) static inline __be32 * xdr_decode_hyper(__be32 *p, __u64 *valp) { - *valp = ((__u64) ntohl(*p++)) << 32; - *valp |= ntohl(*p++); - return p; + *valp = be64_to_cpup((__be64 *)p); + return p + 2; } /* diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 0d05d2554b42..8bd690c48b69 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -35,7 +35,7 @@ xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj) { unsigned int len; - if ((len = ntohl(*p++)) > XDR_MAX_NETOBJ) + if ((len = be32_to_cpu(*p++)) > XDR_MAX_NETOBJ) return NULL; obj->len = len; obj->data = (u8 *) p; @@ -101,7 +101,7 @@ xdr_decode_string_inplace(__be32 *p, char **sp, { u32 len; - len = ntohl(*p++); + len = be32_to_cpu(*p++); if (len > maxlen) return NULL; *lenp = len; @@ -771,7 +771,7 @@ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj) status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); if (status) return status; - *obj = ntohl(raw); + *obj = be32_to_cpu(raw); return 0; } EXPORT_SYMBOL_GPL(xdr_decode_word); -- cgit v1.2.3 From bb2af4f54ffa8245d5ce278cae9c66198bc14d8b Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Fri, 14 Aug 2009 12:41:57 +0000 Subject: net: Add NETIF_F_FCOE_MTU to indicate support for a different MTU for FCoE Add NETIF_F_FCOE_MTU to indicate that the NIC can support a secondary MTU for converged traffic of LAN and Fiber Channel over Ethernet (FCoE). The MTU for FCoE is 2158 = 14 (FCoE header) + 24 (FC header) + 2112 (FC max payload) + 4 (FC CRC) + 4 (FCoE trailer). Signed-off-by: Yi Zou Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9f25ab2899de..9192cdf5bd2c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -701,6 +701,7 @@ struct net_device /* the GSO_MASK reserves bits 16 through 23 */ #define NETIF_F_FCOE_CRC (1 << 24) /* FCoE CRC32 */ #define NETIF_F_SCTP_CSUM (1 << 25) /* SCTP checksum offload */ +#define NETIF_F_FCOE_MTU (1 << 26) /* Supports max FCoE MTU, 2158 bytes*/ /* Segmentation offload features */ #define NETIF_F_GSO_SHIFT 16 -- cgit v1.2.3 From 31089c13bcb18d2cd2a3ddfbe3a28666346f237e Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 14 Aug 2009 15:47:18 +0200 Subject: timekeeping: Introduce timekeeping_leap_insert Move the adjustment of xtime, wall_to_monotonic and the update of the vsyscall variables to the timekeeping code. 
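For orientation, a sketch that simply mirrors the helper introduced in the diff below: it steps xtime and wall_to_monotonic by opposite amounts so that their sum (the CLOCK_MONOTONIC base) is unchanged, then republishes the vsyscall copies; callers must hold xtime_lock.

void timekeeping_leap_insert(int leapsecond)
{
	xtime.tv_sec += leapsecond;		/* step the wall clock */
	wall_to_monotonic.tv_sec -= leapsecond;	/* keep CLOCK_MONOTONIC unchanged */
	update_vsyscall(&xtime, clock);		/* republish for userspace */
}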
Signed-off-by: John Stultz Signed-off-by: Martin Schwidefsky LKML-Reference: <20090814134807.609730216@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/time.h | 1 + kernel/time/ntp.c | 7 ++----- kernel/time/timekeeping.c | 7 +++++++ 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/time.h b/include/linux/time.h index ea16c1a01d51..e7c844558884 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -147,6 +147,7 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran); extern int timekeeping_valid_for_hres(void); extern void update_wall_time(void); extern void update_xtime_cache(u64 nsec); +extern void timekeeping_leap_insert(int leapsecond); struct tms; extern void do_sys_times(struct tms *); diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 7fc64375ff43..4800f933910e 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -194,8 +194,7 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) case TIME_OK: break; case TIME_INS: - xtime.tv_sec--; - wall_to_monotonic.tv_sec++; + timekeeping_leap_insert(-1); time_state = TIME_OOP; printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n"); @@ -203,9 +202,8 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) res = HRTIMER_RESTART; break; case TIME_DEL: - xtime.tv_sec++; + timekeeping_leap_insert(1); time_tai--; - wall_to_monotonic.tv_sec--; time_state = TIME_WAIT; printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n"); @@ -219,7 +217,6 @@ static enum hrtimer_restart ntp_leap_second(struct hrtimer *timer) time_state = TIME_OK; break; } - update_vsyscall(&xtime, clock); write_sequnlock(&xtime_lock); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 02c0b2c9c674..b8b70fb545fc 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -58,6 +58,13 @@ void update_xtime_cache(u64 nsec) struct clocksource *clock; +/* must hold xtime_lock */ +void timekeeping_leap_insert(int leapsecond) +{ + xtime.tv_sec += leapsecond; + wall_to_monotonic.tv_sec -= leapsecond; + update_vsyscall(&xtime, clock); +} #ifdef CONFIG_GENERIC_TIME /** -- cgit v1.2.3 From a0f7d48bfb95a4c5172a2756dbc4b82afc8e9ae4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:19 +0200 Subject: timekeeping: Remove clocksource inline functions The three inline functions clocksource_read, clocksource_enable and clocksource_disable are simple wrappers of an indirect call plus the copy from and to the mult_orig value. The functions are exclusively used by the timekeeping code which has intimate knowledge of the clocksource anyway. Therefore remove the inline functions. No functional change. 
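A short sketch of the open-coded form the timekeeping code uses once the wrappers are gone (illustrative only; the wrapper-style function names here are invented and the bodies mirror pieces of the diff below):

static cycle_t read_cycles(struct clocksource *cs)
{
	return cs->read(cs);		/* formerly clocksource_read(cs) */
}

static void disable_cs(struct clocksource *cs)
{
	/* formerly clocksource_disable(cs): undo any mult adjustment made
	 * by ->enable() before powering the hardware down */
	cs->mult = cs->mult_orig;
	if (cs->disable)
		cs->disable(cs);
}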
Signed-off-by: Martin Schwidefsky Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134807.903108946@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 58 --------------------------------------------- kernel/time/timekeeping.c | 41 ++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 71 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 1219be4fb42e..a1ef46f61c81 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -267,64 +267,6 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) return (u32)tmp; } -/** - * clocksource_read: - Access the clocksource's current cycle value - * @cs: pointer to clocksource being read - * - * Uses the clocksource to return the current cycle_t value - */ -static inline cycle_t clocksource_read(struct clocksource *cs) -{ - return cs->read(cs); -} - -/** - * clocksource_enable: - enable clocksource - * @cs: pointer to clocksource - * - * Enables the specified clocksource. The clocksource callback - * function should start up the hardware and setup mult and field - * members of struct clocksource to reflect hardware capabilities. - */ -static inline int clocksource_enable(struct clocksource *cs) -{ - int ret = 0; - - if (cs->enable) - ret = cs->enable(cs); - - /* - * The frequency may have changed while the clocksource - * was disabled. If so the code in ->enable() must update - * the mult value to reflect the new frequency. Make sure - * mult_orig follows this change. - */ - cs->mult_orig = cs->mult; - - return ret; -} - -/** - * clocksource_disable: - disable clocksource - * @cs: pointer to clocksource - * - * Disables the specified clocksource. The clocksource callback - * function should power down the now unused hardware block to - * save power. - */ -static inline void clocksource_disable(struct clocksource *cs) -{ - /* - * Save mult_orig in mult so clocksource_enable() can - * restore the value regardless if ->enable() updates - * the value of mult or not. 
- */ - cs->mult = cs->mult_orig; - - if (cs->disable) - cs->disable(cs); -} - /** * cyc2ns - converts clocksource cycles to nanoseconds * @cs: Pointer to clocksource diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b8b70fb545fc..016a2591d719 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -79,7 +79,7 @@ static void clocksource_forward_now(void) cycle_t cycle_now, cycle_delta; s64 nsec; - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; @@ -114,7 +114,7 @@ void getnstimeofday(struct timespec *ts) *ts = xtime; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -146,7 +146,7 @@ ktime_t ktime_get(void) nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -186,7 +186,7 @@ void ktime_get_ts(struct timespec *ts) tomono = wall_to_monotonic; /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -274,16 +274,29 @@ static void change_clocksource(void) clocksource_forward_now(); - if (clocksource_enable(new)) + if (new->enable && !new->enable(new)) return; + /* + * The frequency may have changed while the clocksource + * was disabled. If so the code in ->enable() must update + * the mult value to reflect the new frequency. Make sure + * mult_orig follows this change. + */ + new->mult_orig = new->mult; new->raw_time = clock->raw_time; old = clock; clock = new; - clocksource_disable(old); + /* + * Save mult_orig in mult so that the value can be restored + * regardless if ->enable() updates the value of mult or not. 
+ */ + old->mult = old->mult_orig; + if (old->disable) + old->disable(old); clock->cycle_last = 0; - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); clock->error = 0; clock->xtime_nsec = 0; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); @@ -373,7 +386,7 @@ void getrawmonotonic(struct timespec *ts) seq = read_seqbegin(&xtime_lock); /* read clocksource: */ - cycle_now = clocksource_read(clock); + cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; @@ -435,9 +448,12 @@ void __init timekeeping_init(void) ntp_init(); clock = clocksource_get_next(); - clocksource_enable(clock); + if (clock->enable) + clock->enable(clock); + /* set mult_orig on enable */ + clock->mult_orig = clock->mult; clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); xtime.tv_sec = sec; xtime.tv_nsec = 0; @@ -477,8 +493,7 @@ static int timekeeping_resume(struct sys_device *dev) } update_xtime_cache(0); /* re-base the last cycle value */ - clock->cycle_last = 0; - clock->cycle_last = clocksource_read(clock); + clock->cycle_last = clock->read(clock); clock->error = 0; timekeeping_suspended = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -630,7 +645,7 @@ void update_wall_time(void) return; #ifdef CONFIG_GENERIC_TIME - offset = (clocksource_read(clock) - clock->cycle_last) & clock->mask; + offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #else offset = clock->cycle_interval; #endif -- cgit v1.2.3 From f1b82746c1e93daf24e1ab9bfbd39bcdb2e7018b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:21 +0200 Subject: clocksource: Cleanup clocksource selection If a non high-resolution clocksource is first set as override clock and then registered it becomes active even if the system is in one-shot mode. Move the override check from sysfs_override_clocksource to the clocksource selection. That fixes the bug and simplifies the code. The check in clocksource_register for double registration of the same clocksource is removed without replacement. To find the initial clocksource a new weak function in jiffies.c is defined that returns the jiffies clocksource. The architecture code can then override the weak function with a more suitable clocksource, e.g. the TOD clock on s390. 
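A sketch of the weak-symbol arrangement the message describes (the jiffies.c side is not visible in this include-only diff; the s390 override appears in the hunk below). The generic definition is marked __weak, so an architecture that provides a strong definition of the same function wins at link time:

/* kernel/time/jiffies.c (generic fallback) */
struct clocksource * __init __weak clocksource_default_clock(void)
{
	return &clocksource_jiffies;
}

/* arch/s390/kernel/time.c (architecture override) */
struct clocksource * __init clocksource_default_clock(void)
{
	return &clocksource_tod;
}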
[ tglx: Folded in a fix from John Stultz ] Signed-off-by: Martin Schwidefsky Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134808.388024160@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 4 ++ include/linux/clocksource.h | 2 + kernel/time/clocksource.c | 134 +++++++++++++++++--------------------------- kernel/time/jiffies.c | 6 +- kernel/time/timekeeping.c | 4 +- 5 files changed, 64 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index d4c8e9c47c81..afefe514df0f 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -205,6 +205,10 @@ static struct clocksource clocksource_tod = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +struct clocksource * __init clocksource_default_clock(void) +{ + return &clocksource_tod; +} void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index a1ef46f61c81..f263b3abf46e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -322,6 +323,7 @@ extern void clocksource_touch_watchdog(void); extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); +extern struct clocksource * __init __weak clocksource_default_clock(void); #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timespec *ts, struct clocksource *c); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7466cb811251..e91662e87cde 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -21,7 +21,6 @@ * * TODO WishList: * o Allow clocksource drivers to be unregistered - * o get rid of clocksource_jiffies extern */ #include @@ -107,12 +106,9 @@ u64 timecounter_cyc2time(struct timecounter *tc, } EXPORT_SYMBOL(timecounter_cyc2time); -/* XXX - Would like a better way for initializing curr_clocksource */ -extern struct clocksource clocksource_jiffies; - /*[Clocksource internal variables]--------- * curr_clocksource: - * currently selected clocksource. Initialized to clocksource_jiffies. + * currently selected clocksource. * next_clocksource: * pending next selected clocksource. * clocksource_list: @@ -123,9 +119,8 @@ extern struct clocksource clocksource_jiffies; * override_name: * Name of the user-specified clocksource. */ -static struct clocksource *curr_clocksource = &clocksource_jiffies; +static struct clocksource *curr_clocksource; static struct clocksource *next_clocksource; -static struct clocksource *clocksource_override; static LIST_HEAD(clocksource_list); static DEFINE_SPINLOCK(clocksource_lock); static char override_name[32]; @@ -320,6 +315,7 @@ void clocksource_touch_watchdog(void) clocksource_resume_watchdog(); } +#ifdef CONFIG_GENERIC_TIME /** * clocksource_get_next - Returns the selected clocksource * @@ -339,56 +335,65 @@ struct clocksource *clocksource_get_next(void) } /** - * select_clocksource - Selects the best registered clocksource. + * clocksource_select - Select the best clocksource available * * Private function. Must hold clocksource_lock when called. * * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. 
*/ -static struct clocksource *select_clocksource(void) +static void clocksource_select(void) { - struct clocksource *next; + struct clocksource *best, *cs; if (list_empty(&clocksource_list)) - return NULL; + return; + /* First clocksource on the list has the best rating. */ + best = list_first_entry(&clocksource_list, struct clocksource, list); + /* Check for the override clocksource. */ + list_for_each_entry(cs, &clocksource_list, list) { + if (strcmp(cs->name, override_name) != 0) + continue; + /* + * Check to make sure we don't switch to a non-highres + * capable clocksource if the tick code is in oneshot + * mode (highres or nohz) + */ + if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && + tick_oneshot_mode_active()) { + /* Override clocksource cannot be used. */ + printk(KERN_WARNING "Override clocksource %s is not " + "HRT compatible. Cannot switch while in " + "HRT/NOHZ mode\n", cs->name); + override_name[0] = 0; + } else + /* Override clocksource can be used. */ + best = cs; + break; + } + if (curr_clocksource != best) + next_clocksource = best; +} - if (clocksource_override) - next = clocksource_override; - else - next = list_entry(clocksource_list.next, struct clocksource, - list); +#else /* CONFIG_GENERIC_TIME */ - if (next == curr_clocksource) - return NULL; +static void clocksource_select(void) { } - return next; -} +#endif /* * Enqueue the clocksource sorted by rating */ -static int clocksource_enqueue(struct clocksource *c) +static void clocksource_enqueue(struct clocksource *cs) { - struct list_head *tmp, *entry = &clocksource_list; - - list_for_each(tmp, &clocksource_list) { - struct clocksource *cs; + struct list_head *entry = &clocksource_list; + struct clocksource *tmp; - cs = list_entry(tmp, struct clocksource, list); - if (cs == c) - return -EBUSY; + list_for_each_entry(tmp, &clocksource_list, list) /* Keep track of the place, where to insert */ - if (cs->rating >= c->rating) - entry = tmp; - } - list_add(&c->list, entry); - - if (strlen(c->name) == strlen(override_name) && - !strcmp(c->name, override_name)) - clocksource_override = c; - - return 0; + if (tmp->rating >= cs->rating) + entry = &tmp->list; + list_add(&cs->list, entry); } /** @@ -397,19 +402,16 @@ static int clocksource_enqueue(struct clocksource *c) * * Returns -EBUSY if registration fails, zero otherwise. 
*/ -int clocksource_register(struct clocksource *c) +int clocksource_register(struct clocksource *cs) { unsigned long flags; - int ret; spin_lock_irqsave(&clocksource_lock, flags); - ret = clocksource_enqueue(c); - if (!ret) - next_clocksource = select_clocksource(); + clocksource_enqueue(cs); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); - if (!ret) - clocksource_check_watchdog(c); - return ret; + clocksource_check_watchdog(cs); + return 0; } EXPORT_SYMBOL(clocksource_register); @@ -425,7 +427,7 @@ void clocksource_change_rating(struct clocksource *cs, int rating) list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); - next_clocksource = select_clocksource(); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } @@ -438,9 +440,7 @@ void clocksource_unregister(struct clocksource *cs) spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); - if (clocksource_override == cs) - clocksource_override = NULL; - next_clocksource = select_clocksource(); + clocksource_select(); spin_unlock_irqrestore(&clocksource_lock, flags); } @@ -478,9 +478,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, struct sysdev_attribute *attr, const char *buf, size_t count) { - struct clocksource *ovr = NULL; size_t ret = count; - int len; /* strings from sysfs write are not 0 terminated! */ if (count >= sizeof(override_name)) @@ -495,37 +493,7 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (count > 0) memcpy(override_name, buf, count); override_name[count] = 0; - - len = strlen(override_name); - if (len) { - struct clocksource *cs; - - ovr = clocksource_override; - /* try to select it: */ - list_for_each_entry(cs, &clocksource_list, list) { - if (strlen(cs->name) == len && - !strcmp(cs->name, override_name)) - ovr = cs; - } - } - - /* - * Check to make sure we don't switch to a non-highres capable - * clocksource if the tick code is in oneshot mode (highres or nohz) - */ - if (tick_oneshot_mode_active() && ovr && - !(ovr->flags & CLOCK_SOURCE_VALID_FOR_HRES)) { - printk(KERN_WARNING "%s clocksource is not HRT compatible. 
" - "Cannot switch while in HRT/NOHZ mode\n", ovr->name); - ovr = NULL; - override_name[0] = 0; - } - - /* Reselect, when the override name has changed */ - if (ovr != clocksource_override) { - clocksource_override = ovr; - next_clocksource = select_clocksource(); - } + clocksource_select(); spin_unlock_irq(&clocksource_lock); diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index c3f6c30816e3..5404a8456909 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -61,7 +61,6 @@ struct clocksource clocksource_jiffies = { .read = jiffies_read, .mask = 0xffffffff, /*32bits*/ .mult = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */ - .mult_orig = NSEC_PER_JIFFY << JIFFIES_SHIFT, .shift = JIFFIES_SHIFT, }; @@ -71,3 +70,8 @@ static int __init init_jiffies_clocksource(void) } core_initcall(init_jiffies_clocksource); + +struct clocksource * __init __weak clocksource_default_clock(void) +{ + return &clocksource_jiffies; +} diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index b5673016089f..325a9b63265a 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -269,7 +269,7 @@ static void change_clocksource(void) new = clocksource_get_next(); - if (clock == new) + if (!new || clock == new) return; clocksource_forward_now(); @@ -446,7 +446,7 @@ void __init timekeeping_init(void) ntp_init(); - clock = clocksource_get_next(); + clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); /* set mult_orig on enable */ -- cgit v1.2.3 From c55c87c892c1875deace0c8fc28787335277fdf2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:25 +0200 Subject: clocksource: Move watchdog downgrade to a work queue thread Move the downgrade of an unstable clocksource from the timer interrupt context into the process context of a work queue thread. This is needed to be able to do the clocksource switch with stop_machine. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.354926067@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 56 +++++++++++++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index f263b3abf46e..19ad43af62d0 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -213,6 +213,7 @@ extern struct clocksource *clock; /* current clocksource */ #define CLOCK_SOURCE_WATCHDOG 0x10 #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 +#define CLOCK_SOURCE_UNSTABLE 0x40 /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) (cycle_t)((bits) < 64 ? 
((1ULL<<(bits))-1) : -1) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 56aaa749645d..f1508019bfb4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -143,10 +143,13 @@ fs_initcall(clocksource_done_booting); static LIST_HEAD(watchdog_list); static struct clocksource *watchdog; static struct timer_list watchdog_timer; +static struct work_struct watchdog_work; static DEFINE_SPINLOCK(watchdog_lock); static cycle_t watchdog_last; static int watchdog_running; +static void clocksource_watchdog_work(struct work_struct *work); + /* * Interval: 0.5sec Threshold: 0.0625s */ @@ -158,15 +161,16 @@ static void clocksource_unstable(struct clocksource *cs, int64_t delta) printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); - clocksource_change_rating(cs, 0); - list_del(&cs->wd_list); + cs->flags |= CLOCK_SOURCE_UNSTABLE; + schedule_work(&watchdog_work); } static void clocksource_watchdog(unsigned long data) { - struct clocksource *cs, *tmp; + struct clocksource *cs; cycle_t csnow, wdnow; int64_t wd_nsec, cs_nsec; + int next_cpu; spin_lock(&watchdog_lock); if (!watchdog_running) @@ -176,7 +180,12 @@ static void clocksource_watchdog(unsigned long data) wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); watchdog_last = wdnow; - list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { + list_for_each_entry(cs, &watchdog_list, wd_list) { + + /* Clocksource already marked unstable? */ + if (cs->flags & CLOCK_SOURCE_UNSTABLE) + continue; + csnow = cs->read(cs); /* Clocksource initialized ? */ @@ -207,19 +216,15 @@ static void clocksource_watchdog(unsigned long data) } } - if (!list_empty(&watchdog_list)) { - /* - * Cycle through CPUs to check if the CPUs stay - * synchronized to each other. - */ - int next_cpu = cpumask_next(raw_smp_processor_id(), - cpu_online_mask); - - if (next_cpu >= nr_cpu_ids) - next_cpu = cpumask_first(cpu_online_mask); - watchdog_timer.expires += WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, next_cpu); - } + /* + * Cycle through CPUs to check if the CPUs stay synchronized + * to each other. + */ + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); out: spin_unlock(&watchdog_lock); } @@ -228,6 +233,7 @@ static inline void clocksource_start_watchdog(void) { if (watchdog_running || !watchdog || list_empty(&watchdog_list)) return; + INIT_WORK(&watchdog_work, clocksource_watchdog_work); init_timer(&watchdog_timer); watchdog_timer.function = clocksource_watchdog; watchdog_last = watchdog->read(watchdog); @@ -313,6 +319,22 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs) spin_unlock_irqrestore(&watchdog_lock, flags); } +static void clocksource_watchdog_work(struct work_struct *work) +{ + struct clocksource *cs, *tmp; + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) + if (cs->flags & CLOCK_SOURCE_UNSTABLE) { + list_del_init(&cs->wd_list); + clocksource_change_rating(cs, 0); + } + /* Check if the watchdog timer needs to be stopped. 
*/ + clocksource_stop_watchdog(); + spin_unlock(&watchdog_lock); +} + #else /* CONFIG_CLOCKSOURCE_WATCHDOG */ static void clocksource_enqueue_watchdog(struct clocksource *cs) -- cgit v1.2.3 From 155ec60226ae0ae2aadaa57c951a58a359331030 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:26 +0200 Subject: timekeeping: Introduce struct timekeeper Add struct timekeeper to keep the internal values timekeeping.c needs in regard to the currently selected clock source. This moves the timekeeping intervals, xtime_nsec and the ntp error value from struct clocksource to struct timekeeper. The raw_time is removed from the clocksource as well. It gets treated like xtime as a global variable. Eventually xtime raw_time should be moved to struct timekeeper. [ tglx: minor cleanup ] Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134809.613209842@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 1 - include/linux/clocksource.h | 54 +--------- kernel/time/clocksource.c | 6 +- kernel/time/timekeeping.c | 235 +++++++++++++++++++++++++++++--------------- 4 files changed, 164 insertions(+), 132 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index afefe514df0f..e76c2e7a8b9a 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -280,7 +280,6 @@ void __init time_init(void) now = get_clock(); tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); clocksource_tod.cycle_last = now; - clocksource_tod.raw_time = xtime; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); write_sequnlock_irqrestore(&xtime_lock, flags); diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 19ad43af62d0..e12e3095e2fb 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -155,8 +155,6 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @flags: flags describing special properties * @vread: vsyscall based read * @resume: resume function for the clocksource, if necessary - * @cycle_interval: Used internally by timekeeping core, please ignore. - * @xtime_interval: Used internally by timekeeping core, please ignore. */ struct clocksource { /* @@ -182,19 +180,12 @@ struct clocksource { #define CLKSRC_FSYS_MMIO_SET(mmio, addr) do { } while (0) #endif - /* timekeeping specific data, ignore */ - cycle_t cycle_interval; - u64 xtime_interval; - u32 raw_interval; /* * Second part is written at each timer interrupt * Keep it in a different cache line to dirty no * more than one cache line. */ cycle_t cycle_last ____cacheline_aligned_in_smp; - u64 xtime_nsec; - s64 error; - struct timespec raw_time; #ifdef CONFIG_CLOCKSOURCE_WATCHDOG /* Watchdog related data, used by the framework */ @@ -203,8 +194,6 @@ struct clocksource { #endif }; -extern struct clocksource *clock; /* current clocksource */ - /* * Clock source flags bits:: */ @@ -270,50 +259,15 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 shift_constant) } /** - * cyc2ns - converts clocksource cycles to nanoseconds - * @cs: Pointer to clocksource - * @cycles: Cycles + * clocksource_cyc2ns - converts clocksource cycles to nanoseconds * - * Uses the clocksource and ntp ajdustment to convert cycle_ts to nanoseconds. + * Converts cycles to nanoseconds, using the given mult and shift. 
* * XXX - This could use some mult_lxl_ll() asm optimization */ -static inline s64 cyc2ns(struct clocksource *cs, cycle_t cycles) +static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift) { - u64 ret = (u64)cycles; - ret = (ret * cs->mult) >> cs->shift; - return ret; -} - -/** - * clocksource_calculate_interval - Calculates a clocksource interval struct - * - * @c: Pointer to clocksource. - * @length_nsec: Desired interval length in nanoseconds. - * - * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment - * pair and interval request. - * - * Unless you're the timekeeping code, you should not be using this! - */ -static inline void clocksource_calculate_interval(struct clocksource *c, - unsigned long length_nsec) -{ - u64 tmp; - - /* Do the ns -> cycle conversion first, using original mult */ - tmp = length_nsec; - tmp <<= c->shift; - tmp += c->mult_orig/2; - do_div(tmp, c->mult_orig); - - c->cycle_interval = (cycle_t)tmp; - if (c->cycle_interval == 0) - c->cycle_interval = 1; - - /* Go back from cycles -> shifted ns, this time use ntp adjused mult */ - c->xtime_interval = (u64)c->cycle_interval * c->mult; - c->raw_interval = ((u64)c->cycle_interval * c->mult_orig) >> c->shift; + return ((u64) cycles * mult) >> shift; } diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f1508019bfb4..f18c9a6bdcf4 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -177,7 +177,8 @@ static void clocksource_watchdog(unsigned long data) goto out; wdnow = watchdog->read(watchdog); - wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); + wd_nsec = clocksource_cyc2ns((wdnow - watchdog_last) & watchdog->mask, + watchdog->mult, watchdog->shift); watchdog_last = wdnow; list_for_each_entry(cs, &watchdog_list, wd_list) { @@ -196,7 +197,8 @@ static void clocksource_watchdog(unsigned long data) } /* Check the deviation from the watchdog clocksource. */ - cs_nsec = cyc2ns(cs, (csnow - cs->wd_last) & cs->mask); + cs_nsec = clocksource_cyc2ns((csnow - cs->wd_last) & + cs->mask, cs->mult, cs->shift); cs->wd_last = csnow; if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { clocksource_unstable(cs, cs_nsec - wd_nsec); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 325a9b63265a..7af45cbf6b13 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -19,6 +19,65 @@ #include #include +/* Structure holding internal timekeeping values. */ +struct timekeeper { + /* Current clocksource used for timekeeping. */ + struct clocksource *clock; + + /* Number of clock cycles in one NTP interval. */ + cycle_t cycle_interval; + /* Number of clock shifted nano seconds in one NTP interval. */ + u64 xtime_interval; + /* Raw nano seconds accumulated per NTP interval. */ + u32 raw_interval; + + /* Clock shifted nano seconds remainder not stored in xtime.tv_nsec. */ + u64 xtime_nsec; + /* Difference between accumulated time and NTP time in ntp + * shifted nano seconds. */ + s64 ntp_error; +}; + +struct timekeeper timekeeper; + +/** + * timekeeper_setup_internals - Set up internals to use clocksource clock. + * + * @clock: Pointer to clocksource. + * + * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment + * pair and interval request. + * + * Unless you're the timekeeping code, you should not be using this! 
+ */ +static void timekeeper_setup_internals(struct clocksource *clock) +{ + cycle_t interval; + u64 tmp; + + timekeeper.clock = clock; + clock->cycle_last = clock->read(clock); + + /* Do the ns -> cycle conversion first, using original mult */ + tmp = NTP_INTERVAL_LENGTH; + tmp <<= clock->shift; + tmp += clock->mult_orig/2; + do_div(tmp, clock->mult_orig); + if (tmp == 0) + tmp = 1; + + interval = (cycle_t) tmp; + timekeeper.cycle_interval = interval; + + /* Go back from cycles -> shifted ns */ + timekeeper.xtime_interval = (u64) interval * clock->mult; + timekeeper.raw_interval = + ((u64) interval * clock->mult_orig) >> clock->shift; + + timekeeper.xtime_nsec = 0; + + timekeeper.ntp_error = 0; +} /* * This read-write spinlock protects us from races in SMP while @@ -46,6 +105,11 @@ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); static unsigned long total_sleep_time; /* seconds */ +/* + * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. + */ +struct timespec raw_time; + /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -56,42 +120,42 @@ void update_xtime_cache(u64 nsec) timespec_add_ns(&xtime_cache, nsec); } -struct clocksource *clock; - /* must hold xtime_lock */ void timekeeping_leap_insert(int leapsecond) { xtime.tv_sec += leapsecond; wall_to_monotonic.tv_sec -= leapsecond; - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); } #ifdef CONFIG_GENERIC_TIME /** - * clocksource_forward_now - update clock to the current time + * timekeeping_forward_now - update clock to the current time * * Forward the current clock to update its state since the last call to * update_wall_time(). This is useful before significant clock changes, * as it avoids having to deal with this time offset explicitly. 
*/ -static void clocksource_forward_now(void) +static void timekeeping_forward_now(void) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; s64 nsec; + clock = timekeeper.clock; cycle_now = clock->read(clock); cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = cyc2ns(clock, cycle_delta); + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); /* If arch requires, add in gettimeoffset() */ nsec += arch_gettimeoffset(); timespec_add_ns(&xtime, nsec); - nsec = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; - clock->raw_time.tv_nsec += nsec; + nsec = clocksource_cyc2ns(cycle_delta, clock->mult_orig, clock->shift); + timespec_add_ns(&raw_time, nsec); } /** @@ -103,6 +167,7 @@ static void clocksource_forward_now(void) void getnstimeofday(struct timespec *ts) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; unsigned long seq; s64 nsecs; @@ -114,13 +179,15 @@ void getnstimeofday(struct timespec *ts) *ts = xtime; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = cyc2ns(clock, cycle_delta); + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -135,6 +202,7 @@ EXPORT_SYMBOL(getnstimeofday); ktime_t ktime_get(void) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; unsigned int seq; s64 secs, nsecs; @@ -146,13 +214,15 @@ ktime_t ktime_get(void) nsecs = xtime.tv_nsec + wall_to_monotonic.tv_nsec; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs += cyc2ns(clock, cycle_delta); + nsecs += clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); } while (read_seqretry(&xtime_lock, seq)); /* @@ -174,6 +244,7 @@ EXPORT_SYMBOL_GPL(ktime_get); void ktime_get_ts(struct timespec *ts) { cycle_t cycle_now, cycle_delta; + struct clocksource *clock; struct timespec tomono; unsigned int seq; s64 nsecs; @@ -186,13 +257,15 @@ void ktime_get_ts(struct timespec *ts) tomono = wall_to_monotonic; /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = cyc2ns(clock, cycle_delta); + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, + clock->shift); } while (read_seqretry(&xtime_lock, seq)); @@ -233,7 +306,7 @@ int do_settimeofday(struct timespec *tv) write_seqlock_irqsave(&xtime_lock, flags); - clocksource_forward_now(); + timekeeping_forward_now(); ts_delta.tv_sec = tv->tv_sec - xtime.tv_sec; ts_delta.tv_nsec = tv->tv_nsec - xtime.tv_nsec; @@ -243,10 +316,10 @@ int do_settimeofday(struct timespec *tv) update_xtime_cache(0); - clock->error = 0; + timekeeper.ntp_error = 0; ntp_clear(); - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); write_sequnlock_irqrestore(&xtime_lock, flags); @@ -269,10 +342,10 @@ static void change_clocksource(void) new = clocksource_get_next(); - if (!new || clock == new) + if (!new || timekeeper.clock == new) return; - clocksource_forward_now(); + timekeeping_forward_now(); if (new->enable && 
!new->enable(new)) return; @@ -284,9 +357,9 @@ static void change_clocksource(void) */ new->mult_orig = new->mult; - new->raw_time = clock->raw_time; - old = clock; - clock = new; + old = timekeeper.clock; + timekeeper_setup_internals(new); + /* * Save mult_orig in mult so that the value can be restored * regardless if ->enable() updates the value of mult or not. @@ -295,22 +368,10 @@ static void change_clocksource(void) if (old->disable) old->disable(old); - clock->cycle_last = clock->read(clock); - clock->error = 0; - clock->xtime_nsec = 0; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - tick_clock_notify(); - - /* - * We're holding xtime lock and waking up klogd would deadlock - * us on enqueue. So no printing! - printk(KERN_INFO "Time: %s clocksource has been installed.\n", - clock->name); - */ } #else /* GENERIC_TIME */ -static inline void clocksource_forward_now(void) { } +static inline void timekeeping_forward_now(void) { } static inline void change_clocksource(void) { } /** @@ -380,20 +441,23 @@ void getrawmonotonic(struct timespec *ts) unsigned long seq; s64 nsecs; cycle_t cycle_now, cycle_delta; + struct clocksource *clock; do { seq = read_seqbegin(&xtime_lock); /* read clocksource: */ + clock = timekeeper.clock; cycle_now = clock->read(clock); /* calculate the delta since the last update_wall_time: */ cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = ((s64)cycle_delta * clock->mult_orig) >> clock->shift; + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult_orig, + clock->shift); - *ts = clock->raw_time; + *ts = raw_time; } while (read_seqretry(&xtime_lock, seq)); @@ -413,7 +477,7 @@ int timekeeping_valid_for_hres(void) do { seq = read_seqbegin(&xtime_lock); - ret = clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; + ret = timekeeper.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES; } while (read_seqretry(&xtime_lock, seq)); @@ -439,6 +503,7 @@ unsigned long __attribute__((weak)) read_persistent_clock(void) */ void __init timekeeping_init(void) { + struct clocksource *clock; unsigned long flags; unsigned long sec = read_persistent_clock(); @@ -451,11 +516,13 @@ void __init timekeeping_init(void) clock->enable(clock); /* set mult_orig on enable */ clock->mult_orig = clock->mult; - clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); - clock->cycle_last = clock->read(clock); + + timekeeper_setup_internals(clock); xtime.tv_sec = sec; xtime.tv_nsec = 0; + raw_time.tv_sec = 0; + raw_time.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); update_xtime_cache(0); @@ -492,8 +559,8 @@ static int timekeeping_resume(struct sys_device *dev) } update_xtime_cache(0); /* re-base the last cycle value */ - clock->cycle_last = clock->read(clock); - clock->error = 0; + timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock); + timekeeper.ntp_error = 0; timekeeping_suspended = 0; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -514,7 +581,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) timekeeping_suspend_time = read_persistent_clock(); write_seqlock_irqsave(&xtime_lock, flags); - clocksource_forward_now(); + timekeeping_forward_now(); timekeeping_suspended = 1; write_sequnlock_irqrestore(&xtime_lock, flags); @@ -549,7 +616,7 @@ device_initcall(timekeeping_init_device); * If the error is already larger, we look ahead even further * to compensate for late or lost adjustments. 
*/ -static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, +static __always_inline int timekeeping_bigadjust(s64 error, s64 *interval, s64 *offset) { s64 tick_error, i; @@ -565,7 +632,7 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * here. This is tuned so that an error of about 1 msec is adjusted * within about 1 sec (or 2^20 nsec in 2^SHIFT_HZ ticks). */ - error2 = clock->error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); + error2 = timekeeper.ntp_error >> (NTP_SCALE_SHIFT + 22 - 2 * SHIFT_HZ); error2 = abs(error2); for (look_ahead = 0; error2 > 0; look_ahead++) error2 >>= 2; @@ -574,8 +641,9 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * Now calculate the error in (1 << look_ahead) ticks, but first * remove the single look ahead already included in the error. */ - tick_error = tick_length >> (NTP_SCALE_SHIFT - clock->shift + 1); - tick_error -= clock->xtime_interval >> 1; + tick_error = tick_length >> + (NTP_SCALE_SHIFT - timekeeper.clock->shift + 1); + tick_error -= timekeeper.xtime_interval >> 1; error = ((error - tick_error) >> look_ahead) + tick_error; /* Finally calculate the adjustment shift value. */ @@ -600,18 +668,19 @@ static __always_inline int clocksource_bigadjust(s64 error, s64 *interval, * this is optimized for the most common adjustments of -1,0,1, * for other values we can do a bit more work. */ -static void clocksource_adjust(s64 offset) +static void timekeeping_adjust(s64 offset) { - s64 error, interval = clock->cycle_interval; + s64 error, interval = timekeeper.cycle_interval; int adj; - error = clock->error >> (NTP_SCALE_SHIFT - clock->shift - 1); + error = timekeeper.ntp_error >> + (NTP_SCALE_SHIFT - timekeeper.clock->shift - 1); if (error > interval) { error >>= 2; if (likely(error <= interval)) adj = 1; else - adj = clocksource_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(error, &interval, &offset); } else if (error < -interval) { error >>= 2; if (likely(error >= -interval)) { @@ -619,15 +688,15 @@ static void clocksource_adjust(s64 offset) interval = -interval; offset = -offset; } else - adj = clocksource_bigadjust(error, &interval, &offset); + adj = timekeeping_bigadjust(error, &interval, &offset); } else return; - clock->mult += adj; - clock->xtime_interval += interval; - clock->xtime_nsec -= offset; - clock->error -= (interval - offset) << - (NTP_SCALE_SHIFT - clock->shift); + timekeeper.clock->mult += adj; + timekeeper.xtime_interval += interval; + timekeeper.xtime_nsec -= offset; + timekeeper.ntp_error -= (interval - offset) << + (NTP_SCALE_SHIFT - timekeeper.clock->shift); } /** @@ -637,53 +706,59 @@ static void clocksource_adjust(s64 offset) */ void update_wall_time(void) { + struct clocksource *clock; cycle_t offset; + s64 nsecs; /* Make sure we're fully resumed: */ if (unlikely(timekeeping_suspended)) return; + clock = timekeeper.clock; #ifdef CONFIG_GENERIC_TIME offset = (clock->read(clock) - clock->cycle_last) & clock->mask; #else - offset = clock->cycle_interval; + offset = timekeeper.cycle_interval; #endif - clock->xtime_nsec = (s64)xtime.tv_nsec << clock->shift; + timekeeper.xtime_nsec = (s64)xtime.tv_nsec << clock->shift; /* normally this loop will run just once, however in the * case of lost or late ticks, it will accumulate correctly. 
*/ - while (offset >= clock->cycle_interval) { + while (offset >= timekeeper.cycle_interval) { + u64 nsecps = (u64)NSEC_PER_SEC << clock->shift; + /* accumulate one interval */ - offset -= clock->cycle_interval; - clock->cycle_last += clock->cycle_interval; + offset -= timekeeper.cycle_interval; + clock->cycle_last += timekeeper.cycle_interval; - clock->xtime_nsec += clock->xtime_interval; - if (clock->xtime_nsec >= (u64)NSEC_PER_SEC << clock->shift) { - clock->xtime_nsec -= (u64)NSEC_PER_SEC << clock->shift; + timekeeper.xtime_nsec += timekeeper.xtime_interval; + if (timekeeper.xtime_nsec >= nsecps) { + timekeeper.xtime_nsec -= nsecps; xtime.tv_sec++; second_overflow(); } - clock->raw_time.tv_nsec += clock->raw_interval; - if (clock->raw_time.tv_nsec >= NSEC_PER_SEC) { - clock->raw_time.tv_nsec -= NSEC_PER_SEC; - clock->raw_time.tv_sec++; + raw_time.tv_nsec += timekeeper.raw_interval; + if (raw_time.tv_nsec >= NSEC_PER_SEC) { + raw_time.tv_nsec -= NSEC_PER_SEC; + raw_time.tv_sec++; } /* accumulate error between NTP and clock interval */ - clock->error += tick_length; - clock->error -= clock->xtime_interval << (NTP_SCALE_SHIFT - clock->shift); + timekeeper.ntp_error += tick_length; + timekeeper.ntp_error -= timekeeper.xtime_interval << + (NTP_SCALE_SHIFT - clock->shift); } /* correct the clock when NTP error is too big */ - clocksource_adjust(offset); + timekeeping_adjust(offset); /* * Since in the loop above, we accumulate any amount of time * in xtime_nsec over a second into xtime.tv_sec, its possible for * xtime_nsec to be fairly small after the loop. Further, if we're - * slightly speeding the clocksource up in clocksource_adjust(), + * slightly speeding the clocksource up in timekeeping_adjust(), * its possible the required corrective factor to xtime_nsec could * cause it to underflow. * @@ -695,24 +770,26 @@ void update_wall_time(void) * We'll correct this error next time through this function, when * xtime_nsec is not as small. */ - if (unlikely((s64)clock->xtime_nsec < 0)) { - s64 neg = -(s64)clock->xtime_nsec; - clock->xtime_nsec = 0; - clock->error += neg << (NTP_SCALE_SHIFT - clock->shift); + if (unlikely((s64)timekeeper.xtime_nsec < 0)) { + s64 neg = -(s64)timekeeper.xtime_nsec; + timekeeper.xtime_nsec = 0; + timekeeper.ntp_error += neg << (NTP_SCALE_SHIFT - clock->shift); } /* store full nanoseconds into xtime after rounding it up and * add the remainder to the error difference. */ - xtime.tv_nsec = ((s64)clock->xtime_nsec >> clock->shift) + 1; - clock->xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; - clock->error += clock->xtime_nsec << (NTP_SCALE_SHIFT - clock->shift); + xtime.tv_nsec = ((s64)timekeeper.xtime_nsec >> clock->shift) + 1; + timekeeper.xtime_nsec -= (s64)xtime.tv_nsec << clock->shift; + timekeeper.ntp_error += timekeeper.xtime_nsec << + (NTP_SCALE_SHIFT - clock->shift); - update_xtime_cache(cyc2ns(clock, offset)); + nsecs = clocksource_cyc2ns(offset, clock->mult, clock->shift); + update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ change_clocksource(); - update_vsyscall(&xtime, clock); + update_vsyscall(&xtime, timekeeper.clock); } /** -- cgit v1.2.3 From 0a54419836254a27baecd9037103171bcbabaf67 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:28 +0200 Subject: timekeeping: Move NTP adjusted clock multiplier to struct timekeeper The clocksource structure has two multipliers, the unmodified multiplier clock->mult_orig and the NTP corrected multiplier clock->mult. 
The NTP multiplier is misplaced in the struct clocksource, this is private information of the timekeeping code. Add the mult field to the struct timekeeper to contain the NTP corrected value, keep the unmodifed multiplier in clock->mult and remove clock->mult_orig. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134810.149047645@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/arm/plat-omap/common.c | 7 ++---- include/linux/clocksource.h | 4 +--- kernel/time/timekeeping.c | 53 ++++++++++++++++++++------------------------- 3 files changed, 27 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/arch/arm/plat-omap/common.c b/arch/arm/plat-omap/common.c index ebcf006406f9..95587b6c0259 100644 --- a/arch/arm/plat-omap/common.c +++ b/arch/arm/plat-omap/common.c @@ -253,11 +253,8 @@ static struct clocksource clocksource_32k = { */ unsigned long long sched_clock(void) { - unsigned long long ret; - - ret = (unsigned long long)clocksource_32k.read(&clocksource_32k); - ret = (ret * clocksource_32k.mult_orig) >> clocksource_32k.shift; - return ret; + return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k), + clocksource_32k.mult, clocksource_32k.shift); } static int __init omap_init_clocksource_32k(void) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e12e3095e2fb..e34015effeb6 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -149,8 +149,7 @@ extern u64 timecounter_cyc2time(struct timecounter *tc, * @disable: optional function to disable the clocksource * @mask: bitmask for two's complement * subtraction of non 64 bit counters - * @mult: cycle to nanosecond multiplier (adjusted by NTP) - * @mult_orig: cycle to nanosecond multiplier (unadjusted by NTP) + * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) * @flags: flags describing special properties * @vread: vsyscall based read @@ -168,7 +167,6 @@ struct clocksource { void (*disable)(struct clocksource *cs); cycle_t mask; u32 mult; - u32 mult_orig; u32 shift; unsigned long flags; cycle_t (*vread)(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index dfdab1cefe1e..f4056f6c2632 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -41,6 +41,8 @@ struct timekeeper { /* Shift conversion between clock shifted nano seconds and * ntp shifted nano seconds. */ int ntp_error_shift; + /* NTP adjusted clock multiplier */ + u32 mult; }; struct timekeeper timekeeper; @@ -66,8 +68,8 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Do the ns -> cycle conversion first, using original mult */ tmp = NTP_INTERVAL_LENGTH; tmp <<= clock->shift; - tmp += clock->mult_orig/2; - do_div(tmp, clock->mult_orig); + tmp += clock->mult/2; + do_div(tmp, clock->mult); if (tmp == 0) tmp = 1; @@ -77,13 +79,20 @@ static void timekeeper_setup_internals(struct clocksource *clock) /* Go back from cycles -> shifted ns */ timekeeper.xtime_interval = (u64) interval * clock->mult; timekeeper.raw_interval = - ((u64) interval * clock->mult_orig) >> clock->shift; + ((u64) interval * clock->mult) >> clock->shift; timekeeper.xtime_nsec = 0; timekeeper.shift = clock->shift; timekeeper.ntp_error = 0; timekeeper.ntp_error_shift = NTP_SCALE_SHIFT - clock->shift; + + /* + * The timekeeper keeps its own mult values for the currently + * active clocksource. These value will be adjusted via NTP + * to counteract clock drifting. 
+ */ + timekeeper.mult = clock->mult; } /* @@ -154,14 +163,15 @@ static void timekeeping_forward_now(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; clock->cycle_last = cycle_now; - nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); + nsec = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); /* If arch requires, add in gettimeoffset() */ nsec += arch_gettimeoffset(); timespec_add_ns(&xtime, nsec); - nsec = clocksource_cyc2ns(cycle_delta, clock->mult_orig, clock->shift); + nsec = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); timespec_add_ns(&raw_time, nsec); } @@ -193,8 +203,8 @@ void getnstimeofday(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); /* If arch requires, add in gettimeoffset() */ nsecs += arch_gettimeoffset(); @@ -228,8 +238,8 @@ ktime_t ktime_get(void) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs += clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs += clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); } while (read_seqretry(&xtime_lock, seq)); /* @@ -271,8 +281,8 @@ void ktime_get_ts(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, - clock->shift); + nsecs = clocksource_cyc2ns(cycle_delta, timekeeper.mult, + timekeeper.shift); } while (read_seqretry(&xtime_lock, seq)); @@ -356,22 +366,10 @@ static void change_clocksource(void) if (new->enable && !new->enable(new)) return; - /* - * The frequency may have changed while the clocksource - * was disabled. If so the code in ->enable() must update - * the mult value to reflect the new frequency. Make sure - * mult_orig follows this change. - */ - new->mult_orig = new->mult; old = timekeeper.clock; timekeeper_setup_internals(new); - /* - * Save mult_orig in mult so that the value can be restored - * regardless if ->enable() updates the value of mult or not. 
- */ - old->mult = old->mult_orig; if (old->disable) old->disable(old); @@ -461,7 +459,7 @@ void getrawmonotonic(struct timespec *ts) cycle_delta = (cycle_now - clock->cycle_last) & clock->mask; /* convert to nanoseconds: */ - nsecs = clocksource_cyc2ns(cycle_delta, clock->mult_orig, + nsecs = clocksource_cyc2ns(cycle_delta, clock->mult, clock->shift); *ts = raw_time; @@ -521,9 +519,6 @@ void __init timekeeping_init(void) clock = clocksource_default_clock(); if (clock->enable) clock->enable(clock); - /* set mult_orig on enable */ - clock->mult_orig = clock->mult; - timekeeper_setup_internals(clock); xtime.tv_sec = sec; @@ -697,7 +692,7 @@ static void timekeeping_adjust(s64 offset) } else return; - timekeeper.clock->mult += adj; + timekeeper.mult += adj; timekeeper.xtime_interval += interval; timekeeper.xtime_nsec -= offset; timekeeper.ntp_error -= (interval - offset) << @@ -789,7 +784,7 @@ void update_wall_time(void) timekeeper.ntp_error += timekeeper.xtime_nsec << timekeeper.ntp_error_shift; - nsecs = clocksource_cyc2ns(offset, clock->mult, clock->shift); + nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift); update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ -- cgit v1.2.3 From 75c5158f70c065b9704b924503d96e8297838f79 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:30 +0200 Subject: timekeeping: Update clocksource with stop_machine update_wall_time calls change_clocksource HZ times per second to check if a new clock source is available. In close to 100% of all calls there is no new clock. Replace the tick based check by an update done with stop_machine. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134810.711836357@de.ibm.com> Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 2 + kernel/time/clocksource.c | 112 +++++++++++++++++--------------------------- kernel/time/timekeeping.c | 41 ++++++++++------ 3 files changed, 72 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e34015effeb6..9ea40ff26f0e 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -291,4 +291,6 @@ static inline void update_vsyscall_tz(void) } #endif +extern void timekeeping_notify(struct clocksource *clock); + #endif /* _LINUX_CLOCKSOURCE_H */ diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index f18c9a6bdcf4..a1657b5fdeb9 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -109,35 +109,17 @@ EXPORT_SYMBOL(timecounter_cyc2time); /*[Clocksource internal variables]--------- * curr_clocksource: * currently selected clocksource. - * next_clocksource: - * pending next selected clocksource. * clocksource_list: * linked list with the registered clocksources - * clocksource_lock: - * protects manipulations to curr_clocksource and next_clocksource - * and the clocksource_list + * clocksource_mutex: + * protects manipulations to curr_clocksource and the clocksource_list * override_name: * Name of the user-specified clocksource. 
*/ static struct clocksource *curr_clocksource; -static struct clocksource *next_clocksource; static LIST_HEAD(clocksource_list); -static DEFINE_SPINLOCK(clocksource_lock); +static DEFINE_MUTEX(clocksource_mutex); static char override_name[32]; -static int finished_booting; - -/* clocksource_done_booting - Called near the end of core bootup - * - * Hack to avoid lots of clocksource churn at boot time. - * We use fs_initcall because we want this to start before - * device_initcall but after subsys_initcall. - */ -static int __init clocksource_done_booting(void) -{ - finished_booting = 1; - return 0; -} -fs_initcall(clocksource_done_booting); #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static LIST_HEAD(watchdog_list); @@ -356,18 +338,16 @@ static inline void clocksource_resume_watchdog(void) { } void clocksource_resume(void) { struct clocksource *cs; - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); - list_for_each_entry(cs, &clocksource_list, list) { + list_for_each_entry(cs, &clocksource_list, list) if (cs->resume) cs->resume(); - } clocksource_resume_watchdog(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } /** @@ -383,28 +363,13 @@ void clocksource_touch_watchdog(void) } #ifdef CONFIG_GENERIC_TIME -/** - * clocksource_get_next - Returns the selected clocksource - * - */ -struct clocksource *clocksource_get_next(void) -{ - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); - if (next_clocksource && finished_booting) { - curr_clocksource = next_clocksource; - next_clocksource = NULL; - } - spin_unlock_irqrestore(&clocksource_lock, flags); - - return curr_clocksource; -} +static int finished_booting; /** * clocksource_select - Select the best clocksource available * - * Private function. Must hold clocksource_lock when called. + * Private function. Must hold clocksource_mutex when called. * * Select the clocksource with the best rating, or the clocksource, * which is selected by userspace override. @@ -413,7 +378,7 @@ static void clocksource_select(void) { struct clocksource *best, *cs; - if (list_empty(&clocksource_list)) + if (!finished_booting || list_empty(&clocksource_list)) return; /* First clocksource on the list has the best rating. */ best = list_first_entry(&clocksource_list, struct clocksource, list); @@ -438,13 +403,31 @@ static void clocksource_select(void) best = cs; break; } - if (curr_clocksource != best) - next_clocksource = best; + if (curr_clocksource != best) { + printk(KERN_INFO "Switching to clocksource %s\n", best->name); + curr_clocksource = best; + timekeeping_notify(curr_clocksource); + } } +/* + * clocksource_done_booting - Called near the end of core bootup + * + * Hack to avoid lots of clocksource churn at boot time. + * We use fs_initcall because we want this to start before + * device_initcall but after subsys_initcall. 
+ */ +static int __init clocksource_done_booting(void) +{ + finished_booting = 1; + clocksource_select(); + return 0; +} +fs_initcall(clocksource_done_booting); + #else /* CONFIG_GENERIC_TIME */ -static void clocksource_select(void) { } +static inline void clocksource_select(void) { } #endif @@ -471,13 +454,11 @@ static void clocksource_enqueue(struct clocksource *cs) */ int clocksource_register(struct clocksource *cs) { - unsigned long flags; - - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); clocksource_enqueue(cs); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); clocksource_enqueue_watchdog(cs); + mutex_unlock(&clocksource_mutex); return 0; } EXPORT_SYMBOL(clocksource_register); @@ -487,14 +468,12 @@ EXPORT_SYMBOL(clocksource_register); */ void clocksource_change_rating(struct clocksource *cs, int rating) { - unsigned long flags; - - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); list_del(&cs->list); cs->rating = rating; clocksource_enqueue(cs); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_change_rating); @@ -503,13 +482,11 @@ EXPORT_SYMBOL(clocksource_change_rating); */ void clocksource_unregister(struct clocksource *cs) { - unsigned long flags; - + mutex_lock(&clocksource_mutex); clocksource_dequeue_watchdog(cs); - spin_lock_irqsave(&clocksource_lock, flags); list_del(&cs->list); clocksource_select(); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); } EXPORT_SYMBOL(clocksource_unregister); @@ -527,9 +504,9 @@ sysfs_show_current_clocksources(struct sys_device *dev, { ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); count = snprintf(buf, PAGE_SIZE, "%s\n", curr_clocksource->name); - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); return count; } @@ -557,14 +534,14 @@ static ssize_t sysfs_override_clocksource(struct sys_device *dev, if (buf[count-1] == '\n') count--; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); if (count > 0) memcpy(override_name, buf, count); override_name[count] = 0; clocksource_select(); - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); return ret; } @@ -584,7 +561,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, struct clocksource *src; ssize_t count = 0; - spin_lock_irq(&clocksource_lock); + mutex_lock(&clocksource_mutex); list_for_each_entry(src, &clocksource_list, list) { /* * Don't show non-HRES clocksource if the tick code is @@ -596,7 +573,7 @@ sysfs_show_available_clocksources(struct sys_device *dev, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "%s ", src->name); } - spin_unlock_irq(&clocksource_lock); + mutex_unlock(&clocksource_mutex); count += snprintf(buf + count, max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n"); @@ -651,11 +628,10 @@ device_initcall(init_clocksource_sysfs); */ static int __init boot_override_clocksource(char* str) { - unsigned long flags; - spin_lock_irqsave(&clocksource_lock, flags); + mutex_lock(&clocksource_mutex); if (str) strlcpy(override_name, str, sizeof(override_name)); - spin_unlock_irqrestore(&clocksource_lock, flags); + mutex_unlock(&clocksource_mutex); return 1; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 27ae01b596b7..41579e7fcf9d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -18,6 +18,7 @@ #include #include #include 
+#include /* Structure holding internal timekeeping values. */ struct timekeeper { @@ -179,6 +180,7 @@ void timekeeping_leap_insert(int leapsecond) } #ifdef CONFIG_GENERIC_TIME + /** * timekeeping_forward_now - update clock to the current time * @@ -351,31 +353,40 @@ EXPORT_SYMBOL(do_settimeofday); * * Accumulates current time interval and initializes new clocksource */ -static void change_clocksource(void) +static int change_clocksource(void *data) { struct clocksource *new, *old; - new = clocksource_get_next(); - - if (!new || timekeeper.clock == new) - return; + new = (struct clocksource *) data; timekeeping_forward_now(); + if (!new->enable || new->enable(new) == 0) { + old = timekeeper.clock; + timekeeper_setup_internals(new); + if (old->disable) + old->disable(old); + } + return 0; +} - if (new->enable && !new->enable(new)) +/** + * timekeeping_notify - Install a new clock source + * @clock: pointer to the clock source + * + * This function is called from clocksource.c after a new, better clock + * source has been registered. The caller holds the clocksource_mutex. + */ +void timekeeping_notify(struct clocksource *clock) +{ + if (timekeeper.clock == clock) return; - - old = timekeeper.clock; - timekeeper_setup_internals(new); - - if (old->disable) - old->disable(old); - + stop_machine(change_clocksource, clock, NULL); tick_clock_notify(); } + #else /* GENERIC_TIME */ + static inline void timekeeping_forward_now(void) { } -static inline void change_clocksource(void) { } /** * ktime_get - get the monotonic time in ktime_t format @@ -416,6 +427,7 @@ void ktime_get_ts(struct timespec *ts) ts->tv_nsec + tomono.tv_nsec); } EXPORT_SYMBOL_GPL(ktime_get_ts); + #endif /* !GENERIC_TIME */ /** @@ -773,7 +785,6 @@ void update_wall_time(void) update_xtime_cache(nsecs); /* check to see if there is a new clocksource to use */ - change_clocksource(); update_vsyscall(&xtime, timekeeper.clock); } -- cgit v1.2.3 From d4f587c67fc39e0030ddd718675e252e208da4d7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:31 +0200 Subject: timekeeping: Increase granularity of read_persistent_clock() The persistent clock of some architectures (e.g. s390) have a better granularity than seconds. To reduce the delta between the host clock and the guest clock in a virtualized system change the read_persistent_clock function to return a struct timespec. 
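A small stand-alone sketch of the interface change (my_rtc_read_seconds() and the _demo suffix are hypothetical stand-ins for an architecture's RTC helper, not kernel code):

#include <stdio.h>
#include <time.h>

/* Hypothetical helper standing in for an architecture's seconds-only RTC. */
static unsigned long my_rtc_read_seconds(void)
{
	return (unsigned long)time(NULL);
}

/*
 * Old interface:  unsigned long read_persistent_clock(void);
 * New interface:  void read_persistent_clock(struct timespec *ts);
 *
 * Architectures with a finer-grained clock (e.g. the s390 TOD) can now
 * fill in tv_nsec; seconds-only RTCs simply set it to 0, which is the
 * pattern most of the per-architecture hunks below follow.
 */
static void read_persistent_clock_demo(struct timespec *ts)
{
	ts->tv_sec = my_rtc_read_seconds();
	ts->tv_nsec = 0;
}

int main(void)
{
	struct timespec ts;

	read_persistent_clock_demo(&ts);
	printf("persistent clock: %ld.%09ld\n",
	       (long)ts.tv_sec, (long)ts.tv_nsec);
	return 0;
}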
Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.013873340@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/m68knommu/kernel/time.c | 5 ++-- arch/mips/dec/time.c | 5 ++-- arch/mips/lasat/ds1603.c | 5 ++-- arch/mips/lasat/sysctl.c | 8 ++++-- arch/mips/lemote/lm2e/setup.c | 5 ++-- arch/mips/mti-malta/malta-time.c | 5 ++-- arch/mips/pmc-sierra/yosemite/setup.c | 5 ++-- arch/mips/sibyte/swarm/setup.c | 15 +++++++--- arch/mips/sni/time.c | 5 ++-- arch/powerpc/kernel/time.c | 7 +++-- arch/s390/kernel/time.c | 22 +++------------ arch/sh/kernel/time.c | 6 ++-- arch/x86/kernel/rtc.c | 5 ++-- arch/xtensa/kernel/time.c | 5 ++-- include/linux/time.h | 2 +- kernel/time/timekeeping.c | 52 +++++++++++++++++++---------------- 16 files changed, 83 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index d182b2f72211..68432248515c 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -72,9 +72,10 @@ static unsigned long read_rtc_mmss(void) return mktime(year, mon, day, hour, min, sec);; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return read_rtc_mmss(); + ts->tv_sec = read_rtc_mmss(); + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c index 463136e6685a..02f505f23c32 100644 --- a/arch/mips/dec/time.c +++ b/arch/mips/dec/time.c @@ -18,7 +18,7 @@ #include #include -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned int year, mon, day, hour, min, sec, real_year; unsigned long flags; @@ -53,7 +53,8 @@ unsigned long read_persistent_clock(void) year += real_year - 72 + 2000; - return mktime(year, mon, day, hour, min, sec); + ts->tv_sec = mktime(year, mon, day, hour, min, sec); + ts->tv_nsec = 0; } /* diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c index 52cb1436a12a..c6fd96ff118d 100644 --- a/arch/mips/lasat/ds1603.c +++ b/arch/mips/lasat/ds1603.c @@ -135,7 +135,7 @@ static void rtc_end_op(void) lasat_ndelay(1000); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long word; unsigned long flags; @@ -147,7 +147,8 @@ unsigned long read_persistent_clock(void) rtc_end_op(); spin_unlock_irqrestore(&rtc_lock, flags); - return word; + ts->tv_sec = word; + ts->tv_nsec = 0; } int rtc_mips_set_mmss(unsigned long time) diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index 8f88886feb12..3f04d4c406b7 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -92,10 +92,12 @@ static int rtctmp; int proc_dolasatrtc(ctl_table *table, int write, struct file *filp, void *buffer, size_t *lenp, loff_t *ppos) { + struct timespec ts; int r; if (!write) { - rtctmp = read_persistent_clock(); + read_persistent_clock(&ts); + rtctmp = ts.tv_sec; /* check for time < 0 and set to 0 */ if (rtctmp < 0) rtctmp = 0; @@ -134,9 +136,11 @@ int sysctl_lasat_rtc(ctl_table *table, void *oldval, size_t *oldlenp, void *newval, size_t newlen) { + struct timespec ts; int r; - rtctmp = read_persistent_clock(); + read_persistent_clock(&ts); + rtctmp = ts.tv_sec; if (rtctmp < 0) rtctmp = 0; r = sysctl_intvec(table, oldval, oldlenp, newval, newlen); diff --git a/arch/mips/lemote/lm2e/setup.c b/arch/mips/lemote/lm2e/setup.c index ebd6ceaef2fd..24b355df6127 100644 --- a/arch/mips/lemote/lm2e/setup.c +++ 
b/arch/mips/lemote/lm2e/setup.c @@ -54,9 +54,10 @@ void __init plat_time_init(void) mips_hpt_frequency = cpu_clock_freq / 2; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return mc146818_get_cmos_time(); + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; } void (*__wbflush)(void); diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 0b97d47691fc..3c6f190aa61c 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -100,9 +100,10 @@ static unsigned int __init estimate_cpu_frequency(void) return count; } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return mc146818_get_cmos_time(); + ts->tv_sec = mc146818_get_cmos_time(); + ts->tv_nsec = 0; } static void __init plat_perf_setup(void) diff --git a/arch/mips/pmc-sierra/yosemite/setup.c b/arch/mips/pmc-sierra/yosemite/setup.c index 2d3c0dca275d..3498ac9c35af 100644 --- a/arch/mips/pmc-sierra/yosemite/setup.c +++ b/arch/mips/pmc-sierra/yosemite/setup.c @@ -70,7 +70,7 @@ void __init bus_error_init(void) } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned int year, month, day, hour, min, sec; unsigned long flags; @@ -92,7 +92,8 @@ unsigned long read_persistent_clock(void) m48t37_base->control = 0x00; spin_unlock_irqrestore(&rtc_lock, flags); - return mktime(year, month, day, hour, min, sec); + ts->tv_sec = mktime(year, month, day, hour, min, sec); + ts->tv_nsec = 0; } int rtc_mips_set_time(unsigned long tim) diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c index 672e45d495a9..623ffc933c4c 100644 --- a/arch/mips/sibyte/swarm/setup.c +++ b/arch/mips/sibyte/swarm/setup.c @@ -87,19 +87,26 @@ enum swarm_rtc_type { enum swarm_rtc_type swarm_rtc_type; -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { + unsigned long sec; + switch (swarm_rtc_type) { case RTC_XICOR: - return xicor_get_time(); + sec = xicor_get_time(); + break; case RTC_M4LT81: - return m41t81_get_time(); + sec = m41t81_get_time(); + break; case RTC_NONE: default: - return mktime(2000, 1, 1, 0, 0, 0); + sec = mktime(2000, 1, 1, 0, 0, 0); + break; } + ts->tv_sec = sec; + tv->tv_nsec = 0; } int rtc_mips_set_time(unsigned long sec) diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index 0d9ec1a5c24a..62df6a598e0a 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -182,7 +182,8 @@ void __init plat_time_init(void) setup_pit_timer(); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - return -1; + ts->tv_sec = -1; + ts->tv_nsec = 0; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index eae4511ceeac..ad63f30fe3da 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -769,7 +769,7 @@ int update_persistent_clock(struct timespec now) return ppc_md.set_rtc_time(&tm); } -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { struct rtc_time tm; static int first = 1; @@ -787,8 +787,9 @@ unsigned long read_persistent_clock(void) if (!ppc_md.get_rtc_time) return 0; ppc_md.get_rtc_time(&tm); - return mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, - tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); + ts->tv_nsec = 0; } /* clocksource code */ diff --git a/arch/s390/kernel/time.c 
b/arch/s390/kernel/time.c index e76c2e7a8b9a..a94ec48587b4 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -182,12 +182,9 @@ static void timing_alert_interrupt(__u16 code) static void etr_reset(void); static void stp_reset(void); -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - struct timespec ts; - - tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, &ts); - return ts.tv_sec; + tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); } static cycle_t read_tod_clock(struct clocksource *cs) @@ -248,7 +245,6 @@ void __init time_init(void) { struct timespec ts; unsigned long flags; - cycle_t now; /* Reset time synchronization interfaces. */ etr_reset(); @@ -266,20 +262,10 @@ void __init time_init(void) panic("Could not register TOD clock source"); /* - * The TOD clock is an accurate clock. The xtime should be - * initialized in a way that the difference between TOD and - * xtime is reasonably small. Too bad that timekeeping_init - * sets xtime.tv_nsec to zero. In addition the clock source - * change from the jiffies clock source to the TOD clock - * source add another error of up to 1/HZ second. The same - * function sets wall_to_monotonic to a value that is too - * small for /proc/uptime to be accurate. - * Reset xtime and wall_to_monotonic to sane values. + * Reset wall_to_monotonic to the initial timestamp created + * in head.S to get a precise value in /proc/uptime. */ write_seqlock_irqsave(&xtime_lock, flags); - now = get_clock(); - tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); - clocksource_tod.cycle_last = now; tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); write_sequnlock_irqrestore(&xtime_lock, flags); diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index 9b352a1e3fb4..3f4706aa975e 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -39,11 +39,9 @@ void (*rtc_sh_get_time)(struct timespec *) = null_rtc_get_time; int (*rtc_sh_set_time)(const time_t) = null_rtc_set_time; #ifdef CONFIG_GENERIC_CMOS_UPDATE -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { - struct timespec tv; - rtc_sh_get_time(&tv); - return tv.tv_sec; + rtc_sh_get_time(&ts); } int update_persistent_clock(struct timespec now) diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 5d465b207e72..bf67dcb4a44c 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -178,7 +178,7 @@ static int set_rtc_mmss(unsigned long nowtime) } /* not static: needed by APM */ -unsigned long read_persistent_clock(void) +void read_persistent_clock(struct timespec *ts) { unsigned long retval, flags; @@ -186,7 +186,8 @@ unsigned long read_persistent_clock(void) retval = get_wallclock(); spin_unlock_irqrestore(&rtc_lock, flags); - return retval; + ts->tv_sec = retval; + ts->tv_nsec = 0; } int update_persistent_clock(struct timespec now) diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 8848120d291b..19085ff0484a 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -59,9 +59,8 @@ static struct irqaction timer_irqaction = { void __init time_init(void) { - xtime.tv_nsec = 0; - xtime.tv_sec = read_persistent_clock(); - + /* FIXME: xtime&wall_to_monotonic are set in timekeeping_init. 
*/ + read_persistent_clock(&xtime); set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); diff --git a/include/linux/time.h b/include/linux/time.h index e7c844558884..53a3216f0d1b 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -101,7 +101,7 @@ extern struct timespec xtime; extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; -extern unsigned long read_persistent_clock(void); +extern void read_persistent_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 41579e7fcf9d..f1a21ce491e6 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -154,7 +154,7 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); */ struct timespec xtime __attribute__ ((aligned (16))); struct timespec wall_to_monotonic __attribute__ ((aligned (16))); -static unsigned long total_sleep_time; /* seconds */ +static struct timespec total_sleep_time; /* * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. @@ -487,17 +487,18 @@ int timekeeping_valid_for_hres(void) } /** - * read_persistent_clock - Return time in seconds from the persistent clock. + * read_persistent_clock - Return time from the persistent clock. * * Weak dummy function for arches that do not yet support it. - * Returns seconds from epoch using the battery backed persistent clock. - * Returns zero if unsupported. + * Reads the time from the battery backed persistent clock. + * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. * * XXX - Do be sure to remove it once all arches implement it. */ -unsigned long __attribute__((weak)) read_persistent_clock(void) +void __attribute__((weak)) read_persistent_clock(struct timespec *ts) { - return 0; + ts->tv_sec = 0; + ts->tv_nsec = 0; } /* @@ -507,7 +508,9 @@ void __init timekeeping_init(void) { struct clocksource *clock; unsigned long flags; - unsigned long sec = read_persistent_clock(); + struct timespec now; + + read_persistent_clock(&now); write_seqlock_irqsave(&xtime_lock, flags); @@ -518,19 +521,20 @@ void __init timekeeping_init(void) clock->enable(clock); timekeeper_setup_internals(clock); - xtime.tv_sec = sec; - xtime.tv_nsec = 0; + xtime.tv_sec = now.tv_sec; + xtime.tv_nsec = now.tv_nsec; raw_time.tv_sec = 0; raw_time.tv_nsec = 0; set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); update_xtime_cache(0); - total_sleep_time = 0; + total_sleep_time.tv_sec = 0; + total_sleep_time.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); } /* time in seconds when suspend began */ -static unsigned long timekeeping_suspend_time; +static struct timespec timekeeping_suspend_time; /** * timekeeping_resume - Resumes the generic timekeeping subsystem. 
@@ -543,18 +547,19 @@ static unsigned long timekeeping_suspend_time; static int timekeeping_resume(struct sys_device *dev) { unsigned long flags; - unsigned long now = read_persistent_clock(); + struct timespec ts; + + read_persistent_clock(&ts); clocksource_resume(); write_seqlock_irqsave(&xtime_lock, flags); - if (now && (now > timekeeping_suspend_time)) { - unsigned long sleep_length = now - timekeeping_suspend_time; - - xtime.tv_sec += sleep_length; - wall_to_monotonic.tv_sec -= sleep_length; - total_sleep_time += sleep_length; + if (timespec_compare(&ts, &timekeeping_suspend_time) > 0) { + ts = timespec_sub(ts, timekeeping_suspend_time); + xtime = timespec_add_safe(xtime, ts); + wall_to_monotonic = timespec_sub(wall_to_monotonic, ts); + total_sleep_time = timespec_add_safe(total_sleep_time, ts); } update_xtime_cache(0); /* re-base the last cycle value */ @@ -577,7 +582,7 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state) { unsigned long flags; - timekeeping_suspend_time = read_persistent_clock(); + read_persistent_clock(&timekeeping_suspend_time); write_seqlock_irqsave(&xtime_lock, flags); timekeeping_forward_now(); @@ -801,9 +806,10 @@ void update_wall_time(void) */ void getboottime(struct timespec *ts) { - set_normalized_timespec(ts, - - (wall_to_monotonic.tv_sec + total_sleep_time), - - wall_to_monotonic.tv_nsec); + struct timespec boottime; + + boottime = timespec_add_safe(wall_to_monotonic, total_sleep_time); + set_normalized_timespec(ts, -boottime.tv_sec, -boottime.tv_nsec); } /** @@ -812,7 +818,7 @@ void getboottime(struct timespec *ts) */ void monotonic_to_bootbased(struct timespec *ts) { - ts->tv_sec += total_sleep_time; + *ts = timespec_add_safe(*ts, total_sleep_time); } unsigned long get_seconds(void) -- cgit v1.2.3 From 23970e389e9cee43c4b41023935e1417271708b2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 14 Aug 2009 15:47:32 +0200 Subject: timekeeping: Introduce read_boot_clock Add the new function read_boot_clock to get the exact time the system has been started. For architectures without support for exact boot time a new weak function is added that returns 0. Use the exact boot time to initialize wall_to_monotonic, or xtime if the read_boot_clock returned 0. Signed-off-by: Martin Schwidefsky Cc: Ingo Molnar Acked-by: John Stultz Cc: Daniel Walker LKML-Reference: <20090814134811.296703241@de.ibm.com> Signed-off-by: Thomas Gleixner --- arch/s390/kernel/time.c | 17 +++++------------ include/linux/time.h | 1 + kernel/time/timekeeping.c | 24 ++++++++++++++++++++++-- 3 files changed, 28 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index a94ec48587b4..6bff1a1d9060 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -187,6 +187,11 @@ void read_persistent_clock(struct timespec *ts) tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, ts); } +void read_boot_clock(struct timespec *ts) +{ + tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, ts); +} + static cycle_t read_tod_clock(struct clocksource *cs) { return get_clock(); @@ -243,9 +248,6 @@ void update_vsyscall_tz(void) */ void __init time_init(void) { - struct timespec ts; - unsigned long flags; - /* Reset time synchronization interfaces. 
*/ etr_reset(); stp_reset(); @@ -261,15 +263,6 @@ void __init time_init(void) if (clocksource_register(&clocksource_tod) != 0) panic("Could not register TOD clock source"); - /* - * Reset wall_to_monotonic to the initial timestamp created - * in head.S to get a precise value in /proc/uptime. - */ - write_seqlock_irqsave(&xtime_lock, flags); - tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); - set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); - write_sequnlock_irqrestore(&xtime_lock, flags); - /* Enable TOD clock interrupts on the boot cpu. */ init_cpu_timer(); diff --git a/include/linux/time.h b/include/linux/time.h index 53a3216f0d1b..f505988398e6 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -102,6 +102,7 @@ extern struct timespec wall_to_monotonic; extern seqlock_t xtime_lock; extern void read_persistent_clock(struct timespec *ts); +extern void read_boot_clock(struct timespec *ts); extern int update_persistent_clock(struct timespec now); extern int no_sync_cmos_clock __read_mostly; void timekeeping_init(void); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f1a21ce491e6..15e06defca55 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -501,6 +501,21 @@ void __attribute__((weak)) read_persistent_clock(struct timespec *ts) ts->tv_nsec = 0; } +/** + * read_boot_clock - Return time of the system start. + * + * Weak dummy function for arches that do not yet support it. + * Function to read the exact time the system has been started. + * Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported. + * + * XXX - Do be sure to remove it once all arches implement it. + */ +void __attribute__((weak)) read_boot_clock(struct timespec *ts) +{ + ts->tv_sec = 0; + ts->tv_nsec = 0; +} + /* * timekeeping_init - Initializes the clocksource and common timekeeping values */ @@ -508,9 +523,10 @@ void __init timekeeping_init(void) { struct clocksource *clock; unsigned long flags; - struct timespec now; + struct timespec now, boot; read_persistent_clock(&now); + read_boot_clock(&boot); write_seqlock_irqsave(&xtime_lock, flags); @@ -525,8 +541,12 @@ void __init timekeeping_init(void) xtime.tv_nsec = now.tv_nsec; raw_time.tv_sec = 0; raw_time.tv_nsec = 0; + if (boot.tv_sec == 0 && boot.tv_nsec == 0) { + boot.tv_sec = xtime.tv_sec; + boot.tv_nsec = xtime.tv_nsec; + } set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + -boot.tv_sec, -boot.tv_nsec); update_xtime_cache(0); total_sleep_time.tv_sec = 0; total_sleep_time.tv_nsec = 0; -- cgit v1.2.3 From 799e64f05f4bfaad2bb3165cab95c8c992a1c296 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 15 Aug 2009 09:53:47 -0700 Subject: cpu hotplug: Introduce cpu_notifier() to handle !HOTPLUG_CPU case This patch introduces a new cpu_notifier() API that is similar to hotcpu_notifier(), but which also notifies of CPUs coming online during boot in the !HOTPLUG_CPU case. Reported-by: Ingo Molnar Reported-by: Hugh Dickins Tested-by: Hugh Dickins Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: josht@linux.vnet.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: benh@kernel.crashing.org LKML-Reference: <12503552312611-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/cpu.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 4d668e05d458..47536197ffdd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -48,6 +48,15 @@ struct notifier_block; #ifdef CONFIG_SMP /* Need to know about CPUs going up/down? */ +#if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) +#define cpu_notifier(fn, pri) { \ + static struct notifier_block fn##_nb __cpuinitdata = \ + { .notifier_call = fn, .priority = pri }; \ + register_cpu_notifier(&fn##_nb); \ +} +#else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ +#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ #ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); extern void unregister_cpu_notifier(struct notifier_block *nb); @@ -74,6 +83,8 @@ extern void cpu_maps_update_done(void); #else /* CONFIG_SMP */ +#define cpu_notifier(fn, pri) do { (void)(fn); } while (0) + static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; @@ -99,11 +110,7 @@ extern struct sysdev_class cpu_sysdev_class; extern void get_online_cpus(void); extern void put_online_cpus(void); -#define hotcpu_notifier(fn, pri) { \ - static struct notifier_block fn##_nb __cpuinitdata = \ - { .notifier_call = fn, .priority = pri }; \ - register_cpu_notifier(&fn##_nb); \ -} +#define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) int cpu_down(unsigned int cpu); -- cgit v1.2.3 From 2bf49690325b62480a42f7afed5e9f164173c570 Mon Sep 17 00:00:00 2001 From: Thomas Liu Date: Tue, 14 Jul 2009 12:14:09 -0400 Subject: SELinux: Convert avc_audit to use lsm_audit.h Convert avc_audit in security/selinux/avc.c to use lsm_audit.h, for better maintainability. - changed selinux to use common_audit_data instead of avc_audit_data - eliminated code in avc.c and used code from lsm_audit.h instead. Had to add a LSM_AUDIT_NO_AUDIT to lsm_audit.h so that avc_audit can call common_lsm_audit and do the pre and post callbacks without doing the actual dump. This makes it so that the patched version behaves the same way as the unpatched version. Also added a denied field to the selinux_audit_data private space, once again to make it so that the patched version behaves like the unpatched. I've tested and confirmed that AVCs look the same before and after this patch. 
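In rough outline (a sketch of the resulting flow, not a literal excerpt from the patch), an SELinux hook now describes the object with the shared common_audit_data structure, and avc_audit() attaches the SELinux-specific fields plus the two callbacks before handing the record to the generic code:

	/* in an LSM hook: describe the object with the shared structure */
	struct common_audit_data ad;

	COMMON_AUDIT_DATA_INIT(&ad, FS);
	ad.u.fs.path.dentry = dentry;
	avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad);

	/* in avc_audit(), when a denial (or auditallow match) must be logged */
	a->selinux_audit_data.denied = denied;
	a->lsm_pre_audit  = avc_audit_pre_callback;	/* "avc: denied { ... } for" prefix */
	a->lsm_post_audit = avc_audit_post_callback;	/* source/target contexts, tclass */
	common_lsm_audit(a);	/* dumps pid/comm and the LSM_AUDIT_DATA_* payload */

When the hook passed no audit data at all, avc_audit() falls back to a stack-allocated common_audit_data of type LSM_AUDIT_NO_AUDIT so the object-specific dump is skipped, which keeps the output identical to the unpatched code.
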
Signed-off-by: Thomas Liu Acked-by: Stephen Smalley Signed-off-by: James Morris --- include/linux/lsm_audit.h | 2 + security/Makefile | 4 +- security/lsm_audit.c | 2 + security/selinux/avc.c | 197 ++++++++---------------------------- security/selinux/hooks.c | 142 +++++++++++++------------- security/selinux/include/avc.h | 49 +-------- security/selinux/include/netlabel.h | 4 +- security/selinux/include/xfrm.h | 8 +- security/selinux/netlabel.c | 2 +- security/selinux/xfrm.c | 4 +- 10 files changed, 131 insertions(+), 283 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index a5514a3a4f17..190c37854870 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -33,6 +33,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_IPC 4 #define LSM_AUDIT_DATA_TASK 5 #define LSM_AUDIT_DATA_KEY 6 +#define LSM_AUDIT_NO_AUDIT 7 struct task_struct *tsk; union { struct { @@ -86,6 +87,7 @@ struct common_audit_data { u16 tclass; u32 requested; u32 audited; + u32 denied; struct av_decision *avd; int result; } selinux_audit_data; diff --git a/security/Makefile b/security/Makefile index b56e7f9ecbc2..95ecc06392d7 100644 --- a/security/Makefile +++ b/security/Makefile @@ -16,9 +16,7 @@ obj-$(CONFIG_SECURITYFS) += inode.o # Must precede capability.o in order to stack properly. obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_SMACK) += smack/built-in.o -ifeq ($(CONFIG_AUDIT),y) -obj-$(CONFIG_SECURITY_SMACK) += lsm_audit.o -endif +obj-$(CONFIG_AUDIT) += lsm_audit.o obj-$(CONFIG_SECURITY_TOMOYO) += tomoyo/built-in.o obj-$(CONFIG_SECURITY_ROOTPLUG) += root_plug.o obj-$(CONFIG_CGROUP_DEVICE) += device_cgroup.o diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 94b868494b31..500aad0ebd6a 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -220,6 +220,8 @@ static void dump_common_audit_data(struct audit_buffer *ab, } switch (a->type) { + case LSM_AUDIT_NO_AUDIT: + return; case LSM_AUDIT_DATA_IPC: audit_log_format(ab, " key=%d ", a->u.ipc_id); break; diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 236aaa2ea86d..e3d19014259b 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -492,23 +492,35 @@ out: return node; } -static inline void avc_print_ipv6_addr(struct audit_buffer *ab, - struct in6_addr *addr, __be16 port, - char *name1, char *name2) +/** + * avc_audit_pre_callback - SELinux specific information + * will be called by generic audit code + * @ab: the audit buffer + * @a: audit_data + */ +static void avc_audit_pre_callback(struct audit_buffer *ab, void *a) { - if (!ipv6_addr_any(addr)) - audit_log_format(ab, " %s=%pI6", name1, addr); - if (port) - audit_log_format(ab, " %s=%d", name2, ntohs(port)); + struct common_audit_data *ad = a; + audit_log_format(ab, "avc: %s ", + ad->selinux_audit_data.denied ? 
"denied" : "granted"); + avc_dump_av(ab, ad->selinux_audit_data.tclass, + ad->selinux_audit_data.audited); + audit_log_format(ab, " for "); } -static inline void avc_print_ipv4_addr(struct audit_buffer *ab, __be32 addr, - __be16 port, char *name1, char *name2) +/** + * avc_audit_post_callback - SELinux specific information + * will be called by generic audit code + * @ab: the audit buffer + * @a: audit_data + */ +static void avc_audit_post_callback(struct audit_buffer *ab, void *a) { - if (addr) - audit_log_format(ab, " %s=%pI4", name1, &addr); - if (port) - audit_log_format(ab, " %s=%d", name2, ntohs(port)); + struct common_audit_data *ad = a; + audit_log_format(ab, " "); + avc_dump_query(ab, ad->selinux_audit_data.ssid, + ad->selinux_audit_data.tsid, + ad->selinux_audit_data.tclass); } /** @@ -532,13 +544,10 @@ static inline void avc_print_ipv4_addr(struct audit_buffer *ab, __be32 addr, */ void avc_audit(u32 ssid, u32 tsid, u16 tclass, u32 requested, - struct av_decision *avd, int result, struct avc_audit_data *a) + struct av_decision *avd, int result, struct common_audit_data *a) { - struct task_struct *tsk = current; - struct inode *inode = NULL; + struct common_audit_data stack_data; u32 denied, audited; - struct audit_buffer *ab; - denied = requested & ~avd->allowed; if (denied) { audited = denied; @@ -551,144 +560,20 @@ void avc_audit(u32 ssid, u32 tsid, if (!(audited & avd->auditallow)) return; } - - ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC); - if (!ab) - return; /* audit_panic has been called */ - audit_log_format(ab, "avc: %s ", denied ? "denied" : "granted"); - avc_dump_av(ab, tclass, audited); - audit_log_format(ab, " for "); - if (a && a->tsk) - tsk = a->tsk; - if (tsk && tsk->pid) { - audit_log_format(ab, " pid=%d comm=", tsk->pid); - audit_log_untrustedstring(ab, tsk->comm); - } - if (a) { - switch (a->type) { - case AVC_AUDIT_DATA_IPC: - audit_log_format(ab, " key=%d", a->u.ipc_id); - break; - case AVC_AUDIT_DATA_CAP: - audit_log_format(ab, " capability=%d", a->u.cap); - break; - case AVC_AUDIT_DATA_FS: - if (a->u.fs.path.dentry) { - struct dentry *dentry = a->u.fs.path.dentry; - if (a->u.fs.path.mnt) { - audit_log_d_path(ab, "path=", - &a->u.fs.path); - } else { - audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, dentry->d_name.name); - } - inode = dentry->d_inode; - } else if (a->u.fs.inode) { - struct dentry *dentry; - inode = a->u.fs.inode; - dentry = d_find_alias(inode); - if (dentry) { - audit_log_format(ab, " name="); - audit_log_untrustedstring(ab, dentry->d_name.name); - dput(dentry); - } - } - if (inode) - audit_log_format(ab, " dev=%s ino=%lu", - inode->i_sb->s_id, - inode->i_ino); - break; - case AVC_AUDIT_DATA_NET: - if (a->u.net.sk) { - struct sock *sk = a->u.net.sk; - struct unix_sock *u; - int len = 0; - char *p = NULL; - - switch (sk->sk_family) { - case AF_INET: { - struct inet_sock *inet = inet_sk(sk); - - avc_print_ipv4_addr(ab, inet->rcv_saddr, - inet->sport, - "laddr", "lport"); - avc_print_ipv4_addr(ab, inet->daddr, - inet->dport, - "faddr", "fport"); - break; - } - case AF_INET6: { - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *inet6 = inet6_sk(sk); - - avc_print_ipv6_addr(ab, &inet6->rcv_saddr, - inet->sport, - "laddr", "lport"); - avc_print_ipv6_addr(ab, &inet6->daddr, - inet->dport, - "faddr", "fport"); - break; - } - case AF_UNIX: - u = unix_sk(sk); - if (u->dentry) { - struct path path = { - .dentry = u->dentry, - .mnt = u->mnt - }; - audit_log_d_path(ab, "path=", - &path); - break; - } - if 
(!u->addr) - break; - len = u->addr->len-sizeof(short); - p = &u->addr->name->sun_path[0]; - audit_log_format(ab, " path="); - if (*p) - audit_log_untrustedstring(ab, p); - else - audit_log_n_hex(ab, p, len); - break; - } - } - - switch (a->u.net.family) { - case AF_INET: - avc_print_ipv4_addr(ab, a->u.net.v4info.saddr, - a->u.net.sport, - "saddr", "src"); - avc_print_ipv4_addr(ab, a->u.net.v4info.daddr, - a->u.net.dport, - "daddr", "dest"); - break; - case AF_INET6: - avc_print_ipv6_addr(ab, &a->u.net.v6info.saddr, - a->u.net.sport, - "saddr", "src"); - avc_print_ipv6_addr(ab, &a->u.net.v6info.daddr, - a->u.net.dport, - "daddr", "dest"); - break; - } - if (a->u.net.netif > 0) { - struct net_device *dev; - - /* NOTE: we always use init's namespace */ - dev = dev_get_by_index(&init_net, - a->u.net.netif); - if (dev) { - audit_log_format(ab, " netif=%s", - dev->name); - dev_put(dev); - } - } - break; - } + if (!a) { + a = &stack_data; + memset(a, 0, sizeof(*a)); + a->type = LSM_AUDIT_NO_AUDIT; } - audit_log_format(ab, " "); - avc_dump_query(ab, ssid, tsid, tclass); - audit_log_end(ab); + a->selinux_audit_data.tclass = tclass; + a->selinux_audit_data.requested = requested; + a->selinux_audit_data.ssid = ssid; + a->selinux_audit_data.tsid = tsid; + a->selinux_audit_data.audited = audited; + a->selinux_audit_data.denied = denied; + a->lsm_pre_audit = avc_audit_pre_callback; + a->lsm_post_audit = avc_audit_post_callback; + common_lsm_audit(a); } /** @@ -956,7 +841,7 @@ out: * another -errno upon other errors. */ int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, - u32 requested, struct avc_audit_data *auditdata) + u32 requested, struct common_audit_data *auditdata) { struct av_decision avd; int rc; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5aa45b168122..254b7983657d 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1478,14 +1478,14 @@ static int task_has_capability(struct task_struct *tsk, const struct cred *cred, int cap, int audit) { - struct avc_audit_data ad; + struct common_audit_data ad; struct av_decision avd; u16 sclass; u32 sid = cred_sid(cred); u32 av = CAP_TO_MASK(cap); int rc; - AVC_AUDIT_DATA_INIT(&ad, CAP); + COMMON_AUDIT_DATA_INIT(&ad, CAP); ad.tsk = tsk; ad.u.cap = cap; @@ -1524,10 +1524,10 @@ static int task_has_system(struct task_struct *tsk, static int inode_has_perm(const struct cred *cred, struct inode *inode, u32 perms, - struct avc_audit_data *adp) + struct common_audit_data *adp) { struct inode_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid; if (unlikely(IS_PRIVATE(inode))) @@ -1538,7 +1538,7 @@ static int inode_has_perm(const struct cred *cred, if (!adp) { adp = &ad; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.inode = inode; } @@ -1554,9 +1554,9 @@ static inline int dentry_has_perm(const struct cred *cred, u32 av) { struct inode *inode = dentry->d_inode; - struct avc_audit_data ad; + struct common_audit_data ad; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.mnt = mnt; ad.u.fs.path.dentry = dentry; return inode_has_perm(cred, inode, av, &ad); @@ -1576,11 +1576,11 @@ static int file_has_perm(const struct cred *cred, { struct file_security_struct *fsec = file->f_security; struct inode *inode = file->f_path.dentry->d_inode; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = cred_sid(cred); int rc; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path = file->f_path; if (sid != 
fsec->sid) { @@ -1611,7 +1611,7 @@ static int may_create(struct inode *dir, struct inode_security_struct *dsec; struct superblock_security_struct *sbsec; u32 sid, newsid; - struct avc_audit_data ad; + struct common_audit_data ad; int rc; dsec = dir->i_security; @@ -1620,7 +1620,7 @@ static int may_create(struct inode *dir, sid = tsec->sid; newsid = tsec->create_sid; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.dentry = dentry; rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, @@ -1664,7 +1664,7 @@ static int may_link(struct inode *dir, { struct inode_security_struct *dsec, *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); u32 av; int rc; @@ -1672,7 +1672,7 @@ static int may_link(struct inode *dir, dsec = dir->i_security; isec = dentry->d_inode->i_security; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.dentry = dentry; av = DIR__SEARCH; @@ -1707,7 +1707,7 @@ static inline int may_rename(struct inode *old_dir, struct dentry *new_dentry) { struct inode_security_struct *old_dsec, *new_dsec, *old_isec, *new_isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); u32 av; int old_is_dir, new_is_dir; @@ -1718,7 +1718,7 @@ static inline int may_rename(struct inode *old_dir, old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); new_dsec = new_dir->i_security; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.dentry = old_dentry; rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR, @@ -1760,7 +1760,7 @@ static inline int may_rename(struct inode *old_dir, static int superblock_has_perm(const struct cred *cred, struct super_block *sb, u32 perms, - struct avc_audit_data *ad) + struct common_audit_data *ad) { struct superblock_security_struct *sbsec; u32 sid = cred_sid(cred); @@ -2100,7 +2100,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) const struct task_security_struct *old_tsec; struct task_security_struct *new_tsec; struct inode_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; struct inode *inode = bprm->file->f_path.dentry->d_inode; int rc; @@ -2138,7 +2138,7 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) return rc; } - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path = bprm->file->f_path; if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) @@ -2231,7 +2231,7 @@ extern struct dentry *selinux_null; static inline void flush_unauthorized_files(const struct cred *cred, struct files_struct *files) { - struct avc_audit_data ad; + struct common_audit_data ad; struct file *file, *devnull = NULL; struct tty_struct *tty; struct fdtable *fdt; @@ -2265,7 +2265,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, /* Revalidate access to inherited open files. 
*/ - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); spin_lock(&files->file_lock); for (;;) { @@ -2514,7 +2514,7 @@ out: static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) { const struct cred *cred = current_cred(); - struct avc_audit_data ad; + struct common_audit_data ad; int rc; rc = superblock_doinit(sb, data); @@ -2525,7 +2525,7 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) if (flags & MS_KERNMOUNT) return 0; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.dentry = sb->s_root; return superblock_has_perm(cred, sb, FILESYSTEM__MOUNT, &ad); } @@ -2533,9 +2533,9 @@ static int selinux_sb_kern_mount(struct super_block *sb, int flags, void *data) static int selinux_sb_statfs(struct dentry *dentry) { const struct cred *cred = current_cred(); - struct avc_audit_data ad; + struct common_audit_data ad; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.dentry = dentry->d_sb->s_root; return superblock_has_perm(cred, dentry->d_sb, FILESYSTEM__GETATTR, &ad); } @@ -2755,7 +2755,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, struct inode *inode = dentry->d_inode; struct inode_security_struct *isec = inode->i_security; struct superblock_security_struct *sbsec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 newsid, sid = current_sid(); int rc = 0; @@ -2769,7 +2769,7 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, if (!is_owner_or_cap(inode)) return -EPERM; - AVC_AUDIT_DATA_INIT(&ad, FS); + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.path.dentry = dentry; rc = avc_has_perm(sid, isec->sid, isec->sclass, @@ -3418,7 +3418,7 @@ static void selinux_task_to_inode(struct task_struct *p, /* Returns error only if unable to parse addresses */ static int selinux_parse_skb_ipv4(struct sk_buff *skb, - struct avc_audit_data *ad, u8 *proto) + struct common_audit_data *ad, u8 *proto) { int offset, ihlen, ret = -EINVAL; struct iphdr _iph, *ih; @@ -3499,7 +3499,7 @@ out: /* Returns error only if unable to parse addresses */ static int selinux_parse_skb_ipv6(struct sk_buff *skb, - struct avc_audit_data *ad, u8 *proto) + struct common_audit_data *ad, u8 *proto) { u8 nexthdr; int ret = -EINVAL, offset; @@ -3570,7 +3570,7 @@ out: #endif /* IPV6 */ -static int selinux_parse_skb(struct sk_buff *skb, struct avc_audit_data *ad, +static int selinux_parse_skb(struct sk_buff *skb, struct common_audit_data *ad, char **_addrp, int src, u8 *proto) { char *addrp; @@ -3652,7 +3652,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock, u32 perms) { struct inode_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid; int err = 0; @@ -3662,7 +3662,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock, goto out; sid = task_sid(task); - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.sk = sock->sk; err = avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad); @@ -3749,7 +3749,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in if (family == PF_INET || family == PF_INET6) { char *addrp; struct inode_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; struct sockaddr_in *addr4 = NULL; struct sockaddr_in6 *addr6 = NULL; unsigned short snum; @@ -3778,7 +3778,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in snum, &sid); if 
(err) goto out; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.sport = htons(snum); ad.u.net.family = family; err = avc_has_perm(isec->sid, sid, @@ -3811,7 +3811,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in if (err) goto out; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.sport = htons(snum); ad.u.net.family = family; @@ -3845,7 +3845,7 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, isec = SOCK_INODE(sock)->i_security; if (isec->sclass == SECCLASS_TCP_SOCKET || isec->sclass == SECCLASS_DCCP_SOCKET) { - struct avc_audit_data ad; + struct common_audit_data ad; struct sockaddr_in *addr4 = NULL; struct sockaddr_in6 *addr6 = NULL; unsigned short snum; @@ -3870,7 +3870,7 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, perm = (isec->sclass == SECCLASS_TCP_SOCKET) ? TCP_SOCKET__NAME_CONNECT : DCCP_SOCKET__NAME_CONNECT; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.dport = htons(snum); ad.u.net.family = sk->sk_family; err = avc_has_perm(isec->sid, sid, isec->sclass, perm, &ad); @@ -3960,13 +3960,13 @@ static int selinux_socket_unix_stream_connect(struct socket *sock, struct sk_security_struct *ssec; struct inode_security_struct *isec; struct inode_security_struct *other_isec; - struct avc_audit_data ad; + struct common_audit_data ad; int err; isec = SOCK_INODE(sock)->i_security; other_isec = SOCK_INODE(other)->i_security; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.sk = other->sk; err = avc_has_perm(isec->sid, other_isec->sid, @@ -3992,13 +3992,13 @@ static int selinux_socket_unix_may_send(struct socket *sock, { struct inode_security_struct *isec; struct inode_security_struct *other_isec; - struct avc_audit_data ad; + struct common_audit_data ad; int err; isec = SOCK_INODE(sock)->i_security; other_isec = SOCK_INODE(other)->i_security; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.sk = other->sk; err = avc_has_perm(isec->sid, other_isec->sid, @@ -4011,7 +4011,7 @@ static int selinux_socket_unix_may_send(struct socket *sock, static int selinux_inet_sys_rcv_skb(int ifindex, char *addrp, u16 family, u32 peer_sid, - struct avc_audit_data *ad) + struct common_audit_data *ad) { int err; u32 if_sid; @@ -4039,10 +4039,10 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, struct sk_security_struct *sksec = sk->sk_security; u32 peer_sid; u32 sk_sid = sksec->sid; - struct avc_audit_data ad; + struct common_audit_data ad; char *addrp; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = skb->iif; ad.u.net.family = family; err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); @@ -4080,7 +4080,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) struct sk_security_struct *sksec = sk->sk_security; u16 family = sk->sk_family; u32 sk_sid = sksec->sid; - struct avc_audit_data ad; + struct common_audit_data ad; char *addrp; u8 secmark_active; u8 peerlbl_active; @@ -4104,7 +4104,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (!secmark_active && !peerlbl_active) return 0; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = skb->iif; ad.u.net.family = family; err = selinux_parse_skb(skb, &ad, &addrp, 1, NULL); @@ -4362,7 +4362,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int 
ifindex, int err; char *addrp; u32 peer_sid; - struct avc_audit_data ad; + struct common_audit_data ad; u8 secmark_active; u8 netlbl_active; u8 peerlbl_active; @@ -4379,7 +4379,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, int ifindex, if (selinux_skb_peerlbl_sid(skb, family, &peer_sid) != 0) return NF_DROP; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = ifindex; ad.u.net.family = family; if (selinux_parse_skb(skb, &ad, &addrp, 1, NULL) != 0) @@ -4467,7 +4467,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, { struct sock *sk = skb->sk; struct sk_security_struct *sksec; - struct avc_audit_data ad; + struct common_audit_data ad; char *addrp; u8 proto; @@ -4475,7 +4475,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, return NF_ACCEPT; sksec = sk->sk_security; - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = ifindex; ad.u.net.family = family; if (selinux_parse_skb(skb, &ad, &addrp, 0, &proto)) @@ -4499,7 +4499,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, u32 secmark_perm; u32 peer_sid; struct sock *sk; - struct avc_audit_data ad; + struct common_audit_data ad; char *addrp; u8 secmark_active; u8 peerlbl_active; @@ -4558,7 +4558,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, int ifindex, secmark_perm = PACKET__SEND; } - AVC_AUDIT_DATA_INIT(&ad, NET); + COMMON_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = ifindex; ad.u.net.family = family; if (selinux_parse_skb(skb, &ad, &addrp, 0, NULL)) @@ -4628,13 +4628,13 @@ static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb) static int selinux_netlink_recv(struct sk_buff *skb, int capability) { int err; - struct avc_audit_data ad; + struct common_audit_data ad; err = cap_netlink_recv(skb, capability); if (err) return err; - AVC_AUDIT_DATA_INIT(&ad, CAP); + COMMON_AUDIT_DATA_INIT(&ad, CAP); ad.u.cap = capability; return avc_has_perm(NETLINK_CB(skb).sid, NETLINK_CB(skb).sid, @@ -4693,12 +4693,12 @@ static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, u32 perms) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); isec = ipc_perms->security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = ipc_perms->key; return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad); @@ -4718,7 +4718,7 @@ static void selinux_msg_msg_free_security(struct msg_msg *msg) static int selinux_msg_queue_alloc_security(struct msg_queue *msq) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); int rc; @@ -4728,7 +4728,7 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq) isec = msq->q_perm.security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = msq->q_perm.key; rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, @@ -4748,12 +4748,12 @@ static void selinux_msg_queue_free_security(struct msg_queue *msq) static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); isec = msq->q_perm.security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = msq->q_perm.key; return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, @@ -4792,7 +4792,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct 
msg_msg *msg, { struct ipc_security_struct *isec; struct msg_security_struct *msec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); int rc; @@ -4813,7 +4813,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, return rc; } - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = msq->q_perm.key; /* Can this process write to the queue? */ @@ -4837,14 +4837,14 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, { struct ipc_security_struct *isec; struct msg_security_struct *msec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = task_sid(target); int rc; isec = msq->q_perm.security; msec = msg->security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = msq->q_perm.key; rc = avc_has_perm(sid, isec->sid, @@ -4859,7 +4859,7 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, static int selinux_shm_alloc_security(struct shmid_kernel *shp) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); int rc; @@ -4869,7 +4869,7 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp) isec = shp->shm_perm.security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = shp->shm_perm.key; rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM, @@ -4889,12 +4889,12 @@ static void selinux_shm_free_security(struct shmid_kernel *shp) static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); isec = shp->shm_perm.security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = shp->shm_perm.key; return avc_has_perm(sid, isec->sid, SECCLASS_SHM, @@ -4951,7 +4951,7 @@ static int selinux_shm_shmat(struct shmid_kernel *shp, static int selinux_sem_alloc_security(struct sem_array *sma) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); int rc; @@ -4961,7 +4961,7 @@ static int selinux_sem_alloc_security(struct sem_array *sma) isec = sma->sem_perm.security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = sma->sem_perm.key; rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM, @@ -4981,12 +4981,12 @@ static void selinux_sem_free_security(struct sem_array *sma) static int selinux_sem_associate(struct sem_array *sma, int semflg) { struct ipc_security_struct *isec; - struct avc_audit_data ad; + struct common_audit_data ad; u32 sid = current_sid(); isec = sma->sem_perm.security; - AVC_AUDIT_DATA_INIT(&ad, IPC); + COMMON_AUDIT_DATA_INIT(&ad, IPC); ad.u.ipc_id = sma->sem_perm.key; return avc_has_perm(sid, isec->sid, SECCLASS_SEM, diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h index ae4c3a0e2c1a..e94e82f73818 100644 --- a/security/selinux/include/avc.h +++ b/security/selinux/include/avc.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -36,48 +37,6 @@ struct inode; struct sock; struct sk_buff; -/* Auxiliary data to use in generating the audit record. 
*/ -struct avc_audit_data { - char type; -#define AVC_AUDIT_DATA_FS 1 -#define AVC_AUDIT_DATA_NET 2 -#define AVC_AUDIT_DATA_CAP 3 -#define AVC_AUDIT_DATA_IPC 4 - struct task_struct *tsk; - union { - struct { - struct path path; - struct inode *inode; - } fs; - struct { - int netif; - struct sock *sk; - u16 family; - __be16 dport; - __be16 sport; - union { - struct { - __be32 daddr; - __be32 saddr; - } v4; - struct { - struct in6_addr daddr; - struct in6_addr saddr; - } v6; - } fam; - } net; - int cap; - int ipc_id; - } u; -}; - -#define v4info fam.v4 -#define v6info fam.v6 - -/* Initialize an AVC audit data structure. */ -#define AVC_AUDIT_DATA_INIT(_d,_t) \ - { memset((_d), 0, sizeof(struct avc_audit_data)); (_d)->type = AVC_AUDIT_DATA_##_t; } - /* * AVC statistics */ @@ -98,7 +57,9 @@ void __init avc_init(void); void avc_audit(u32 ssid, u32 tsid, u16 tclass, u32 requested, - struct av_decision *avd, int result, struct avc_audit_data *auditdata); + struct av_decision *avd, + int result, + struct common_audit_data *a); #define AVC_STRICT 1 /* Ignore permissive mode. */ int avc_has_perm_noaudit(u32 ssid, u32 tsid, @@ -108,7 +69,7 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid, int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, u32 requested, - struct avc_audit_data *auditdata); + struct common_audit_data *auditdata); u32 avc_policy_seqno(void); diff --git a/security/selinux/include/netlabel.h b/security/selinux/include/netlabel.h index b4b5b9b2f0be..8d7384280a7a 100644 --- a/security/selinux/include/netlabel.h +++ b/security/selinux/include/netlabel.h @@ -59,7 +59,7 @@ int selinux_netlbl_socket_post_create(struct sock *sk, u16 family); int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, struct sk_buff *skb, u16 family, - struct avc_audit_data *ad); + struct common_audit_data *ad); int selinux_netlbl_socket_setsockopt(struct socket *sock, int level, int optname); @@ -129,7 +129,7 @@ static inline int selinux_netlbl_socket_post_create(struct sock *sk, static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, struct sk_buff *skb, u16 family, - struct avc_audit_data *ad) + struct common_audit_data *ad) { return 0; } diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 289e24b39e3e..13128f9a3e5a 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -41,9 +41,9 @@ static inline int selinux_xfrm_enabled(void) } int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, - struct avc_audit_data *ad); + struct common_audit_data *ad); int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad, u8 proto); + struct common_audit_data *ad, u8 proto); int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall); static inline void selinux_xfrm_notify_policyload(void) @@ -57,13 +57,13 @@ static inline int selinux_xfrm_enabled(void) } static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad) + struct common_audit_data *ad) { return 0; } static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad, u8 proto) + struct common_audit_data *ad, u8 proto) { return 0; } diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index 2e984413c7b2..e68823741ad5 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -342,7 +342,7 @@ int selinux_netlbl_socket_post_create(struct sock *sk, u16 family) int selinux_netlbl_sock_rcv_skb(struct 
sk_security_struct *sksec, struct sk_buff *skb, u16 family, - struct avc_audit_data *ad) + struct common_audit_data *ad) { int rc; u32 nlbl_sid; diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 72b18452e1a1..f3cb9ed731a9 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -401,7 +401,7 @@ int selinux_xfrm_state_delete(struct xfrm_state *x) * gone thru the IPSec process. */ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad) + struct common_audit_data *ad) { int i, rc = 0; struct sec_path *sp; @@ -442,7 +442,7 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, * checked in the selinux_xfrm_state_pol_flow_match hook above. */ int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad, u8 proto) + struct common_audit_data *ad, u8 proto) { struct dst_entry *dst; int rc = 0; -- cgit v1.2.3 From cefb87efc9aa0288849149484870d5ab989fbd59 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 16 Aug 2009 21:05:45 +1000 Subject: drm/radeon/kms: implement bo busy check + current domain This implements the busy ioctl along with a current domain check. returns 0 or -EBUSY puts the current domain no matter what the answer. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_gem.c | 22 +++++++++++++++++++++- drivers/gpu/drm/radeon/radeon_object.c | 19 +++++++++++++++++++ include/drm/radeon_drm.h | 2 +- 4 files changed, 42 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 87170a56e37b..79ad98264e33 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -242,6 +242,7 @@ int radeon_object_pin(struct radeon_object *robj, uint32_t domain, uint64_t *gpu_addr); void radeon_object_unpin(struct radeon_object *robj); int radeon_object_wait(struct radeon_object *robj); +int radeon_object_busy_domain(struct radeon_object *robj, uint32_t *cur_placement); int radeon_object_evict_vram(struct radeon_device *rdev); int radeon_object_mmap(struct radeon_object *robj, uint64_t *offset); void radeon_object_force_delete(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index cded5180c752..d4ceff13bbb1 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -262,7 +262,27 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data, int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - /* FIXME: implement */ + struct drm_radeon_gem_busy *args = data; + struct drm_gem_object *gobj; + struct radeon_object *robj; + int r; + uint32_t cur_placement; + + gobj = drm_gem_object_lookup(dev, filp, args->handle); + if (gobj == NULL) { + return -EINVAL; + } + robj = gobj->driver_private; + r = radeon_object_busy_domain(robj, &cur_placement); + if (cur_placement == TTM_PL_VRAM) + args->domain = RADEON_GEM_DOMAIN_VRAM; + if (cur_placement == TTM_PL_FLAG_TT) + args->domain = RADEON_GEM_DOMAIN_GTT; + if (cur_placement == TTM_PL_FLAG_SYSTEM) + args->domain = RADEON_GEM_DOMAIN_CPU; + mutex_lock(&dev->struct_mutex); + drm_gem_object_unreference(gobj); + mutex_unlock(&dev->struct_mutex); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index e98cae3bf4a6..b85fb83d7ae8 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ 
b/drivers/gpu/drm/radeon/radeon_object.c @@ -316,6 +316,25 @@ int radeon_object_wait(struct radeon_object *robj) return r; } +int radeon_object_busy_domain(struct radeon_object *robj, uint32_t *cur_placement) +{ + int r = 0; + + r = radeon_object_reserve(robj, true); + if (unlikely(r != 0)) { + DRM_ERROR("radeon: failed to reserve object for waiting.\n"); + return r; + } + spin_lock(&robj->tobj.lock); + *cur_placement = robj->tobj.mem.mem_type; + if (robj->tobj.sync_obj) { + r = ttm_bo_wait(&robj->tobj, true, true, true); + } + spin_unlock(&robj->tobj.lock); + radeon_object_unreserve(robj); + return r; +} + int radeon_object_evict_vram(struct radeon_device *rdev) { if (rdev->flags & RADEON_IS_IGP) { diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index af4b4826997e..f81c3232accd 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -838,7 +838,7 @@ struct drm_radeon_gem_wait_idle { struct drm_radeon_gem_busy { uint32_t handle; - uint32_t busy; + uint32_t domain; }; struct drm_radeon_gem_pread { -- cgit v1.2.3 From 9c0d90103c7e0eb6e638e5b649e9f6d8d9c1b4b3 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 31 Jul 2009 12:53:58 -0400 Subject: Capabilities: move cap_file_mmap to commoncap.c Currently we duplicate the mmap_min_addr test in cap_file_mmap and in security_file_mmap if !CONFIG_SECURITY. This patch moves cap_file_mmap into commoncap.c and then calls that function directly from security_file_mmap ifndef CONFIG_SECURITY like all of the other capability checks are done. Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/security.h | 7 ++++--- security/capability.c | 9 --------- security/commoncap.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 5eff459b3833..ac4bc3760b46 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -66,6 +66,9 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only); extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); @@ -2197,9 +2200,7 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { - if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) - return -EACCES; - return 0; + return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); } static inline int security_file_mprotect(struct vm_area_struct *vma, diff --git a/security/capability.c b/security/capability.c index 21b6cead6a8e..88f752e8152c 100644 --- a/security/capability.c +++ b/security/capability.c @@ -330,15 +330,6 @@ static int cap_file_ioctl(struct file *file, unsigned int command, return 0; } -static int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) -{ - if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) - return -EACCES; - return 0; -} - static int cap_file_mprotect(struct 
vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { diff --git a/security/commoncap.c b/security/commoncap.c index 48b7e0228fa3..6bcf6e81e547 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -984,3 +984,33 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) cap_sys_admin = 1; return __vm_enough_memory(mm, pages, cap_sys_admin); } + +/* + * cap_file_mmap - check if able to map given addr + * @file: unused + * @reqprot: unused + * @prot: unused + * @flags: unused + * @addr: address attempting to be mapped + * @addr_only: unused + * + * If the process is attempting to map memory below mmap_min_addr they need + * CAP_SYS_RAWIO. The other parameters to this function are unused by the + * capability security module. Returns 0 if this mapping should be allowed + * -EPERM if not. + */ +int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) +{ + int ret = 0; + + if (addr < mmap_min_addr) { + ret = cap_capable(current, current_cred(), CAP_SYS_RAWIO, + SECURITY_CAP_AUDIT); + /* set PF_SUPERPRIV if it turns out we allow the low mmap */ + if (ret == 0) + current->flags |= PF_SUPERPRIV; + } + return ret; +} -- cgit v1.2.3 From 788084aba2ab7348257597496befcbccabdc98a3 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 31 Jul 2009 12:54:11 -0400 Subject: Security/SELinux: seperate lsm specific mmap_min_addr Currently SELinux enforcement of controls on the ability to map low memory is determined by the mmap_min_addr tunable. This patch causes SELinux to ignore the tunable and instead use a seperate Kconfig option specific to how much space the LSM should protect. The tunable will now only control the need for CAP_SYS_RAWIO and SELinux permissions will always protect the amount of low memory designated by CONFIG_LSM_MMAP_MIN_ADDR. This allows users who need to disable the mmap_min_addr controls (usual reason being they run WINE as a non-root user) to do so and still have SELinux controls preventing confined domains (like a web server) from being able to map some area of low memory. 
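The resulting split, sketched rather than quoted from the patch (the avc call shape and the memprotect permission name follow the existing selinux_file_mmap hook and are illustrative):

	/*
	 * Capability/DAC check: governed by the /proc/sys/vm/mmap_min_addr
	 * sysctl, which now feeds dac_mmap_min_addr via mmap_min_addr_handler().
	 */
	if (addr < dac_mmap_min_addr &&
	    cap_capable(current, current_cred(), CAP_SYS_RAWIO, SECURITY_CAP_AUDIT))
		return -EPERM;

	/*
	 * SELinux check: governed by the build-time CONFIG_LSM_MMAP_MIN_ADDR
	 * floor, unaffected by how far the sysctl has been lowered.
	 */
	if (addr < CONFIG_LSM_MMAP_MIN_ADDR)
		rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT,
				  MEMPROTECT__MMAP_ZERO, NULL);

Lowering the sysctl to 0 (the usual WINE case) therefore only relaxes the DAC check; a confined domain still needs the memprotect permission to map anything below CONFIG_LSM_MMAP_MIN_ADDR.
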
Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/mm.h | 15 --------------- include/linux/security.h | 17 +++++++++++++++++ kernel/sysctl.c | 7 ++++--- mm/Kconfig | 6 +++--- mm/mmap.c | 3 --- mm/nommu.c | 3 --- security/Kconfig | 16 ++++++++++++++++ security/Makefile | 2 +- security/commoncap.c | 2 +- security/min_addr.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ security/selinux/hooks.c | 2 +- 11 files changed, 92 insertions(+), 30 deletions(-) create mode 100644 security/min_addr.c (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ba3a7cb1eaa0..9a72cc78e6b8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -34,8 +34,6 @@ extern int sysctl_legacy_va_layout; #define sysctl_legacy_va_layout 0 #endif -extern unsigned long mmap_min_addr; - #include #include #include @@ -574,19 +572,6 @@ static inline void set_page_links(struct page *page, enum zone_type zone, set_page_section(page, pfn_to_section_nr(pfn)); } -/* - * If a hint addr is less than mmap_min_addr change hint to be as - * low as possible but still greater than mmap_min_addr - */ -static inline unsigned long round_hint_to_min(unsigned long hint) -{ - hint &= PAGE_MASK; - if (((void *)hint != NULL) && - (hint < mmap_min_addr)) - return PAGE_ALIGN(mmap_min_addr); - return hint; -} - /* * Some inline functions in vmstat.h depend on page_zone() */ diff --git a/include/linux/security.h b/include/linux/security.h index ac4bc3760b46..dc3472c1f781 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -28,6 +28,7 @@ #include #include #include +#include /* PAGE_ALIGN */ #include #include #include @@ -95,6 +96,7 @@ extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb, int cap); extern unsigned long mmap_min_addr; +extern unsigned long dac_mmap_min_addr; /* * Values used in the task_security_ops calls */ @@ -147,6 +149,21 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) opts->num_mnt_opts = 0; } +/* + * If a hint addr is less than mmap_min_addr change hint to be as + * low as possible but still greater than mmap_min_addr + */ +static inline unsigned long round_hint_to_min(unsigned long hint) +{ + hint &= PAGE_MASK; + if (((void *)hint != NULL) && + (hint < mmap_min_addr)) + return PAGE_ALIGN(mmap_min_addr); + return hint; +} + +extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); /** * struct security_operations - main security structure * diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 98e02328c67d..58be76017fd0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1306,10 +1307,10 @@ static struct ctl_table vm_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "mmap_min_addr", - .data = &mmap_min_addr, - .maxlen = sizeof(unsigned long), + .data = &dac_mmap_min_addr, + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &mmap_min_addr_handler, }, #ifdef CONFIG_NUMA { diff --git a/mm/Kconfig b/mm/Kconfig index c948d4ca8bde..fe5f674d7a7d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -225,9 +225,9 @@ config DEFAULT_MMAP_MIN_ADDR For most ia64, ppc64 and x86 users with lots of address space a value of 65536 is reasonable and should cause no problems. On arm and other archs it should not be higher than 32768. 
- Programs which use vm86 functionality would either need additional - permissions from either the LSM or the capabilities module or have - this protection disabled. + Programs which use vm86 functionality or have some need to map + this low address space will need CAP_SYS_RAWIO or disable this + protection by setting the value to 0. This value can be changed after boot using the /proc/sys/vm/mmap_min_addr tunable. diff --git a/mm/mmap.c b/mm/mmap.c index 34579b23ebd5..8101de490c73 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -88,9 +88,6 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; struct percpu_counter vm_committed_as; -/* amount of vm to protect from userspace access */ -unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; - /* * Check that a process has enough memory to allocate a new virtual * mapping. 0 means there is enough memory for the allocation to diff --git a/mm/nommu.c b/mm/nommu.c index 53cab10fece4..28754c40be98 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -69,9 +69,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int heap_stack_gap = 0; -/* amount of vm to protect from userspace access */ -unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; - atomic_long_t mmap_pages_allocated; EXPORT_SYMBOL(mem_map); diff --git a/security/Kconfig b/security/Kconfig index d23c839038f0..9c60c346a91d 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -113,6 +113,22 @@ config SECURITY_ROOTPLUG If you are unsure how to answer this question, answer N. +config LSM_MMAP_MIN_ADDR + int "Low address space for LSM to from user allocation" + depends on SECURITY && SECURITY_SELINUX + default 65535 + help + This is the portion of low virtual memory which should be protected + from userspace allocation. Keeping a user from writing to low pages + can help reduce the impact of kernel NULL pointer bugs. + + For most ia64, ppc64 and x86 users with lots of address space + a value of 65536 is reasonable and should cause no problems. + On arm and other archs it should not be higher than 32768. + Programs which use vm86 functionality or have some need to map + this low address space will need the permission specific to the + systems running LSM. 
+ source security/selinux/Kconfig source security/smack/Kconfig source security/tomoyo/Kconfig diff --git a/security/Makefile b/security/Makefile index c67557cdaa85..b56e7f9ecbc2 100644 --- a/security/Makefile +++ b/security/Makefile @@ -8,7 +8,7 @@ subdir-$(CONFIG_SECURITY_SMACK) += smack subdir-$(CONFIG_SECURITY_TOMOYO) += tomoyo # always enable default capabilities -obj-y += commoncap.o +obj-y += commoncap.o min_addr.o # Object file lists obj-$(CONFIG_SECURITY) += security.o capability.o diff --git a/security/commoncap.c b/security/commoncap.c index 6bcf6e81e547..e3097c0a1311 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -1005,7 +1005,7 @@ int cap_file_mmap(struct file *file, unsigned long reqprot, { int ret = 0; - if (addr < mmap_min_addr) { + if (addr < dac_mmap_min_addr) { ret = cap_capable(current, current_cred(), CAP_SYS_RAWIO, SECURITY_CAP_AUDIT); /* set PF_SUPERPRIV if it turns out we allow the low mmap */ diff --git a/security/min_addr.c b/security/min_addr.c new file mode 100644 index 000000000000..14cc7b3b8d03 --- /dev/null +++ b/security/min_addr.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include + +/* amount of vm to protect from userspace access by both DAC and the LSM*/ +unsigned long mmap_min_addr; +/* amount of vm to protect from userspace using CAP_SYS_RAWIO (DAC) */ +unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; +/* amount of vm to protect from userspace using the LSM = CONFIG_LSM_MMAP_MIN_ADDR */ + +/* + * Update mmap_min_addr = max(dac_mmap_min_addr, CONFIG_LSM_MMAP_MIN_ADDR) + */ +static void update_mmap_min_addr(void) +{ +#ifdef CONFIG_LSM_MMAP_MIN_ADDR + if (dac_mmap_min_addr > CONFIG_LSM_MMAP_MIN_ADDR) + mmap_min_addr = dac_mmap_min_addr; + else + mmap_min_addr = CONFIG_LSM_MMAP_MIN_ADDR; +#else + mmap_min_addr = dac_mmap_min_addr; +#endif +} + +/* + * sysctl handler which just sets dac_mmap_min_addr = the new value and then + * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly + */ +int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + + update_mmap_min_addr(); + + return ret; +} + +int __init init_mmap_min_addr(void) +{ + update_mmap_min_addr(); + + return 0; +} +pure_initcall(init_mmap_min_addr); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e6d1432b0800..8d8b69c5664e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3036,7 +3036,7 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, * at bad behaviour/exploit that we always want to get the AVC, even * if DAC would have also denied the operation. */ - if (addr < mmap_min_addr) { + if (addr < CONFIG_LSM_MMAP_MIN_ADDR) { rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, MEMPROTECT__MMAP_ZERO, NULL); if (rc) -- cgit v1.2.3 From 1d9959734a1949ea4f2427bd2d8b21ede6b2441c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 7 Aug 2009 14:53:57 -0400 Subject: security: define round_hint_to_min in !CONFIG_SECURITY Fix the header files to define round_hint_to_min() and to define mmap_min_addr_handler() in the !CONFIG_SECURITY case. 
Built and tested with !CONFIG_SECURITY Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/security.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index dc3472c1f781..1f16eea2017b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -121,6 +121,21 @@ struct request_sock; #define LSM_UNSAFE_PTRACE 2 #define LSM_UNSAFE_PTRACE_CAP 4 +/* + * If a hint addr is less than mmap_min_addr change hint to be as + * low as possible but still greater than mmap_min_addr + */ +static inline unsigned long round_hint_to_min(unsigned long hint) +{ + hint &= PAGE_MASK; + if (((void *)hint != NULL) && + (hint < mmap_min_addr)) + return PAGE_ALIGN(mmap_min_addr); + return hint; +} +extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + #ifdef CONFIG_SECURITY struct security_mnt_opts { @@ -149,21 +164,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) opts->num_mnt_opts = 0; } -/* - * If a hint addr is less than mmap_min_addr change hint to be as - * low as possible but still greater than mmap_min_addr - */ -static inline unsigned long round_hint_to_min(unsigned long hint) -{ - hint &= PAGE_MASK; - if (((void *)hint != NULL) && - (hint < mmap_min_addr)) - return PAGE_ALIGN(mmap_min_addr); - return hint; -} - -extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos); /** * struct security_operations - main security structure * -- cgit v1.2.3 From b25c340c195447afb1860da580fe2a85a6b652c5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Aug 2009 12:17:22 +0200 Subject: genirq: Add oneshot support For threaded interrupt handlers we expect the hard interrupt handler part to mask the interrupt on the originating device. The interrupt line itself is reenabled after the hard interrupt handler has executed. This requires access to the originating device from hard interrupt context which is not always possible. There are devices which can only be accessed via a bus (i2c, spi, ...). The bus access requires thread context. For such devices we need to keep the interrupt line masked until the threaded handler has executed. Add a new flag IRQF_ONESHOT which allows drivers to request that the interrupt is not unmasked after the hard interrupt context handler has been executed and the thread has been woken. The interrupt line is unmasked after the thread handler function has been executed. Note that for now IRQF_ONESHOT cannot be used with IRQF_SHARED to avoid complex accounting mechanisms. For oneshot interrupts the primary handler simply returns IRQ_WAKE_THREAD and does nothing else. A generic implementation irq_default_primary_handler() is provided to avoid useless copies all over the place. It is automatically installed when request_threaded_irq() is called with handler=NULL and thread_fn!=NULL. 
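A usage sketch (not part of this patch; the foo_* names are hypothetical): a driver for a device behind a slow bus passes a NULL primary handler so that irq_default_primary_handler() is installed, and relies on IRQF_ONESHOT to keep the line masked while the threaded handler talks to the device:

static irqreturn_t foo_irq_thread(int irq, void *dev_id)
{
	struct foo_chip *chip = dev_id;

	/* Sleeping bus access is safe here; the irq line is still masked */
	foo_clear_irq_status(chip);

	/* The line is unmasked once this handler returns */
	return IRQ_HANDLED;
}

static int foo_setup_irq(struct foo_chip *chip, unsigned int irq)
{
	/* handler == NULL: the default primary handler is installed */
	return request_threaded_irq(irq, NULL, foo_irq_thread,
				    IRQF_ONESHOT, "foo", chip);
}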
Signed-off-by: Thomas Gleixner Cc: Mark Brown Cc: Dmitry Torokhov Cc: Trilok Soni Cc: Pavel Machek Cc: Brian Swetland Cc: Joonyoung Shim Cc: m.szyprowski@samsung.com Cc: t.fujak@samsung.com Cc: kyungmin.park@samsung.com, Cc: David Brownell Cc: Daniel Ribeiro Cc: arve@android.com Cc: Barry Song <21cnbao@gmail.com> --- include/linux/interrupt.h | 4 ++++ include/linux/irq.h | 1 + kernel/irq/chip.c | 14 +++++++++++--- kernel/irq/manage.c | 49 +++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 61 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 35e7df1e9f30..1ac57e522a1f 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -50,6 +50,9 @@ * IRQF_IRQPOLL - Interrupt is used for polling (only the interrupt that is * registered first in an shared interrupt is considered for * performance reasons) + * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished. + * Used by threaded interrupts which need to keep the + * irq line disabled until the threaded handler has been run. */ #define IRQF_DISABLED 0x00000020 #define IRQF_SAMPLE_RANDOM 0x00000040 @@ -59,6 +62,7 @@ #define IRQF_PERCPU 0x00000400 #define IRQF_NOBALANCING 0x00000800 #define IRQF_IRQPOLL 0x00001000 +#define IRQF_ONESHOT 0x00002000 /* * Bits used by threaded handlers: diff --git a/include/linux/irq.h b/include/linux/irq.h index cb2e77a3f7f7..5e7c6ee8c35c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -69,6 +69,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ #define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/ #define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */ +#define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 13c68e71b726..b08c0d24f202 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -382,7 +382,10 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc) spin_lock(&desc->lock); desc->status &= ~IRQ_INPROGRESS; - if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) + + if (unlikely(desc->status & IRQ_ONESHOT)) + desc->status |= IRQ_MASKED; + else if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) desc->chip->unmask(irq); out_unlock: spin_unlock(&desc->lock); @@ -478,8 +481,13 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc) kstat_incr_irqs_this_cpu(irq, desc); /* Start handling the irq */ - if (desc->chip->ack) - desc->chip->ack(irq); + if (unlikely(desc->status & IRQ_ONESHOT)) { + desc->status |= IRQ_MASKED; + mask_ack_irq(desc, irq); + } else { + if (desc->chip->ack) + desc->chip->ack(irq); + } /* Mark the IRQ currently in progress.*/ desc->status |= IRQ_INPROGRESS; diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d222515a5a06..d7f7b5fd2476 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -436,6 +436,16 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, return ret; } +/* + * Default primary interrupt handler for threaded interrupts. Is + * assigned as primary handler when request_threaded_irq is called + * with handler == NULL. Useful for oneshot interrupts. 
+ */ +static irqreturn_t irq_default_primary_handler(int irq, void *dev_id) +{ + return IRQ_WAKE_THREAD; +} + static int irq_wait_for_interrupt(struct irqaction *action) { while (!kthread_should_stop()) { @@ -451,6 +461,21 @@ static int irq_wait_for_interrupt(struct irqaction *action) return -1; } +/* + * Oneshot interrupts keep the irq line masked until the threaded + * handler finished. unmask if the interrupt has not been disabled and + * is marked MASKED. + */ +static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) +{ + spin_lock_irq(&desc->lock); + if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { + desc->status &= ~IRQ_MASKED; + desc->chip->unmask(irq); + } + spin_unlock_irq(&desc->lock); +} + #ifdef CONFIG_SMP /* * Check whether we need to change the affinity of the interrupt thread. @@ -492,7 +517,7 @@ static int irq_thread(void *data) struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; struct irqaction *action = data; struct irq_desc *desc = irq_to_desc(action->irq); - int wake; + int wake, oneshot = desc->status & IRQ_ONESHOT; sched_setscheduler(current, SCHED_FIFO, ¶m); current->irqaction = action; @@ -518,6 +543,9 @@ static int irq_thread(void *data) spin_unlock_irq(&desc->lock); action->thread_fn(action->irq, action->dev_id); + + if (oneshot) + irq_finalize_oneshot(action->irq, desc); } wake = atomic_dec_and_test(&desc->threads_active); @@ -590,6 +618,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) rand_initialize_irq(irq); } + /* Oneshot interrupts are not allowed with shared */ + if ((new->flags & IRQF_ONESHOT) && (new->flags & IRQF_SHARED)) + return -EINVAL; + /* * Threaded handler ? */ @@ -663,9 +695,12 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) desc->status |= IRQ_PER_CPU; #endif - desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING | + desc->status &= ~(IRQ_AUTODETECT | IRQ_WAITING | IRQ_ONESHOT | IRQ_INPROGRESS | IRQ_SPURIOUS_DISABLED); + if (new->flags & IRQF_ONESHOT) + desc->status |= IRQ_ONESHOT; + if (!(desc->status & IRQ_NOAUTOEN)) { desc->depth = 0; desc->status &= ~IRQ_DISABLED; @@ -878,6 +913,8 @@ EXPORT_SYMBOL(free_irq); * @irq: Interrupt line to allocate * @handler: Function to be called when the IRQ occurs. * Primary handler for threaded interrupts + * If NULL and thread_fn != NULL the default + * primary handler is installed * @thread_fn: Function called from the irq handler thread * If NULL, no irq thread is created * @irqflags: Interrupt type flags @@ -957,8 +994,12 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, if (desc->status & IRQ_NOREQUEST) return -EINVAL; - if (!handler) - return -EINVAL; + + if (!handler) { + if (!thread_fn) + return -EINVAL; + handler = irq_default_primary_handler; + } action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); if (!action) -- cgit v1.2.3 From 70aedd24d20e75198f5a0b11750faabbb56924e2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Aug 2009 12:17:48 +0200 Subject: genirq: Add buslock support Some interrupt chips are connected to a "slow" bus (i2c, spi ...). The bus access needs to sleep and therefor cannot be called in atomic contexts. Some of the generic interrupt management functions like disable_irq(), enable_irq() ... call interrupt chip functions with the irq_desc->lock held and interrupts disabled. This does not work for such devices. Provide a separate synchronization mechanism for such interrupt chips. 
The irq_chip structure is extended by two optional functions (bus_lock and bus_sync_and_unlock). The idea is to serialize the bus access for those operations in the core code so that drivers which are behind that bus operated interrupt controller do not have to worry about it and just can use the normal interfaces. To achieve this we add two function pointers to the irq_chip: bus_lock and bus_sync_unlock. bus_lock() is called to serialize access to the interrupt controller bus. Now the core code can issue chip->mask/unmask ... commands without changing the fast path code at all. The chip implementation merily stores that information in a chip private data structure and returns. No bus interaction as these functions are called from atomic context. After that bus_sync_unlock() is called outside the atomic context. Now the chip implementation issues the bus commands, waits for completion and unlocks the interrupt controller bus. The irq_chip implementation as pseudo code: struct irq_chip_data { struct mutex mutex; unsigned int irq_offset; unsigned long mask; unsigned long mask_status; } static void bus_lock(unsigned int irq) { struct irq_chip_data *data = get_irq_desc_chip_data(irq); mutex_lock(&data->mutex); } static void mask(unsigned int irq) { struct irq_chip_data *data = get_irq_desc_chip_data(irq); irq -= data->irq_offset; data->mask |= (1 << irq); } static void unmask(unsigned int irq) { struct irq_chip_data *data = get_irq_desc_chip_data(irq); irq -= data->irq_offset; data->mask &= ~(1 << irq); } static void bus_sync_unlock(unsigned int irq) { struct irq_chip_data *data = get_irq_desc_chip_data(irq); if (data->mask != data->mask_status) { do_bus_magic_to_set_mask(data->mask); data->mask_status = data->mask; } mutex_unlock(&data->mutex); } The device drivers can use request_threaded_irq, free_irq, disable_irq and enable_irq as usual with the only restriction that the calls need to come from non atomic context. Signed-off-by: Thomas Gleixner Cc: Mark Brown Cc: Dmitry Torokhov Cc: Trilok Soni Cc: Pavel Machek Cc: Brian Swetland Cc: Joonyoung Shim Cc: m.szyprowski@samsung.com Cc: t.fujak@samsung.com Cc: kyungmin.park@samsung.com, Cc: David Brownell Cc: Daniel Ribeiro Cc: arve@android.com Cc: Barry Song <21cnbao@gmail.com> --- include/linux/irq.h | 6 ++++++ kernel/irq/chip.c | 2 ++ kernel/irq/internals.h | 13 +++++++++++++ kernel/irq/manage.c | 19 ++++++++++++++++++- 4 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 5e7c6ee8c35c..ce8171bc6fac 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -101,6 +101,9 @@ struct msi_desc; * @set_type: set the flow type (IRQ_TYPE_LEVEL/etc.) 
of an IRQ * @set_wake: enable/disable power-management wake-on of an IRQ * + * @bus_lock: function to lock access to slow bus (i2c) chips + * @bus_sync_unlock: function to sync and unlock slow bus (i2c) chips + * * @release: release function solely used by UML * @typename: obsoleted by name, kept as migration helper */ @@ -124,6 +127,9 @@ struct irq_chip { int (*set_type)(unsigned int irq, unsigned int flow_type); int (*set_wake)(unsigned int irq, unsigned int on); + void (*bus_lock)(unsigned int irq); + void (*bus_sync_unlock)(unsigned int irq); + /* Currently used only by UML, might disappear one day.*/ #ifdef CONFIG_IRQ_RELEASE_METHOD void (*release)(unsigned int irq, void *dev_id); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b08c0d24f202..f856330e684a 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -580,6 +580,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, desc->chip = &dummy_irq_chip; } + chip_bus_lock(irq, desc); spin_lock_irqsave(&desc->lock, flags); /* Uninstall? */ @@ -599,6 +600,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, desc->chip->startup(irq); } spin_unlock_irqrestore(&desc->lock, flags); + chip_bus_sync_unlock(irq, desc); } EXPORT_SYMBOL_GPL(__set_irq_handler); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index e70ed5592eb9..1b5d742c6a77 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -44,6 +44,19 @@ extern int irq_select_affinity_usr(unsigned int irq); extern void irq_set_thread_affinity(struct irq_desc *desc); +/* Inline functions for support of irq chips on slow busses */ +static inline void chip_bus_lock(unsigned int irq, struct irq_desc *desc) +{ + if (unlikely(desc->chip->bus_lock)) + desc->chip->bus_lock(irq); +} + +static inline void chip_bus_sync_unlock(unsigned int irq, struct irq_desc *desc) +{ + if (unlikely(desc->chip->bus_sync_unlock)) + desc->chip->bus_sync_unlock(irq); +} + /* * Debugging printout: */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d7f7b5fd2476..0a3fd5b524c9 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -230,9 +230,11 @@ void disable_irq_nosync(unsigned int irq) if (!desc) return; + chip_bus_lock(irq, desc); spin_lock_irqsave(&desc->lock, flags); __disable_irq(desc, irq, false); spin_unlock_irqrestore(&desc->lock, flags); + chip_bus_sync_unlock(irq, desc); } EXPORT_SYMBOL(disable_irq_nosync); @@ -294,7 +296,8 @@ void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume) * matches the last disable, processing of interrupts on this * IRQ line is re-enabled. * - * This function may be called from IRQ context. + * This function may be called from IRQ context only when + * desc->chip->bus_lock and desc->chip->bus_sync_unlock are NULL ! 
*/ void enable_irq(unsigned int irq) { @@ -304,9 +307,11 @@ void enable_irq(unsigned int irq) if (!desc) return; + chip_bus_lock(irq, desc); spin_lock_irqsave(&desc->lock, flags); __enable_irq(desc, irq, false); spin_unlock_irqrestore(&desc->lock, flags); + chip_bus_sync_unlock(irq, desc); } EXPORT_SYMBOL(enable_irq); @@ -468,12 +473,14 @@ static int irq_wait_for_interrupt(struct irqaction *action) */ static void irq_finalize_oneshot(unsigned int irq, struct irq_desc *desc) { + chip_bus_lock(irq, desc); spin_lock_irq(&desc->lock); if (!(desc->status & IRQ_DISABLED) && (desc->status & IRQ_MASKED)) { desc->status &= ~IRQ_MASKED; desc->chip->unmask(irq); } spin_unlock_irq(&desc->lock); + chip_bus_sync_unlock(irq, desc); } #ifdef CONFIG_SMP @@ -904,7 +911,14 @@ EXPORT_SYMBOL_GPL(remove_irq); */ void free_irq(unsigned int irq, void *dev_id) { + struct irq_desc *desc = irq_to_desc(irq); + + if (!desc) + return; + + chip_bus_lock(irq, desc); kfree(__free_irq(irq, dev_id)); + chip_bus_sync_unlock(irq, desc); } EXPORT_SYMBOL(free_irq); @@ -1011,7 +1025,10 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, action->name = devname; action->dev_id = dev_id; + chip_bus_lock(irq, desc); retval = __setup_irq(irq, desc, action); + chip_bus_sync_unlock(irq, desc); + if (retval) kfree(action); -- cgit v1.2.3 From 399b5da29b9f851eb7b96e2882097127f003e87c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 13 Aug 2009 13:21:38 +0200 Subject: genirq: Support nested threaded irq handling Interrupt chips which are behind a slow bus (i2c, spi ...) and demultiplex other interrupt sources need to run their interrupt handler in a thread. The demultiplexed interrupt handlers need to run in thread context as well and need to finish before the demux handler thread can reenable the interrupt line. So the easiest way is to run the sub device handlers in the context of the demultiplexing handler thread. To avoid that a separate thread is created for the subdevices the function set_nested_irq_thread() is provided which sets the IRQ_NESTED_THREAD flag in the interrupt descriptor. A driver which calls request_threaded_irq() must not be aware of the fact that the threaded handler is called in the context of the demultiplexing handler thread. The setup code checks the IRQ_NESTED_THREAD flag which was set from the irq chip setup code and does not setup a separate thread for the interrupt. The primary function which is provided by the device driver is replaced by an internal dummy function which warns when it is called. For the demultiplexing handler a helper function handle_nested_irq() is provided which calls the demux interrupt thread function in the context of the caller and does the proper interrupt accounting and takes the interrupt disabled status of the demultiplexed subdevice into account. 
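A usage sketch of the demultiplexing side (not part of this patch; the foo_* names are hypothetical): the irq chip setup code marks the subdevice interrupts as nested, and the demux thread dispatches them with handle_nested_irq() so the subdevice thread functions run in its context:

static void foo_mark_subirqs_nested(struct foo_chip *chip)
{
	unsigned int i;

	for (i = 0; i < chip->nr_sub_irqs; i++)
		set_irq_nested_thread(chip->irq_base + i, 1);
}

/* Threaded handler of the demultiplexing (parent) interrupt */
static irqreturn_t foo_demux_thread(int irq, void *dev_id)
{
	struct foo_chip *chip = dev_id;
	unsigned long pending = foo_read_pending_status(chip); /* may sleep */
	unsigned int bit;

	for (bit = 0; bit < chip->nr_sub_irqs; bit++)
		if (pending & (1UL << bit))
			handle_nested_irq(chip->irq_base + bit);

	return IRQ_HANDLED;
}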
Signed-off-by: Thomas Gleixner Cc: Mark Brown Cc: Dmitry Torokhov Cc: Trilok Soni Cc: Pavel Machek Cc: Brian Swetland Cc: Joonyoung Shim Cc: m.szyprowski@samsung.com Cc: t.fujak@samsung.com Cc: kyungmin.park@samsung.com, Cc: David Brownell Cc: Daniel Ribeiro Cc: arve@android.com Cc: Barry Song <21cnbao@gmail.com> --- include/linux/irq.h | 3 +++ kernel/irq/chip.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++ kernel/irq/manage.c | 34 ++++++++++++++++++++++++--- 3 files changed, 101 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index ce8171bc6fac..8778ee993937 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -70,6 +70,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq, #define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/ #define IRQ_SUSPENDED 0x04000000 /* IRQ has gone through suspend sequence */ #define IRQ_ONESHOT 0x08000000 /* IRQ is not unmasked after hardirq */ +#define IRQ_NESTED_THREAD 0x10000000 /* IRQ is nested into another, no own handler thread */ #ifdef CONFIG_IRQ_PER_CPU # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) @@ -386,6 +387,8 @@ set_irq_chained_handler(unsigned int irq, __set_irq_handler(irq, handle, 1, NULL); } +extern void set_irq_nested_thread(unsigned int irq, int nest); + extern void set_irq_noprobe(unsigned int irq); extern void set_irq_probe(unsigned int irq); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index f856330e684a..5765aad94998 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -222,6 +222,34 @@ int set_irq_chip_data(unsigned int irq, void *data) } EXPORT_SYMBOL(set_irq_chip_data); +/** + * set_irq_nested_thread - Set/Reset the IRQ_NESTED_THREAD flag of an irq + * + * @irq: Interrupt number + * @nest: 0 to clear / 1 to set the IRQ_NESTED_THREAD flag + * + * The IRQ_NESTED_THREAD flag indicates that on + * request_threaded_irq() no separate interrupt thread should be + * created for the irq as the handler are called nested in the + * context of a demultiplexing interrupt handler thread. + */ +void set_irq_nested_thread(unsigned int irq, int nest) +{ + struct irq_desc *desc = irq_to_desc(irq); + unsigned long flags; + + if (!desc) + return; + + spin_lock_irqsave(&desc->lock, flags); + if (nest) + desc->status |= IRQ_NESTED_THREAD; + else + desc->status &= ~IRQ_NESTED_THREAD; + spin_unlock_irqrestore(&desc->lock, flags); +} +EXPORT_SYMBOL_GPL(set_irq_nested_thread); + /* * default enable function */ @@ -299,6 +327,45 @@ static inline void mask_ack_irq(struct irq_desc *desc, int irq) } } +/* + * handle_nested_irq - Handle a nested irq from a irq thread + * @irq: the interrupt number + * + * Handle interrupts which are nested into a threaded interrupt + * handler. The handler function is called inside the calling + * threads context. 
+ */ +void handle_nested_irq(unsigned int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irqaction *action; + irqreturn_t action_ret; + + might_sleep(); + + spin_lock_irq(&desc->lock); + + kstat_incr_irqs_this_cpu(irq, desc); + + action = desc->action; + if (unlikely(!action || (desc->status & IRQ_DISABLED))) + goto out_unlock; + + desc->status |= IRQ_INPROGRESS; + spin_unlock_irq(&desc->lock); + + action_ret = action->thread_fn(action->irq, action->dev_id); + if (!noirqdebug) + note_interrupt(irq, desc, action_ret); + + spin_lock_irq(&desc->lock); + desc->status &= ~IRQ_INPROGRESS; + +out_unlock: + spin_unlock_irq(&desc->lock); +} +EXPORT_SYMBOL_GPL(handle_nested_irq); + /** * handle_simple_irq - Simple and software-decoded IRQs. * @irq: the interrupt number diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0a3fd5b524c9..e485cea04bf1 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -451,6 +451,16 @@ static irqreturn_t irq_default_primary_handler(int irq, void *dev_id) return IRQ_WAKE_THREAD; } +/* + * Primary handler for nested threaded interrupts. Should never be + * called. + */ +static irqreturn_t irq_nested_primary_handler(int irq, void *dev_id) +{ + WARN(1, "Primary handler called for nested irq %d\n", irq); + return IRQ_NONE; +} + static int irq_wait_for_interrupt(struct irqaction *action) { while (!kthread_should_stop()) { @@ -600,7 +610,7 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) struct irqaction *old, **old_ptr; const char *old_name = NULL; unsigned long flags; - int shared = 0; + int nested, shared = 0; int ret; if (!desc) @@ -630,9 +640,27 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) return -EINVAL; /* - * Threaded handler ? + * Check whether the interrupt nests into another interrupt + * thread. + */ + nested = desc->status & IRQ_NESTED_THREAD; + if (nested) { + if (!new->thread_fn) + return -EINVAL; + /* + * Replace the primary handler which was provided from + * the driver for non nested interrupt handling by the + * dummy function which warns when called. + */ + new->handler = irq_nested_primary_handler; + } + + /* + * Create a handler thread when a thread function is supplied + * and the interrupt does not nest into another interrupt + * thread. */ - if (new->thread_fn) { + if (new->thread_fn && !nested) { struct task_struct *t; t = kthread_create(irq_thread, new, "irq/%d-%s", irq, -- cgit v1.2.3 From 7ead8b8313d92b3a69a1a61b0dcbc4cd66c960dc Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 17 Aug 2009 16:56:28 +0800 Subject: tracing/events: Add module tracepoints Add trace points to trace module_load, module_free, module_get, module_put and module_request, and use trace_event facility to get the trace output. Here's the sample output: TASK-PID CPU# TIMESTAMP FUNCTION | | | | | <...>-42 [000] 1.758380: module_request: fb0 wait=1 call_site=fb_open ... <...>-60 [000] 3.269403: module_load: scsi_wait_scan <...>-60 [000] 3.269432: module_put: scsi_wait_scan call_site=sys_init_module refcnt=0 <...>-61 [001] 3.273168: module_free: scsi_wait_scan ... <...>-1021 [000] 13.836081: module_load: sunrpc <...>-1021 [000] 13.840589: module_put: sunrpc call_site=sys_init_module refcnt=-1 <...>-1027 [000] 13.848098: module_get: sunrpc call_site=try_module_get refcnt=0 <...>-1027 [000] 13.848308: module_get: sunrpc call_site=get_filesystem refcnt=1 <...>-1027 [000] 13.848692: module_put: sunrpc call_site=put_filesystem refcnt=0 ... 
modprobe-2587 [001] 1088.437213: module_load: trace_events_sample F modprobe-2587 [001] 1088.437786: module_put: trace_events_sample call_site=sys_init_module refcnt=0 Note: - the taints flag can be 'F', 'C' and/or 'P' if mod->taints != 0 - the module refcnt is percpu, so it can be negative in a specific cpu Signed-off-by: Li Zefan Acked-by: Rusty Russell Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Rusty Russell LKML-Reference: <4A891B3C.5030608@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/module.h | 14 ++++- include/trace/events/module.h | 126 ++++++++++++++++++++++++++++++++++++++++++ kernel/kmod.c | 4 ++ kernel/module.c | 11 ++++ 4 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 include/trace/events/module.h (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 098bdb7bfacf..f8f92d015efe 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -17,10 +17,12 @@ #include #include #include -#include +#include #include +#include + /* Not Yet Implemented */ #define MODULE_SUPPORTED_DEVICE(name) @@ -462,7 +464,10 @@ static inline local_t *__module_ref_addr(struct module *mod, int cpu) static inline void __module_get(struct module *module) { if (module) { - local_inc(__module_ref_addr(module, get_cpu())); + unsigned int cpu = get_cpu(); + local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); put_cpu(); } } @@ -473,8 +478,11 @@ static inline int try_module_get(struct module *module) if (module) { unsigned int cpu = get_cpu(); - if (likely(module_is_live(module))) + if (likely(module_is_live(module))) { local_inc(__module_ref_addr(module, cpu)); + trace_module_get(module, _THIS_IP_, + local_read(__module_ref_addr(module, cpu))); + } else ret = 0; put_cpu(); diff --git a/include/trace/events/module.h b/include/trace/events/module.h new file mode 100644 index 000000000000..84160fb18478 --- /dev/null +++ b/include/trace/events/module.h @@ -0,0 +1,126 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM module + +#if !defined(_TRACE_MODULE_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MODULE_H + +#include + +#ifdef CONFIG_MODULES + +struct module; + +#define show_module_flags(flags) __print_flags(flags, "", \ + { (1UL << TAINT_PROPRIETARY_MODULE), "P" }, \ + { (1UL << TAINT_FORCED_MODULE), "F" }, \ + { (1UL << TAINT_CRAP), "C" }) + +TRACE_EVENT(module_load, + + TP_PROTO(struct module *mod), + + TP_ARGS(mod), + + TP_STRUCT__entry( + __field( unsigned int, taints ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->taints = mod->taints; + __assign_str(name, mod->name); + ), + + TP_printk("%s %s", __get_str(name), show_module_flags(__entry->taints)) +); + +TRACE_EVENT(module_free, + + TP_PROTO(struct module *mod), + + TP_ARGS(mod), + + TP_STRUCT__entry( + __string( name, mod->name ) + ), + + TP_fast_assign( + __assign_str(name, mod->name); + ), + + TP_printk("%s", __get_str(name)) +); + +TRACE_EVENT(module_get, + + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + + TP_ARGS(mod, ip, refcnt), + + TP_STRUCT__entry( + __field( unsigned long, ip ) + __field( int, refcnt ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->ip = ip; + __entry->refcnt = refcnt; + __assign_str(name, mod->name); + ), + + TP_printk("%s call_site=%pf refcnt=%d", + __get_str(name), (void *)__entry->ip, __entry->refcnt) +); + +TRACE_EVENT(module_put, + + TP_PROTO(struct module *mod, unsigned long ip, int refcnt), + + TP_ARGS(mod, 
ip, refcnt), + + TP_STRUCT__entry( + __field( unsigned long, ip ) + __field( int, refcnt ) + __string( name, mod->name ) + ), + + TP_fast_assign( + __entry->ip = ip; + __entry->refcnt = refcnt; + __assign_str(name, mod->name); + ), + + TP_printk("%s call_site=%pf refcnt=%d", + __get_str(name), (void *)__entry->ip, __entry->refcnt) +); + +TRACE_EVENT(module_request, + + TP_PROTO(char *name, bool wait, unsigned long ip), + + TP_ARGS(name, wait, ip), + + TP_STRUCT__entry( + __field( bool, wait ) + __field( unsigned long, ip ) + __string( name, name ) + ), + + TP_fast_assign( + __entry->wait = wait; + __entry->ip = ip; + __assign_str(name, name); + ), + + TP_printk("%s wait=%d call_site=%pf", + __get_str(name), (int)__entry->wait, (void *)__entry->ip) +); + +#endif /* CONFIG_MODULES */ + +#endif /* _TRACE_MODULE_H */ + +/* This part must be outside protection */ +#include + diff --git a/kernel/kmod.c b/kernel/kmod.c index 385c31a1bdbf..a92280870e30 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -37,6 +37,8 @@ #include #include +#include + extern int max_threads; static struct workqueue_struct *khelper_wq; @@ -108,6 +110,8 @@ int __request_module(bool wait, const char *fmt, ...) return -ENOMEM; } + trace_module_request(module_name, wait, _RET_IP_); + ret = call_usermodehelper(modprobe_path, argv, envp, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC); atomic_dec(&kmod_concurrent); diff --git a/kernel/module.c b/kernel/module.c index fd1411403558..b1821438694e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -55,6 +55,11 @@ #include #include +#define CREATE_TRACE_POINTS +#include + +EXPORT_TRACEPOINT_SYMBOL(module_get); + #if 0 #define DEBUGP printk #else @@ -940,6 +945,8 @@ void module_put(struct module *module) if (module) { unsigned int cpu = get_cpu(); local_dec(__module_ref_addr(module, cpu)); + trace_module_put(module, _RET_IP_, + local_read(__module_ref_addr(module, cpu))); /* Maybe they're waiting for us to drop reference? */ if (unlikely(!module_is_live(module))) wake_up_process(module->waiter); @@ -1491,6 +1498,8 @@ static int __unlink_module(void *_mod) /* Free a module, remove from lists, etc (must hold module_mutex). */ static void free_module(struct module *mod) { + trace_module_free(mod); + /* Delete from various lists */ stop_machine(__unlink_module, mod, NULL); remove_notes_attrs(mod); @@ -2358,6 +2367,8 @@ static noinline struct module *load_module(void __user *umod, /* Get rid of temporary copy */ vfree(hdr); + trace_module_load(mod); + /* Done! */ return mod; -- cgit v1.2.3 From 0ccff1a49def92d6b838a6da166c89004b3a4d0c Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Mon, 17 Aug 2009 22:38:04 -0400 Subject: jbd2: bitfields should be unsigned This fixes sparse noise: error: dubious one-bit signed bitfield Signed-off-by: H Hartley Sweeten Signed-off-by: Andrew Morton Signed-off-by: "Theodore Ts'o" Cc: Jan Kara --- include/linux/jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index d97eb652d6ca..52695d3dfd0b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -652,7 +652,7 @@ struct transaction_s * This transaction is being forced and some process is * waiting for it to finish. 
*/ - int t_synchronous_commit:1; + unsigned int t_synchronous_commit:1; /* * For use by the filesystem to store fs-specific data -- cgit v1.2.3 From c1a8f1f1c8e01eab5862c8db39b49ace814e6c66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Aug 2009 09:36:49 +0000 Subject: net: restore gnet_stats_basic to previous definition In 5e140dfc1fe87eae27846f193086724806b33c7d "net: reorder struct Qdisc for better SMP performance" the definition of struct gnet_stats_basic changed incompatibly, as copies of this struct are shipped to userland via netlink. Restoring old behavior is not welcome, for performance reason. Fix is to use a private structure for kernel, and teach gnet_stats_copy_basic() to convert from kernel to user land, using legacy structure (struct gnet_stats_basic) Based on a report and initial patch from Michael Spang. Reported-by: Michael Spang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/gen_stats.h | 5 +++++ include/net/act_api.h | 2 +- include/net/gen_stats.h | 10 +++++----- include/net/netfilter/xt_rateest.h | 2 +- include/net/sch_generic.h | 2 +- net/core/gen_estimator.c | 12 ++++++------ net/core/gen_stats.c | 11 ++++++++--- net/netfilter/xt_RATEEST.c | 2 +- net/sched/sch_atm.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 2 +- 13 files changed, 33 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h index 0ffa41df0ee8..710e901085d0 100644 --- a/include/linux/gen_stats.h +++ b/include/linux/gen_stats.h @@ -19,6 +19,11 @@ enum { * @packets: number of seen packets */ struct gnet_stats_basic +{ + __u64 bytes; + __u32 packets; +}; +struct gnet_stats_basic_packed { __u64 bytes; __u32 packets; diff --git a/include/net/act_api.h b/include/net/act_api.h index 565eed8fe496..c05fd717c588 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -16,7 +16,7 @@ struct tcf_common { u32 tcfc_capab; int tcfc_action; struct tcf_t tcfc_tm; - struct gnet_stats_basic tcfc_bstats; + struct gnet_stats_basic_packed tcfc_bstats; struct gnet_stats_queue tcfc_qstats; struct gnet_stats_rate_est tcfc_rate_est; spinlock_t tcfc_lock; diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index d136b5240ef2..c1488553e349 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -28,7 +28,7 @@ extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d); extern int gnet_stats_copy_basic(struct gnet_dump *d, - struct gnet_stats_basic *b); + struct gnet_stats_basic_packed *b); extern int gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r); extern int gnet_stats_copy_queue(struct gnet_dump *d, @@ -37,14 +37,14 @@ extern int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); extern int gnet_stats_finish_copy(struct gnet_dump *d); -extern int gen_new_estimator(struct gnet_stats_basic *bstats, +extern int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt); -extern void gen_kill_estimator(struct gnet_stats_basic *bstats, +extern void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est); -extern int gen_replace_estimator(struct gnet_stats_basic *bstats, +extern int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr 
*opt); -extern bool gen_estimator_active(const struct gnet_stats_basic *bstats, +extern bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est); #endif diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h index 65d594dffbff..ddbf37e19616 100644 --- a/include/net/netfilter/xt_rateest.h +++ b/include/net/netfilter/xt_rateest.h @@ -8,7 +8,7 @@ struct xt_rateest { spinlock_t lock; struct gnet_estimator params; struct gnet_stats_rate_est rstats; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; }; extern struct xt_rateest *xt_rateest_lookup(const char *name); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 964ffa0d8815..5482e9582f55 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -72,7 +72,7 @@ struct Qdisc */ unsigned long state; struct sk_buff_head q; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; }; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 78e5bfc454ae..493775f4f2f1 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -81,7 +81,7 @@ struct gen_estimator { struct list_head list; - struct gnet_stats_basic *bstats; + struct gnet_stats_basic_packed *bstats; struct gnet_stats_rate_est *rate_est; spinlock_t *stats_lock; int ewma_log; @@ -165,7 +165,7 @@ static void gen_add_node(struct gen_estimator *est) } static -struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats, +struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est) { struct rb_node *p = est_root.rb_node; @@ -202,7 +202,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats, * * NOTE: Called under rtnl_mutex */ -int gen_new_estimator(struct gnet_stats_basic *bstats, +int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt) @@ -262,7 +262,7 @@ static void __gen_kill_estimator(struct rcu_head *head) * * NOTE: Called under rtnl_mutex */ -void gen_kill_estimator(struct gnet_stats_basic *bstats, +void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est) { struct gen_estimator *e; @@ -292,7 +292,7 @@ EXPORT_SYMBOL(gen_kill_estimator); * * Returns 0 on success or a negative error code. */ -int gen_replace_estimator(struct gnet_stats_basic *bstats, +int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { @@ -308,7 +308,7 @@ EXPORT_SYMBOL(gen_replace_estimator); * * Returns true if estimator is active, and false if not. */ -bool gen_estimator_active(const struct gnet_stats_basic *bstats, +bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est) { ASSERT_RTNL(); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index c3d0ffeac243..8569310268ab 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -106,16 +106,21 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, * if the room in the socket buffer was not sufficient. 
*/ int -gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b) +gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b) { if (d->compat_tc_stats) { d->tc_stats.bytes = b->bytes; d->tc_stats.packets = b->packets; } - if (d->tail) - return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b)); + if (d->tail) { + struct gnet_stats_basic sb; + memset(&sb, 0, sizeof(sb)); + sb.bytes = b->bytes; + sb.packets = b->packets; + return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb)); + } return 0; } diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 43f5676b1af4..d80b8192e0d4 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -74,7 +74,7 @@ static unsigned int xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par) { const struct xt_rateest_target_info *info = par->targinfo; - struct gnet_stats_basic *stats = &info->est->bstats; + struct gnet_stats_basic_packed *stats = &info->est->bstats; spin_lock_bh(&info->est->lock); stats->bytes += skb->len; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 2a8b83af7c47..ab82f145f689 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -49,7 +49,7 @@ struct atm_flow_data { struct socket *sock; /* for closing */ u32 classid; /* x:y type ID */ int ref; /* reference count */ - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct atm_flow_data *next; struct atm_flow_data *excess; /* flow for excess traffic; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 23a167670fd5..d5798e17a832 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -128,7 +128,7 @@ struct cbq_class long avgidle; long deficit; /* Saved deficit for WRR */ psched_time_t penalized; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; struct tc_cbq_xstats xstats; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 7597fe146866..12b2fb04b29b 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -22,7 +22,7 @@ struct drr_class { unsigned int refcnt; unsigned int filter_cnt; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; struct list_head alist; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 362c2811b2df..dad0144423da 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -116,7 +116,7 @@ struct hfsc_class struct Qdisc_class_common cl_common; unsigned int refcnt; /* usage count */ - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; unsigned int level; /* class level in hierarchy */ diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 88cd02626621..ec4d46399d59 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -74,7 +74,7 @@ enum htb_cmode { struct htb_class { struct Qdisc_class_common common; /* general class parameters */ - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; struct tc_htb_xstats xstats; /* our special stats */ -- cgit v1.2.3 From 1314562a9ae5f39f6f595656023c1baf970831ef Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Tue, 18 Aug 2009 15:06:02 +0900 Subject: sched, task_struct: stack_canary is not needed without CC_STACKPROTECTOR The field stack_canary is only 
used with CC_STACKPROTECTOR. This patch reduces task_struct size without CC_STACKPROTECTOR. Signed-off-by: Hiroshi Shimamoto LKML-Reference: <4A8A44CA.2020701@ct.jp.nec.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 195d72d5c102..7bc2d9290837 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1237,8 +1237,10 @@ struct task_struct { pid_t pid; pid_t tgid; +#ifdef CONFIG_CC_STACKPROTECTOR /* Canary value for the -fstack-protector gcc feature */ unsigned long stack_canary; +#endif /* * pointers to (original) parent process, youngest child, younger sibling, -- cgit v1.2.3 From 0753ba01e126020bf0f8150934903b48935b697d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 18 Aug 2009 14:11:10 -0700 Subject: mm: revert "oom: move oom_adj value" The commit 2ff05b2b (oom: move oom_adj value) moveed the oom_adj value to the mm_struct. It was a very good first step for sanitize OOM. However Paul Menage reported the commit makes regression to his job scheduler. Current OOM logic can kill OOM_DISABLED process. Why? His program has the code of similar to the following. ... set_oom_adj(OOM_DISABLE); /* The job scheduler never killed by oom */ ... if (vfork() == 0) { set_oom_adj(0); /* Invoked child can be killed */ execve("foo-bar-cmd"); } .... vfork() parent and child are shared the same mm_struct. then above set_oom_adj(0) doesn't only change oom_adj for vfork() child, it's also change oom_adj for vfork() parent. Then, vfork() parent (job scheduler) lost OOM immune and it was killed. Actually, fork-setting-exec idiom is very frequently used in userland program. We must not break this assumption. Then, this patch revert commit 2ff05b2b and related commit. Reverted commit list --------------------- - commit 2ff05b2b4e (oom: move oom_adj value from task_struct to mm_struct) - commit 4d8b9135c3 (oom: avoid unnecessary mm locking and scanning for OOM_DISABLE) - commit 8123681022 (oom: only oom kill exiting tasks with attached memory) - commit 933b787b57 (mm: copy over oom_adj value at fork time) Signed-off-by: KOSAKI Motohiro Cc: Paul Menage Cc: David Rientjes Cc: KAMEZAWA Hiroyuki Cc: Rik van Riel Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 15 +++------ fs/proc/base.c | 19 ++--------- include/linux/mm_types.h | 2 -- include/linux/sched.h | 1 + kernel/fork.c | 1 - mm/oom_kill.c | 64 +++++++++++++++++++++++--------------- 6 files changed, 48 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index fad18f9456e4..ffead13f9443 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1167,13 +1167,11 @@ CHAPTER 3: PER-PROCESS PARAMETERS 3.1 /proc//oom_adj - Adjust the oom-killer score ------------------------------------------------------ -This file can be used to adjust the score used to select which processes should -be killed in an out-of-memory situation. The oom_adj value is a characteristic -of the task's mm, so all threads that share an mm with pid will have the same -oom_adj value. A high value will increase the likelihood of this process being -killed by the oom-killer. 
Valid values are in the range -16 to +15 as -explained below and a special value of -17, which disables oom-killing -altogether for threads sharing pid's mm. +This file can be used to adjust the score used to select which processes +should be killed in an out-of-memory situation. Giving it a high score will +increase the likelihood of this process being killed by the oom-killer. Valid +values are in the range -16 to +15, plus the special value -17, which disables +oom-killing altogether for this process. The process to be killed in an out-of-memory situation is selected among all others based on its badness score. This value equals the original memory size of the process @@ -1187,9 +1185,6 @@ the parent's score if they do not share the same memory. Thus forking servers are the prime candidates to be killed. Having only one 'hungry' child will make parent less preferable than the child. -/proc//oom_adj cannot be changed for kthreads since they are immune from -oom-killing already. - /proc//oom_score shows process' current badness score. The following heuristics are then applied: diff --git a/fs/proc/base.c b/fs/proc/base.c index 175db258942f..6f742f6658a9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1003,12 +1003,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, if (!task) return -ESRCH; - task_lock(task); - if (task->mm) - oom_adjust = task->mm->oom_adj; - else - oom_adjust = OOM_DISABLE; - task_unlock(task); + oom_adjust = task->oomkilladj; put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); @@ -1037,19 +1032,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, task = get_proc_task(file->f_path.dentry->d_inode); if (!task) return -ESRCH; - task_lock(task); - if (!task->mm) { - task_unlock(task); - put_task_struct(task); - return -EINVAL; - } - if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) { - task_unlock(task); + if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { put_task_struct(task); return -EACCES; } - task->mm->oom_adj = oom_adjust; - task_unlock(task); + task->oomkilladj = oom_adjust; put_task_struct(task); if (end - buffer == 0) return -EIO; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7acc8439d9b3..0042090a4d70 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -240,8 +240,6 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ - s8 oom_adj; /* OOM kill score adjustment (bit shift) */ - cpumask_t cpu_vm_mask; /* Architecture-specific MM context */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ab08e4bb6b8..0f1ea4a66957 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1198,6 +1198,7 @@ struct task_struct { * a short time */ unsigned char fpu_counter; + s8 oomkilladj; /* OOM kill score adjustment (bit shift). */ #ifdef CONFIG_BLK_DEV_IO_TRACE unsigned int btrace_seq; #endif diff --git a/kernel/fork.c b/kernel/fork.c index 021e1138556e..144326b7af50 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -426,7 +426,6 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; - mm->oom_adj = (current->mm) ? 
current->mm->oom_adj : 0; mm->core_state = NULL; mm->nr_ptes = 0; set_mm_counter(mm, file_rss, 0); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 175a67a78a99..a7b2460e922b 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -58,7 +58,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) unsigned long points, cpu_time, run_time; struct mm_struct *mm; struct task_struct *child; - int oom_adj; task_lock(p); mm = p->mm; @@ -66,11 +65,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) task_unlock(p); return 0; } - oom_adj = mm->oom_adj; - if (oom_adj == OOM_DISABLE) { - task_unlock(p); - return 0; - } /* * The memory size of the process is the basis for the badness. @@ -154,15 +148,15 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) points /= 8; /* - * Adjust the score by oom_adj. + * Adjust the score by oomkilladj. */ - if (oom_adj) { - if (oom_adj > 0) { + if (p->oomkilladj) { + if (p->oomkilladj > 0) { if (!points) points = 1; - points <<= oom_adj; + points <<= p->oomkilladj; } else - points >>= -(oom_adj); + points >>= -(p->oomkilladj); } #ifdef DEBUG @@ -257,8 +251,11 @@ static struct task_struct *select_bad_process(unsigned long *ppoints, *ppoints = ULONG_MAX; } + if (p->oomkilladj == OOM_DISABLE) + continue; + points = badness(p, uptime.tv_sec); - if (points > *ppoints) { + if (points > *ppoints || !chosen) { chosen = p; *ppoints = points; } @@ -307,7 +304,8 @@ static void dump_tasks(const struct mem_cgroup *mem) } printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm, - get_mm_rss(mm), (int)task_cpu(p), mm->oom_adj, p->comm); + get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj, + p->comm); task_unlock(p); } while_each_thread(g, p); } @@ -325,8 +323,11 @@ static void __oom_kill_task(struct task_struct *p, int verbose) return; } - if (!p->mm) + if (!p->mm) { + WARN_ON(1); + printk(KERN_WARNING "tried to kill an mm-less task!\n"); return; + } if (verbose) printk(KERN_ERR "Killed process %d (%s)\n", @@ -348,13 +349,28 @@ static int oom_kill_task(struct task_struct *p) struct mm_struct *mm; struct task_struct *g, *q; - task_lock(p); mm = p->mm; - if (!mm || mm->oom_adj == OOM_DISABLE) { - task_unlock(p); + + /* WARNING: mm may not be dereferenced since we did not obtain its + * value from get_task_mm(p). This is OK since all we need to do is + * compare mm to q->mm below. + * + * Furthermore, even if mm contains a non-NULL value, p->mm may + * change to NULL at any time since we do not hold task_lock(p). + * However, this is of no concern to us. + */ + + if (mm == NULL) return 1; - } - task_unlock(p); + + /* + * Don't kill the process if any threads are set to OOM_DISABLE + */ + do_each_thread(g, q) { + if (q->mm == mm && q->oomkilladj == OOM_DISABLE) + return 1; + } while_each_thread(g, q); + __oom_kill_task(p, 1); /* @@ -377,11 +393,10 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, struct task_struct *c; if (printk_ratelimit()) { - task_lock(current); printk(KERN_WARNING "%s invoked oom-killer: " - "gfp_mask=0x%x, order=%d, oom_adj=%d\n", - current->comm, gfp_mask, order, - current->mm ? 
current->mm->oom_adj : OOM_DISABLE); + "gfp_mask=0x%x, order=%d, oomkilladj=%d\n", + current->comm, gfp_mask, order, current->oomkilladj); + task_lock(current); cpuset_print_task_mems_allowed(current); task_unlock(current); dump_stack(); @@ -394,9 +409,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, /* * If the task is already exiting, don't alarm the sysadmin or kill * its children or threads, just set TIF_MEMDIE so it can die quickly - * if its mm is still attached. */ - if (p->mm && (p->flags & PF_EXITING)) { + if (p->flags & PF_EXITING) { __oom_kill_task(p, 0); return 0; } -- cgit v1.2.3 From 23428e6b4649adfbdaa6a0c93fc6d652bf5f9d44 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 18 Aug 2009 20:13:03 -0700 Subject: mdio: mdio_if_info::mmds should not be __bitwise I misunderstood the meaning of __bitwise. In practice it makes sparse warn about every use of mmds which is certainly not what we want. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/mdio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mdio.h b/include/linux/mdio.h index cfdf1df2875e..c779b49a1fda 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -304,7 +304,7 @@ static inline __u16 mdio_phy_id_devad(int phy_id) */ struct mdio_if_info { int prtad; - u32 __bitwise mmds; + u32 mmds; unsigned mode_support; struct net_device *dev; -- cgit v1.2.3 From 776f3360de6ed246e973577828f725681120fd7a Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 19 Aug 2009 15:56:37 +1000 Subject: drm: fixup includes in encoder slave header files. Signed-off-by: Dave Airlie --- include/drm/drm_encoder_slave.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_encoder_slave.h b/include/drm/drm_encoder_slave.h index 821ec40c17d8..e5e5c94ca92c 100644 --- a/include/drm/drm_encoder_slave.h +++ b/include/drm/drm_encoder_slave.h @@ -27,8 +27,8 @@ #ifndef __DRM_ENCODER_SLAVE_H__ #define __DRM_ENCODER_SLAVE_H__ -#include -#include +#include "drmP.h" +#include "drm_crtc.h" /** * struct drm_encoder_slave_funcs - Entry points exposed by a slave encoder driver -- cgit v1.2.3 From 53bd83899f5ba6b0da8f5ef976129273854a72d4 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 1 Jul 2009 10:04:40 -0700 Subject: drm: clarify scaling property names Now that we're using the scaling property in the Intel driver I noticed that the names were a bit confusing. I've corrected them according to our discussion on IRC and the mailing list, though I've left out potential new additions for a new scaling property with an integer (or two) for the scaling factor. None of the drivers implement that today, but if someone wants to do it, I think it could be done with the addition of a single new type and a new property to describe the scaling factor in the X and Y directions. 
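To make the renamed modes concrete, here is a rough sketch of how a connector driver might map them onto its panel-fitting decision. The helper name and return codes are invented for this illustration; only the DRM_MODE_SCALE_* constants come from this change, and a driver that cannot honor DRM_MODE_SCALE_NONE can simply refuse it, as the intel_lvds.c hunk below does.

/* Hypothetical helper; not part of this patch. */
static int example_pick_panel_fit(u32 scaling_mode)
{
	switch (scaling_mode) {
	case DRM_MODE_SCALE_NONE:
		return 0;	/* pass the timing through unmodified */
	case DRM_MODE_SCALE_CENTER:
		return 1;	/* center on the panel, no scaling */
	case DRM_MODE_SCALE_ASPECT:
		return 2;	/* scale to the full panel, preserve aspect */
	case DRM_MODE_SCALE_FULLSCREEN:
		return 3;	/* scale to the full panel, ignore aspect */
	default:
		return -EINVAL;
	}
}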
Signed-off-by: Jesse Barnes Acked-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 8 ++++---- drivers/gpu/drm/i915/intel_lvds.c | 14 +++----------- include/drm/drm_mode.h | 9 +++++---- 3 files changed, 12 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 362a538cdedc..39a6bc69d223 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -68,10 +68,10 @@ DRM_ENUM_NAME_FN(drm_get_dpms_name, drm_dpms_enum_list) */ static struct drm_prop_enum_list drm_scaling_mode_enum_list[] = { - { DRM_MODE_SCALE_NON_GPU, "Non-GPU" }, - { DRM_MODE_SCALE_FULLSCREEN, "Fullscreen" }, - { DRM_MODE_SCALE_NO_SCALE, "No scale" }, - { DRM_MODE_SCALE_ASPECT, "Aspect" }, + { DRM_MODE_SCALE_NONE, "None" }, + { DRM_MODE_SCALE_FULLSCREEN, "Full" }, + { DRM_MODE_SCALE_CENTER, "Center" }, + { DRM_MODE_SCALE_ASPECT, "Full aspect" }, }; static struct drm_prop_enum_list drm_dithering_mode_enum_list[] = diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index b59c65d19d81..5df486fbe056 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -38,14 +38,6 @@ #include "i915_drv.h" #include -/* - * the following four scaling options are defined. - * #define DRM_MODE_SCALE_NON_GPU 0 - * #define DRM_MODE_SCALE_FULLSCREEN 1 - * #define DRM_MODE_SCALE_NO_SCALE 2 - * #define DRM_MODE_SCALE_ASPECT 3 - */ - /* Private structure for the integrated LVDS support */ struct intel_lvds_priv { int fitting_mode; @@ -334,7 +326,7 @@ static bool intel_lvds_mode_fixup(struct drm_encoder *encoder, I915_WRITE(BCLRPAT_B, 0); switch (lvds_priv->fitting_mode) { - case DRM_MODE_SCALE_NO_SCALE: + case DRM_MODE_SCALE_CENTER: /* * For centered modes, we have to calculate border widths & * heights and modify the values programmed into the CRTC. 
@@ -670,8 +662,8 @@ static int intel_lvds_set_property(struct drm_connector *connector, connector->encoder) { struct drm_crtc *crtc = connector->encoder->crtc; struct intel_lvds_priv *lvds_priv = intel_output->dev_priv; - if (value == DRM_MODE_SCALE_NON_GPU) { - DRM_DEBUG_KMS("non_GPU property is unsupported\n"); + if (value == DRM_MODE_SCALE_NONE) { + DRM_DEBUG_KMS("no scaling not supported\n"); return 0; } if (lvds_priv->fitting_mode == value) { diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index 616aeb42b773..1f908416aedb 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -68,10 +68,11 @@ #define DRM_MODE_DPMS_OFF 3 /* Scaling mode options */ -#define DRM_MODE_SCALE_NON_GPU 0 -#define DRM_MODE_SCALE_FULLSCREEN 1 -#define DRM_MODE_SCALE_NO_SCALE 2 -#define DRM_MODE_SCALE_ASPECT 3 +#define DRM_MODE_SCALE_NONE 0 /* Unmodified timing (display or + software can still scale) */ +#define DRM_MODE_SCALE_FULLSCREEN 1 /* Full screen, ignore aspect */ +#define DRM_MODE_SCALE_CENTER 2 /* Centered, no scaling */ +#define DRM_MODE_SCALE_ASPECT 3 /* Full screen, preserve aspect */ /* Dithering mode options */ #define DRM_MODE_DITHERING_OFF 0 -- cgit v1.2.3 From 949ef70e2d1a5c12178875f513df34fc85d91a38 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 17 Aug 2009 19:49:19 +0300 Subject: drm/kms: no need to return void value (encoder) Cc: Francisco Jerez Signed-off-by: Pekka Paalanen Signed-off-by: Dave Airlie --- include/drm/drm_encoder_slave.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_encoder_slave.h b/include/drm/drm_encoder_slave.h index e5e5c94ca92c..2f65633d28a7 100644 --- a/include/drm/drm_encoder_slave.h +++ b/include/drm/drm_encoder_slave.h @@ -154,7 +154,7 @@ static inline int drm_i2c_encoder_register(struct module *owner, */ static inline void drm_i2c_encoder_unregister(struct drm_i2c_encoder_driver *driver) { - return i2c_del_driver(&driver->i2c_driver); + i2c_del_driver(&driver->i2c_driver); } void drm_i2c_encoder_destroy(struct drm_encoder *encoder); -- cgit v1.2.3 From a0724fcf829e5afb66159ef68cb16a805ea11b42 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Mon, 17 Aug 2009 01:18:38 +0300 Subject: drm/ttm: optimize bo_kmap_type values A micro-optimization on the function ttm_kmap_obj_virtual(). By defining the values of enum ttm_bo_kmap_obj::bo_kmap_type to have a bit indicating iomem, size of the function ttm_kmap_obj_virtual() will be reduced by 16 bytes on x86_64 (gcc 4.1.2). ttm_kmap_obj_virtual() may be heavily used, when buffer objects are accessed via wrappers, that work for both kinds of memory addresses: iomem cookies and kernel virtual. Signed-off-by: Pekka Paalanen Signed-off-by: Dave Airlie --- include/drm/ttm/ttm_bo_api.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index cd22ab4b495c..99dc521aa1a9 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -245,14 +245,15 @@ struct ttm_buffer_object { * premapped region. 
*/ +#define TTM_BO_MAP_IOMEM_MASK 0x80 struct ttm_bo_kmap_obj { void *virtual; struct page *page; enum { - ttm_bo_map_iomap, - ttm_bo_map_vmap, - ttm_bo_map_kmap, - ttm_bo_map_premapped, + ttm_bo_map_iomap = 1 | TTM_BO_MAP_IOMEM_MASK, + ttm_bo_map_vmap = 2, + ttm_bo_map_kmap = 3, + ttm_bo_map_premapped = 4 | TTM_BO_MAP_IOMEM_MASK, } bo_kmap_type; }; @@ -522,8 +523,7 @@ extern int ttm_bo_evict_mm(struct ttm_bo_device *bdev, unsigned mem_type); static inline void *ttm_kmap_obj_virtual(struct ttm_bo_kmap_obj *map, bool *is_iomem) { - *is_iomem = (map->bo_kmap_type == ttm_bo_map_iomap || - map->bo_kmap_type == ttm_bo_map_premapped); + *is_iomem = !!(map->bo_kmap_type & TTM_BO_MAP_IOMEM_MASK); return map->virtual; } -- cgit v1.2.3 From 327c225bd548bf7871f116a0baa5ebdac884e452 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 17 Aug 2009 16:28:37 +0200 Subject: drm: Enable drm drivers to add drm sysfs devices. Export utility functions for drivers to add specialized devices in the sysfs drm class subdirectory. Initially this will be needed form TTM to add a virtual device that handles power management. Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_sysfs.c | 25 +++++++++++++++++++++++++ include/drm/drm_sysfs.h | 12 ++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 include/drm/drm_sysfs.h (limited to 'include') diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index adc179459c25..de154556c405 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -16,6 +16,7 @@ #include #include +#include "drm_sysfs.h" #include "drm_core.h" #include "drmP.h" @@ -515,3 +516,27 @@ void drm_sysfs_device_remove(struct drm_minor *minor) { device_unregister(&minor->kdev); } + + +/** + * drm_class_device_register - Register a struct device in the drm class. + * + * @dev: pointer to struct device to register. + * + * @dev should have all relevant members pre-filled with the exception + * of the class member. In particular, the device_type member must + * be set. + */ + +int drm_class_device_register(struct device *dev) +{ + dev->class = drm_class; + return device_register(dev); +} +EXPORT_SYMBOL_GPL(drm_class_device_register); + +void drm_class_device_unregister(struct device *dev) +{ + return device_unregister(dev); +} +EXPORT_SYMBOL_GPL(drm_class_device_unregister); diff --git a/include/drm/drm_sysfs.h b/include/drm/drm_sysfs.h new file mode 100644 index 000000000000..1d8e033fde67 --- /dev/null +++ b/include/drm/drm_sysfs.h @@ -0,0 +1,12 @@ +#ifndef _DRM_SYSFS_H_ +#define _DRM_SYSFS_H_ + +/** + * This minimalistic include file is intended for users (read TTM) that + * don't want to include the full drmP.h file. + */ + +extern int drm_class_device_register(struct device *dev); +extern void drm_class_device_unregister(struct device *dev); + +#endif -- cgit v1.2.3 From 5fd9cbad3a4ae82c83c55b9c621d156c326724ef Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Mon, 17 Aug 2009 16:28:39 +0200 Subject: drm/ttm: Memory accounting rework. Use inclusive zones to simplify accounting and its sysfs representation. Use DMA32 accounting where applicable. Add a sysfs interface to make the heuristically determined limits readable and configurable. 
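From a caller's point of view the reworked accounting looks roughly like the sketch below. The example_* wrappers are hypothetical and their error handling is abbreviated, but the ttm_mem_global_alloc_page()/ttm_mem_global_free_page() calls match the API this patch introduces (compare the ttm_tt.c hunks further down).

/* Hypothetical wrappers; only the ttm_mem_global_*() calls are from this patch. */
static struct page *example_get_accounted_page(struct ttm_mem_global *glob)
{
	struct page *p = alloc_page(GFP_HIGHUSER);
	int ret;

	if (!p)
		return NULL;

	/*
	 * The page is charged to a single zone (highmem, or the
	 * dma32/kernel zone on !HIGHMEM configurations), chosen by the
	 * accounting code itself.
	 */
	ret = ttm_mem_global_alloc_page(glob, p, false, false);
	if (unlikely(ret != 0)) {
		__free_page(p);
		return NULL;
	}
	return p;
}

static void example_put_accounted_page(struct ttm_mem_global *glob,
				       struct page *p)
{
	/* Uncharge from the same zone before the page is really freed. */
	ttm_mem_global_free_page(glob, p);
	__free_page(p);
}

The per-zone limits themselves become visible under the new memory_accounting kobject in sysfs, one directory per zone with zone_memory, available_memory, emergency_memory, swap_limit and used_memory attributes, as set up in ttm_memory.c below.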
Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 6 +- drivers/gpu/drm/ttm/ttm_global.c | 4 +- drivers/gpu/drm/ttm/ttm_memory.c | 488 +++++++++++++++++++++++++++++++++------ drivers/gpu/drm/ttm/ttm_tt.c | 29 +-- include/drm/ttm/ttm_memory.h | 43 ++-- include/drm/ttm/ttm_module.h | 2 + 6 files changed, 453 insertions(+), 119 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c1c407f7cca3..f16909ceec93 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -70,7 +70,7 @@ static void ttm_bo_release_list(struct kref *list_kref) if (bo->destroy) bo->destroy(bo); else { - ttm_mem_global_free(bdev->mem_glob, bo->acc_size, false); + ttm_mem_global_free(bdev->mem_glob, bo->acc_size); kfree(bo); } } @@ -1065,14 +1065,14 @@ int ttm_buffer_object_create(struct ttm_bo_device *bdev, size_t acc_size = ttm_bo_size(bdev, (size + PAGE_SIZE - 1) >> PAGE_SHIFT); - ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false, false); + ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false); if (unlikely(ret != 0)) return ret; bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (unlikely(bo == NULL)) { - ttm_mem_global_free(mem_glob, acc_size, false); + ttm_mem_global_free(mem_glob, acc_size); return -ENOMEM; } diff --git a/drivers/gpu/drm/ttm/ttm_global.c b/drivers/gpu/drm/ttm/ttm_global.c index 0b14eb1972b8..541744d00d3e 100644 --- a/drivers/gpu/drm/ttm/ttm_global.c +++ b/drivers/gpu/drm/ttm/ttm_global.c @@ -71,7 +71,7 @@ int ttm_global_item_ref(struct ttm_global_reference *ref) mutex_lock(&item->mutex); if (item->refcount == 0) { - item->object = kmalloc(ref->size, GFP_KERNEL); + item->object = kzalloc(ref->size, GFP_KERNEL); if (unlikely(item->object == NULL)) { ret = -ENOMEM; goto out_err; @@ -89,7 +89,6 @@ int ttm_global_item_ref(struct ttm_global_reference *ref) mutex_unlock(&item->mutex); return 0; out_err: - kfree(item->object); mutex_unlock(&item->mutex); item->object = NULL; return ret; @@ -105,7 +104,6 @@ void ttm_global_item_unref(struct ttm_global_reference *ref) BUG_ON(ref->object != item->object); if (--item->refcount == 0) { ref->release(ref); - kfree(item->object); item->object = NULL; } mutex_unlock(&item->mutex); diff --git a/drivers/gpu/drm/ttm/ttm_memory.c b/drivers/gpu/drm/ttm/ttm_memory.c index 87323d4ff68d..62fb5cf0899e 100644 --- a/drivers/gpu/drm/ttm/ttm_memory.c +++ b/drivers/gpu/drm/ttm/ttm_memory.c @@ -26,15 +26,180 @@ **************************************************************************/ #include "ttm/ttm_memory.h" +#include "ttm/ttm_module.h" #include #include #include #include #include -#define TTM_PFX "[TTM] " #define TTM_MEMORY_ALLOC_RETRIES 4 +struct ttm_mem_zone { + struct kobject kobj; + struct ttm_mem_global *glob; + const char *name; + uint64_t zone_mem; + uint64_t emer_mem; + uint64_t max_mem; + uint64_t swap_limit; + uint64_t used_mem; +}; + +static struct attribute ttm_mem_sys = { + .name = "zone_memory", + .mode = S_IRUGO +}; +static struct attribute ttm_mem_emer = { + .name = "emergency_memory", + .mode = S_IRUGO | S_IWUSR +}; +static struct attribute ttm_mem_max = { + .name = "available_memory", + .mode = S_IRUGO | S_IWUSR +}; +static struct attribute ttm_mem_swap = { + .name = "swap_limit", + .mode = S_IRUGO | S_IWUSR +}; +static struct attribute ttm_mem_used = { + .name = "used_memory", + .mode = S_IRUGO +}; + +static void ttm_mem_zone_kobj_release(struct kobject *kobj) +{ + struct ttm_mem_zone *zone = + container_of(kobj, struct 
ttm_mem_zone, kobj); + + printk(KERN_INFO TTM_PFX + "Zone %7s: Used memory at exit: %llu kiB.\n", + zone->name, (unsigned long long) zone->used_mem >> 10); + kfree(zone); +} + +static ssize_t ttm_mem_zone_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) +{ + struct ttm_mem_zone *zone = + container_of(kobj, struct ttm_mem_zone, kobj); + uint64_t val = 0; + + spin_lock(&zone->glob->lock); + if (attr == &ttm_mem_sys) + val = zone->zone_mem; + else if (attr == &ttm_mem_emer) + val = zone->emer_mem; + else if (attr == &ttm_mem_max) + val = zone->max_mem; + else if (attr == &ttm_mem_swap) + val = zone->swap_limit; + else if (attr == &ttm_mem_used) + val = zone->used_mem; + spin_unlock(&zone->glob->lock); + + return snprintf(buffer, PAGE_SIZE, "%llu\n", + (unsigned long long) val >> 10); +} + +static void ttm_check_swapping(struct ttm_mem_global *glob); + +static ssize_t ttm_mem_zone_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, + size_t size) +{ + struct ttm_mem_zone *zone = + container_of(kobj, struct ttm_mem_zone, kobj); + int chars; + unsigned long val; + uint64_t val64; + + chars = sscanf(buffer, "%lu", &val); + if (chars == 0) + return size; + + val64 = val; + val64 <<= 10; + + spin_lock(&zone->glob->lock); + if (val64 > zone->zone_mem) + val64 = zone->zone_mem; + if (attr == &ttm_mem_emer) { + zone->emer_mem = val64; + if (zone->max_mem > val64) + zone->max_mem = val64; + } else if (attr == &ttm_mem_max) { + zone->max_mem = val64; + if (zone->emer_mem < val64) + zone->emer_mem = val64; + } else if (attr == &ttm_mem_swap) + zone->swap_limit = val64; + spin_unlock(&zone->glob->lock); + + ttm_check_swapping(zone->glob); + + return size; +} + +static struct attribute *ttm_mem_zone_attrs[] = { + &ttm_mem_sys, + &ttm_mem_emer, + &ttm_mem_max, + &ttm_mem_swap, + &ttm_mem_used, + NULL +}; + +static struct sysfs_ops ttm_mem_zone_ops = { + .show = &ttm_mem_zone_show, + .store = &ttm_mem_zone_store +}; + +static struct kobj_type ttm_mem_zone_kobj_type = { + .release = &ttm_mem_zone_kobj_release, + .sysfs_ops = &ttm_mem_zone_ops, + .default_attrs = ttm_mem_zone_attrs, +}; + +static void ttm_mem_global_kobj_release(struct kobject *kobj) +{ + struct ttm_mem_global *glob = + container_of(kobj, struct ttm_mem_global, kobj); + + kfree(glob); +} + +static struct kobj_type ttm_mem_glob_kobj_type = { + .release = &ttm_mem_global_kobj_release, +}; + +static bool ttm_zones_above_swap_target(struct ttm_mem_global *glob, + bool from_wq, uint64_t extra) +{ + unsigned int i; + struct ttm_mem_zone *zone; + uint64_t target; + + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + + if (from_wq) + target = zone->swap_limit; + else if (capable(CAP_SYS_ADMIN)) + target = zone->emer_mem; + else + target = zone->max_mem; + + target = (extra > target) ? 0ULL : target; + + if (zone->used_mem > target) + return true; + } + return false; +} + /** * At this point we only support a single shrink callback. * Extend this if needed, perhaps using a linked list of callbacks. @@ -42,34 +207,17 @@ * many threads may try to swap out at any given time. 
*/ -static void ttm_shrink(struct ttm_mem_global *glob, bool from_workqueue, +static void ttm_shrink(struct ttm_mem_global *glob, bool from_wq, uint64_t extra) { int ret; struct ttm_mem_shrink *shrink; - uint64_t target; - uint64_t total_target; spin_lock(&glob->lock); if (glob->shrink == NULL) goto out; - if (from_workqueue) { - target = glob->swap_limit; - total_target = glob->total_memory_swap_limit; - } else if (capable(CAP_SYS_ADMIN)) { - total_target = glob->emer_total_memory; - target = glob->emer_memory; - } else { - total_target = glob->max_total_memory; - target = glob->max_memory; - } - - total_target = (extra >= total_target) ? 0 : total_target - extra; - target = (extra >= target) ? 0 : target - extra; - - while (glob->used_memory > target || - glob->used_total_memory > total_target) { + while (ttm_zones_above_swap_target(glob, from_wq, extra)) { shrink = glob->shrink; spin_unlock(&glob->lock); ret = shrink->do_shrink(shrink); @@ -81,6 +229,8 @@ out: spin_unlock(&glob->lock); } + + static void ttm_shrink_work(struct work_struct *work) { struct ttm_mem_global *glob = @@ -89,63 +239,178 @@ static void ttm_shrink_work(struct work_struct *work) ttm_shrink(glob, true, 0ULL); } +static int ttm_mem_init_kernel_zone(struct ttm_mem_global *glob, + const struct sysinfo *si) +{ + struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL); + uint64_t mem; + + if (unlikely(!zone)) + return -ENOMEM; + + mem = si->totalram - si->totalhigh; + mem *= si->mem_unit; + + zone->name = "kernel"; + zone->zone_mem = mem; + zone->max_mem = mem >> 1; + zone->emer_mem = (mem >> 1) + (mem >> 2); + zone->swap_limit = zone->max_mem - (mem >> 3); + zone->used_mem = 0; + zone->glob = glob; + glob->zone_kernel = zone; + glob->zones[glob->num_zones++] = zone; + kobject_init(&zone->kobj, &ttm_mem_zone_kobj_type); + return kobject_add(&zone->kobj, &glob->kobj, zone->name); +} + +#ifdef CONFIG_HIGHMEM +static int ttm_mem_init_highmem_zone(struct ttm_mem_global *glob, + const struct sysinfo *si) +{ + struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL); + uint64_t mem; + + if (unlikely(!zone)) + return -ENOMEM; + + if (si->totalhigh == 0) + return 0; + + mem = si->totalram; + mem *= si->mem_unit; + + zone->name = "highmem"; + zone->zone_mem = mem; + zone->max_mem = mem >> 1; + zone->emer_mem = (mem >> 1) + (mem >> 2); + zone->swap_limit = zone->max_mem - (mem >> 3); + zone->used_mem = 0; + zone->glob = glob; + glob->zone_highmem = zone; + glob->zones[glob->num_zones++] = zone; + kobject_init(&zone->kobj, &ttm_mem_zone_kobj_type); + return kobject_add(&zone->kobj, &glob->kobj, zone->name); +} +#else +static int ttm_mem_init_dma32_zone(struct ttm_mem_global *glob, + const struct sysinfo *si) +{ + struct ttm_mem_zone *zone = kzalloc(sizeof(*zone), GFP_KERNEL); + uint64_t mem; + + if (unlikely(!zone)) + return -ENOMEM; + + mem = si->totalram; + mem *= si->mem_unit; + + /** + * No special dma32 zone needed. + */ + + if (mem <= ((uint64_t) 1ULL << 32)) + return 0; + + /* + * Limit max dma32 memory to 4GB for now + * until we can figure out how big this + * zone really is. 
+ */ + + mem = ((uint64_t) 1ULL << 32); + zone->name = "dma32"; + zone->zone_mem = mem; + zone->max_mem = mem >> 1; + zone->emer_mem = (mem >> 1) + (mem >> 2); + zone->swap_limit = zone->max_mem - (mem >> 3); + zone->used_mem = 0; + zone->glob = glob; + glob->zone_dma32 = zone; + glob->zones[glob->num_zones++] = zone; + kobject_init(&zone->kobj, &ttm_mem_zone_kobj_type); + return kobject_add(&zone->kobj, &glob->kobj, zone->name); +} +#endif + int ttm_mem_global_init(struct ttm_mem_global *glob) { struct sysinfo si; - uint64_t mem; + int ret; + int i; + struct ttm_mem_zone *zone; spin_lock_init(&glob->lock); glob->swap_queue = create_singlethread_workqueue("ttm_swap"); INIT_WORK(&glob->work, ttm_shrink_work); init_waitqueue_head(&glob->queue); + kobject_init(&glob->kobj, &ttm_mem_glob_kobj_type); + ret = kobject_add(&glob->kobj, + ttm_get_kobj(), + "memory_accounting"); + if (unlikely(ret != 0)) + goto out_no_zone; si_meminfo(&si); - mem = si.totalram - si.totalhigh; - mem *= si.mem_unit; - - glob->max_memory = mem >> 1; - glob->emer_memory = (mem >> 1) + (mem >> 2); - glob->swap_limit = glob->max_memory - (mem >> 3); - glob->used_memory = 0; - glob->used_total_memory = 0; - glob->shrink = NULL; - - mem = si.totalram; - mem *= si.mem_unit; - - glob->max_total_memory = mem >> 1; - glob->emer_total_memory = (mem >> 1) + (mem >> 2); - - glob->total_memory_swap_limit = glob->max_total_memory - (mem >> 3); - - printk(KERN_INFO TTM_PFX "TTM available graphics memory: %llu MiB\n", - glob->max_total_memory >> 20); - printk(KERN_INFO TTM_PFX "TTM available object memory: %llu MiB\n", - glob->max_memory >> 20); - + ret = ttm_mem_init_kernel_zone(glob, &si); + if (unlikely(ret != 0)) + goto out_no_zone; +#ifdef CONFIG_HIGHMEM + ret = ttm_mem_init_highmem_zone(glob, &si); + if (unlikely(ret != 0)) + goto out_no_zone; +#else + ret = ttm_mem_init_dma32_zone(glob, &si); + if (unlikely(ret != 0)) + goto out_no_zone; +#endif + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + printk(KERN_INFO TTM_PFX + "Zone %7s: Available graphics memory: %llu kiB.\n", + zone->name, (unsigned long long) zone->max_mem >> 10); + } return 0; +out_no_zone: + ttm_mem_global_release(glob); + return ret; } EXPORT_SYMBOL(ttm_mem_global_init); void ttm_mem_global_release(struct ttm_mem_global *glob) { - printk(KERN_INFO TTM_PFX "Used total memory is %llu bytes.\n", - (unsigned long long)glob->used_total_memory); + unsigned int i; + struct ttm_mem_zone *zone; + flush_workqueue(glob->swap_queue); destroy_workqueue(glob->swap_queue); glob->swap_queue = NULL; + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + kobject_del(&zone->kobj); + kobject_put(&zone->kobj); + } + kobject_del(&glob->kobj); + kobject_put(&glob->kobj); } EXPORT_SYMBOL(ttm_mem_global_release); -static inline void ttm_check_swapping(struct ttm_mem_global *glob) +static void ttm_check_swapping(struct ttm_mem_global *glob) { - bool needs_swapping; + bool needs_swapping = false; + unsigned int i; + struct ttm_mem_zone *zone; spin_lock(&glob->lock); - needs_swapping = (glob->used_memory > glob->swap_limit || - glob->used_total_memory > - glob->total_memory_swap_limit); + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + if (zone->used_mem > zone->swap_limit) { + needs_swapping = true; + break; + } + } + spin_unlock(&glob->lock); if (unlikely(needs_swapping)) @@ -153,44 +418,60 @@ static inline void ttm_check_swapping(struct ttm_mem_global *glob) } -void ttm_mem_global_free(struct ttm_mem_global *glob, - uint64_t amount, 
bool himem) +static void ttm_mem_global_free_zone(struct ttm_mem_global *glob, + struct ttm_mem_zone *single_zone, + uint64_t amount) { + unsigned int i; + struct ttm_mem_zone *zone; + spin_lock(&glob->lock); - glob->used_total_memory -= amount; - if (!himem) - glob->used_memory -= amount; - wake_up_all(&glob->queue); + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + if (single_zone && zone != single_zone) + continue; + zone->used_mem -= amount; + } spin_unlock(&glob->lock); } +void ttm_mem_global_free(struct ttm_mem_global *glob, + uint64_t amount) +{ + return ttm_mem_global_free_zone(glob, NULL, amount); +} + static int ttm_mem_global_reserve(struct ttm_mem_global *glob, - uint64_t amount, bool himem, bool reserve) + struct ttm_mem_zone *single_zone, + uint64_t amount, bool reserve) { uint64_t limit; - uint64_t lomem_limit; int ret = -ENOMEM; + unsigned int i; + struct ttm_mem_zone *zone; spin_lock(&glob->lock); + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + if (single_zone && zone != single_zone) + continue; - if (capable(CAP_SYS_ADMIN)) { - limit = glob->emer_total_memory; - lomem_limit = glob->emer_memory; - } else { - limit = glob->max_total_memory; - lomem_limit = glob->max_memory; - } + limit = (capable(CAP_SYS_ADMIN)) ? + zone->emer_mem : zone->max_mem; - if (unlikely(glob->used_total_memory + amount > limit)) - goto out_unlock; - if (unlikely(!himem && glob->used_memory + amount > lomem_limit)) - goto out_unlock; + if (zone->used_mem > limit) + goto out_unlock; + } if (reserve) { - glob->used_total_memory += amount; - if (!himem) - glob->used_memory += amount; + for (i = 0; i < glob->num_zones; ++i) { + zone = glob->zones[i]; + if (single_zone && zone != single_zone) + continue; + zone->used_mem += amount; + } } + ret = 0; out_unlock: spin_unlock(&glob->lock); @@ -199,12 +480,17 @@ out_unlock: return ret; } -int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, - bool no_wait, bool interruptible, bool himem) + +static int ttm_mem_global_alloc_zone(struct ttm_mem_global *glob, + struct ttm_mem_zone *single_zone, + uint64_t memory, + bool no_wait, bool interruptible) { int count = TTM_MEMORY_ALLOC_RETRIES; - while (unlikely(ttm_mem_global_reserve(glob, memory, himem, true) + while (unlikely(ttm_mem_global_reserve(glob, + single_zone, + memory, true) != 0)) { if (no_wait) return -ENOMEM; @@ -216,6 +502,56 @@ int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, return 0; } +int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, + bool no_wait, bool interruptible) +{ + /** + * Normal allocations of kernel memory are registered in + * all zones. + */ + + return ttm_mem_global_alloc_zone(glob, NULL, memory, no_wait, + interruptible); +} + +int ttm_mem_global_alloc_page(struct ttm_mem_global *glob, + struct page *page, + bool no_wait, bool interruptible) +{ + + struct ttm_mem_zone *zone = NULL; + + /** + * Page allocations may be registed in a single zone + * only if highmem or !dma32. 
+ */ + +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page) && glob->zone_highmem != NULL) + zone = glob->zone_highmem; +#else + if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL) + zone = glob->zone_kernel; +#endif + return ttm_mem_global_alloc_zone(glob, zone, PAGE_SIZE, no_wait, + interruptible); +} + +void ttm_mem_global_free_page(struct ttm_mem_global *glob, struct page *page) +{ + struct ttm_mem_zone *zone = NULL; + +#ifdef CONFIG_HIGHMEM + if (PageHighMem(page) && glob->zone_highmem != NULL) + zone = glob->zone_highmem; +#else + if (glob->zone_dma32 && page_to_pfn(page) > 0x00100000UL) + zone = glob->zone_kernel; +#endif + ttm_mem_global_free_zone(glob, zone, PAGE_SIZE); +} + + size_t ttm_round_pot(size_t size) { if ((size & (size - 1)) == 0) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 75dc8bd24592..4e1e2566d519 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -166,7 +166,7 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm) set_page_dirty_lock(page); ttm->pages[i] = NULL; - ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE, false); + ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE); put_page(page); } ttm->state = tt_unpopulated; @@ -187,21 +187,14 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index) if (!p) return NULL; - if (PageHighMem(p)) { - ret = - ttm_mem_global_alloc(mem_glob, PAGE_SIZE, - false, false, true); - if (unlikely(ret != 0)) - goto out_err; + ret = ttm_mem_global_alloc_page(mem_glob, p, false, false); + if (unlikely(ret != 0)) + goto out_err; + + if (PageHighMem(p)) ttm->pages[--ttm->first_himem_page] = p; - } else { - ret = - ttm_mem_global_alloc(mem_glob, PAGE_SIZE, - false, false, false); - if (unlikely(ret != 0)) - goto out_err; + else ttm->pages[++ttm->last_lomem_page] = p; - } } return p; out_err: @@ -355,8 +348,8 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm) printk(KERN_ERR TTM_PFX "Erroneous page count. " "Leaking pages.\n"); - ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE, - PageHighMem(cur_page)); + ttm_mem_global_free_page(ttm->bdev->mem_glob, + cur_page); __free_page(cur_page); } } @@ -411,7 +404,7 @@ int ttm_tt_set_user(struct ttm_tt *ttm, */ ret = ttm_mem_global_alloc(mem_glob, num_pages * PAGE_SIZE, - false, false, false); + false, false); if (unlikely(ret != 0)) return ret; @@ -422,7 +415,7 @@ int ttm_tt_set_user(struct ttm_tt *ttm, if (ret != num_pages && write) { ttm_tt_free_user_pages(ttm); - ttm_mem_global_free(mem_glob, num_pages * PAGE_SIZE, false); + ttm_mem_global_free(mem_glob, num_pages * PAGE_SIZE); return -ENOMEM; } diff --git a/include/drm/ttm/ttm_memory.h b/include/drm/ttm/ttm_memory.h index d8b8f042c4f1..6983a7cf4da4 100644 --- a/include/drm/ttm/ttm_memory.h +++ b/include/drm/ttm/ttm_memory.h @@ -32,6 +32,7 @@ #include #include #include +#include /** * struct ttm_mem_shrink - callback to shrink TTM memory usage. @@ -60,34 +61,33 @@ struct ttm_mem_shrink { * @queue: Wait queue for processes suspended waiting for memory. * @lock: Lock to protect the @shrink - and the memory accounting members, * that is, essentially the whole structure with some exceptions. - * @emer_memory: Lowmem memory limit available for root. - * @max_memory: Lowmem memory limit available for non-root. - * @swap_limit: Lowmem memory limit where the shrink workqueue kicks in. - * @used_memory: Currently used lowmem memory. - * @used_total_memory: Currently used total (lowmem + highmem) memory. 
- * @total_memory_swap_limit: Total memory limit where the shrink workqueue - * kicks in. - * @max_total_memory: Total memory available to non-root processes. - * @emer_total_memory: Total memory available to root processes. + * @zones: Array of pointers to accounting zones. + * @num_zones: Number of populated entries in the @zones array. + * @zone_kernel: Pointer to the kernel zone. + * @zone_highmem: Pointer to the highmem zone if there is one. + * @zone_dma32: Pointer to the dma32 zone if there is one. * * Note that this structure is not per device. It should be global for all * graphics devices. */ +#define TTM_MEM_MAX_ZONES 2 +struct ttm_mem_zone; struct ttm_mem_global { + struct kobject kobj; struct ttm_mem_shrink *shrink; struct workqueue_struct *swap_queue; struct work_struct work; wait_queue_head_t queue; spinlock_t lock; - uint64_t emer_memory; - uint64_t max_memory; - uint64_t swap_limit; - uint64_t used_memory; - uint64_t used_total_memory; - uint64_t total_memory_swap_limit; - uint64_t max_total_memory; - uint64_t emer_total_memory; + struct ttm_mem_zone *zones[TTM_MEM_MAX_ZONES]; + unsigned int num_zones; + struct ttm_mem_zone *zone_kernel; +#ifdef CONFIG_HIGHMEM + struct ttm_mem_zone *zone_highmem; +#else + struct ttm_mem_zone *zone_dma32; +#endif }; /** @@ -146,8 +146,13 @@ static inline void ttm_mem_unregister_shrink(struct ttm_mem_global *glob, extern int ttm_mem_global_init(struct ttm_mem_global *glob); extern void ttm_mem_global_release(struct ttm_mem_global *glob); extern int ttm_mem_global_alloc(struct ttm_mem_global *glob, uint64_t memory, - bool no_wait, bool interruptible, bool himem); + bool no_wait, bool interruptible); extern void ttm_mem_global_free(struct ttm_mem_global *glob, - uint64_t amount, bool himem); + uint64_t amount); +extern int ttm_mem_global_alloc_page(struct ttm_mem_global *glob, + struct page *page, + bool no_wait, bool interruptible); +extern void ttm_mem_global_free_page(struct ttm_mem_global *glob, + struct page *page); extern size_t ttm_round_pot(size_t size); #endif diff --git a/include/drm/ttm/ttm_module.h b/include/drm/ttm/ttm_module.h index 889a4c7958ae..0a72ac7c7e58 100644 --- a/include/drm/ttm/ttm_module.h +++ b/include/drm/ttm/ttm_module.h @@ -32,6 +32,7 @@ #define _TTM_MODULE_H_ #include +struct kobject; #define TTM_PFX "[TTM]" @@ -54,5 +55,6 @@ extern void ttm_global_init(void); extern void ttm_global_release(void); extern int ttm_global_item_ref(struct ttm_global_reference *ref); extern void ttm_global_item_unref(struct ttm_global_reference *ref); +extern struct kobject *ttm_get_kobj(void); #endif /* _TTM_MODULE_H_ */ -- cgit v1.2.3 From a987fcaa805fcb24ba885c2e29fd4fdb6816f08f Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Tue, 18 Aug 2009 16:51:56 +0200 Subject: ttm: Make parts of a struct ttm_bo_device global. Common resources, like memory accounting and swap lists should be global and not per device. Introduce a struct ttm_bo_global to accomodate this, and register it with sysfs. Add a small sysfs interface to return the number of active buffer objects. 
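The driver-side wiring this change expects looks roughly like the sketch below (compare the radeon changes that follow). The example_* names are invented, error paths are trimmed, and the pre-existing TTM_GLOBAL_TTM_MEM reference type is assumed; the struct ttm_bo_global machinery (ttm_bo_global_init(), ttm_bo_global_release(), struct ttm_bo_global_ref, TTM_GLOBAL_TTM_BO) is what this patch adds.

/* Hypothetical driver glue; modeled on, but not identical to, the radeon diff below. */
struct example_mman {
	struct ttm_bo_global_ref bo_global_ref;
	struct ttm_global_reference mem_global_ref;
	struct ttm_bo_device bdev;
};

static int example_mem_global_init(struct ttm_global_reference *ref)
{
	return ttm_mem_global_init(ref->object);
}

static void example_mem_global_release(struct ttm_global_reference *ref)
{
	ttm_mem_global_release(ref->object);
}

static int example_ttm_global_init(struct example_mman *mman)
{
	struct ttm_global_reference *ref;
	int r;

	/* Take a reference on the global memory accounting object. */
	ref = &mman->mem_global_ref;
	ref->global_type = TTM_GLOBAL_TTM_MEM;
	ref->size = sizeof(struct ttm_mem_global);
	ref->init = &example_mem_global_init;
	ref->release = &example_mem_global_release;
	r = ttm_global_item_ref(ref);
	if (r != 0)
		return r;

	/* Then reference the shared bo global, which needs the mem global. */
	mman->bo_global_ref.mem_glob = mman->mem_global_ref.object;
	ref = &mman->bo_global_ref.ref;
	ref->global_type = TTM_GLOBAL_TTM_BO;
	ref->size = sizeof(struct ttm_bo_global);
	ref->init = &ttm_bo_global_init;
	ref->release = &ttm_bo_global_release;
	r = ttm_global_item_ref(ref);
	if (r != 0) {
		ttm_global_item_unref(&mman->mem_global_ref);
		return r;
	}

	/*
	 * ttm_bo_device_init() now takes the bo global instead of the mem
	 * global: ttm_bo_device_init(&mman->bdev,
	 * mman->bo_global_ref.ref.object, driver, file_page_offset);
	 */
	return 0;
}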
Signed-off-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_object.h | 1 + drivers/gpu/drm/radeon/radeon_ttm.c | 33 +++- drivers/gpu/drm/ttm/ttm_bo.c | 292 ++++++++++++++++++++++----------- drivers/gpu/drm/ttm/ttm_bo_util.c | 4 +- drivers/gpu/drm/ttm/ttm_tt.c | 12 +- include/drm/ttm/ttm_bo_api.h | 1 + include/drm/ttm/ttm_bo_driver.h | 94 ++++++++--- 7 files changed, 296 insertions(+), 141 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 473e4775dc5a..10e8af6bb456 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -37,6 +37,7 @@ * TTM. */ struct radeon_mman { + struct ttm_bo_global_ref bo_global_ref; struct ttm_global_reference mem_global_ref; bool mem_global_referenced; struct ttm_bo_device bdev; diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 1227a97f5169..343b6d6b99c6 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -77,9 +77,25 @@ static int radeon_ttm_global_init(struct radeon_device *rdev) global_ref->release = &radeon_ttm_mem_global_release; r = ttm_global_item_ref(global_ref); if (r != 0) { - DRM_ERROR("Failed referencing a global TTM memory object.\n"); + DRM_ERROR("Failed setting up TTM memory accounting " + "subsystem.\n"); return r; } + + rdev->mman.bo_global_ref.mem_glob = + rdev->mman.mem_global_ref.object; + global_ref = &rdev->mman.bo_global_ref.ref; + global_ref->global_type = TTM_GLOBAL_TTM_BO; + global_ref->size = sizeof(struct ttm_mem_global); + global_ref->init = &ttm_bo_global_init; + global_ref->release = &ttm_bo_global_release; + r = ttm_global_item_ref(global_ref); + if (r != 0) { + DRM_ERROR("Failed setting up TTM BO subsystem.\n"); + ttm_global_item_unref(&rdev->mman.mem_global_ref); + return r; + } + rdev->mman.mem_global_referenced = true; return 0; } @@ -87,6 +103,7 @@ static int radeon_ttm_global_init(struct radeon_device *rdev) static void radeon_ttm_global_fini(struct radeon_device *rdev) { if (rdev->mman.mem_global_referenced) { + ttm_global_item_unref(&rdev->mman.bo_global_ref.ref); ttm_global_item_unref(&rdev->mman.mem_global_ref); rdev->mman.mem_global_referenced = false; } @@ -286,9 +303,11 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, r = ttm_bo_move_ttm(bo, true, no_wait, new_mem); out_cleanup: if (tmp_mem.mm_node) { - spin_lock(&rdev->mman.bdev.lru_lock); + struct ttm_bo_global *glob = rdev->mman.bdev.glob; + + spin_lock(&glob->lru_lock); drm_mm_put_block(tmp_mem.mm_node); - spin_unlock(&rdev->mman.bdev.lru_lock); + spin_unlock(&glob->lru_lock); return r; } return r; @@ -323,9 +342,11 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo, } out_cleanup: if (tmp_mem.mm_node) { - spin_lock(&rdev->mman.bdev.lru_lock); + struct ttm_bo_global *glob = rdev->mman.bdev.glob; + + spin_lock(&glob->lru_lock); drm_mm_put_block(tmp_mem.mm_node); - spin_unlock(&rdev->mman.bdev.lru_lock); + spin_unlock(&glob->lru_lock); return r; } return r; @@ -441,7 +462,7 @@ int radeon_ttm_init(struct radeon_device *rdev) } /* No others user of address space so set it to 0 */ r = ttm_bo_device_init(&rdev->mman.bdev, - rdev->mman.mem_global_ref.object, + rdev->mman.bo_global_ref.ref.object, &radeon_bo_driver, DRM_FILE_PAGE_OFFSET); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 
f16909ceec93..0d0b1b7afbcf 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -45,6 +45,39 @@ static int ttm_bo_setup_vm(struct ttm_buffer_object *bo); static void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo); static int ttm_bo_swapout(struct ttm_mem_shrink *shrink); +static void ttm_bo_global_kobj_release(struct kobject *kobj); + +static struct attribute ttm_bo_count = { + .name = "bo_count", + .mode = S_IRUGO +}; + +static ssize_t ttm_bo_global_show(struct kobject *kobj, + struct attribute *attr, + char *buffer) +{ + struct ttm_bo_global *glob = + container_of(kobj, struct ttm_bo_global, kobj); + + return snprintf(buffer, PAGE_SIZE, "%lu\n", + (unsigned long) atomic_read(&glob->bo_count)); +} + +static struct attribute *ttm_bo_global_attrs[] = { + &ttm_bo_count, + NULL +}; + +static struct sysfs_ops ttm_bo_global_ops = { + .show = &ttm_bo_global_show +}; + +static struct kobj_type ttm_bo_glob_kobj_type = { + .release = &ttm_bo_global_kobj_release, + .sysfs_ops = &ttm_bo_global_ops, + .default_attrs = ttm_bo_global_attrs +}; + static inline uint32_t ttm_bo_type_flags(unsigned type) { @@ -67,10 +100,11 @@ static void ttm_bo_release_list(struct kref *list_kref) if (bo->ttm) ttm_tt_destroy(bo->ttm); + atomic_dec(&bo->glob->bo_count); if (bo->destroy) bo->destroy(bo); else { - ttm_mem_global_free(bdev->mem_glob, bo->acc_size); + ttm_mem_global_free(bdev->glob->mem_glob, bo->acc_size); kfree(bo); } } @@ -107,7 +141,7 @@ static void ttm_bo_add_to_lru(struct ttm_buffer_object *bo) kref_get(&bo->list_kref); if (bo->ttm != NULL) { - list_add_tail(&bo->swap, &bdev->swap_lru); + list_add_tail(&bo->swap, &bo->glob->swap_lru); kref_get(&bo->list_kref); } } @@ -142,7 +176,7 @@ int ttm_bo_reserve_locked(struct ttm_buffer_object *bo, bool interruptible, bool no_wait, bool use_sequence, uint32_t sequence) { - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; int ret; while (unlikely(atomic_cmpxchg(&bo->reserved, 0, 1) != 0)) { @@ -154,9 +188,9 @@ int ttm_bo_reserve_locked(struct ttm_buffer_object *bo, if (no_wait) return -EBUSY; - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); ret = ttm_bo_wait_unreserved(bo, interruptible); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (unlikely(ret)) return ret; @@ -182,16 +216,16 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo, bool interruptible, bool no_wait, bool use_sequence, uint32_t sequence) { - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; int put_count = 0; int ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); ret = ttm_bo_reserve_locked(bo, interruptible, no_wait, use_sequence, sequence); if (likely(ret == 0)) put_count = ttm_bo_del_from_lru(bo); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); while (put_count--) kref_put(&bo->list_kref, ttm_bo_ref_bug); @@ -201,13 +235,13 @@ int ttm_bo_reserve(struct ttm_buffer_object *bo, void ttm_bo_unreserve(struct ttm_buffer_object *bo) { - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); ttm_bo_add_to_lru(bo); atomic_set(&bo->reserved, 0); wake_up_all(&bo->event_queue); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); } EXPORT_SYMBOL(ttm_bo_unreserve); @@ -218,6 +252,7 @@ EXPORT_SYMBOL(ttm_bo_unreserve); static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) { struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = 
bo->glob; int ret = 0; uint32_t page_flags = 0; @@ -230,14 +265,14 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) page_flags |= TTM_PAGE_FLAG_ZERO_ALLOC; case ttm_bo_type_kernel: bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, - page_flags, bdev->dummy_read_page); + page_flags, glob->dummy_read_page); if (unlikely(bo->ttm == NULL)) ret = -ENOMEM; break; case ttm_bo_type_user: bo->ttm = ttm_tt_create(bdev, bo->num_pages << PAGE_SHIFT, page_flags | TTM_PAGE_FLAG_USER, - bdev->dummy_read_page); + glob->dummy_read_page); if (unlikely(bo->ttm == NULL)) ret = -ENOMEM; break; @@ -355,6 +390,7 @@ out_err: static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) { struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; struct ttm_bo_driver *driver = bdev->driver; int ret; @@ -366,7 +402,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) spin_unlock(&bo->lock); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); ret = ttm_bo_reserve_locked(bo, false, false, false, 0); BUG_ON(ret); if (bo->ttm) @@ -381,7 +417,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) bo->mem.mm_node = NULL; } put_count = ttm_bo_del_from_lru(bo); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); atomic_set(&bo->reserved, 0); @@ -391,14 +427,14 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) return 0; } - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (list_empty(&bo->ddestroy)) { void *sync_obj = bo->sync_obj; void *sync_obj_arg = bo->sync_obj_arg; kref_get(&bo->list_kref); list_add_tail(&bo->ddestroy, &bdev->ddestroy); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); spin_unlock(&bo->lock); if (sync_obj) @@ -408,7 +444,7 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) ret = 0; } else { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); spin_unlock(&bo->lock); ret = -EBUSY; } @@ -423,11 +459,12 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, bool remove_all) static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all) { + struct ttm_bo_global *glob = bdev->glob; struct ttm_buffer_object *entry, *nentry; struct list_head *list, *next; int ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); list_for_each_safe(list, next, &bdev->ddestroy) { entry = list_entry(list, struct ttm_buffer_object, ddestroy); nentry = NULL; @@ -444,16 +481,16 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all) } kref_get(&entry->list_kref); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); ret = ttm_bo_cleanup_refs(entry, remove_all); kref_put(&entry->list_kref, ttm_bo_release_list); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (nentry) { bool next_onlist = !list_empty(next); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); kref_put(&nentry->list_kref, ttm_bo_release_list); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); /* * Someone might have raced us and removed the * next entry from the list. 
We don't bother restarting @@ -467,7 +504,7 @@ static int ttm_bo_delayed_delete(struct ttm_bo_device *bdev, bool remove_all) break; } ret = !list_empty(&bdev->ddestroy); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); return ret; } @@ -517,6 +554,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, unsigned mem_type, { int ret = 0; struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; struct ttm_mem_reg evict_mem; uint32_t proposed_placement; @@ -565,12 +603,12 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, unsigned mem_type, goto out; } - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (evict_mem.mm_node) { drm_mm_put_block(evict_mem.mm_node); evict_mem.mm_node = NULL; } - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); bo->evicted = true; out: return ret; @@ -585,6 +623,7 @@ static int ttm_bo_mem_force_space(struct ttm_bo_device *bdev, uint32_t mem_type, bool interruptible, bool no_wait) { + struct ttm_bo_global *glob = bdev->glob; struct drm_mm_node *node; struct ttm_buffer_object *entry; struct ttm_mem_type_manager *man = &bdev->man[mem_type]; @@ -598,7 +637,7 @@ retry_pre_get: if (unlikely(ret != 0)) return ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); do { node = drm_mm_search_free(&man->manager, num_pages, mem->page_alignment, 1); @@ -619,7 +658,7 @@ retry_pre_get: if (likely(ret == 0)) put_count = ttm_bo_del_from_lru(entry); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); if (unlikely(ret != 0)) return ret; @@ -635,21 +674,21 @@ retry_pre_get: if (ret) return ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); } while (1); if (!node) { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); return -ENOMEM; } node = drm_mm_get_block_atomic(node, num_pages, mem->page_alignment); if (unlikely(!node)) { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); goto retry_pre_get; } - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); mem->mm_node = node; mem->mem_type = mem_type; return 0; @@ -697,6 +736,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, bool interruptible, bool no_wait) { struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; struct ttm_mem_type_manager *man; uint32_t num_prios = bdev->driver->num_mem_type_prio; @@ -733,20 +773,20 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo, if (unlikely(ret)) return ret; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); node = drm_mm_search_free(&man->manager, mem->num_pages, mem->page_alignment, 1); if (unlikely(!node)) { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); break; } node = drm_mm_get_block_atomic(node, mem->num_pages, mem-> page_alignment); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); } while (!node); } if (node) @@ -816,7 +856,7 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, uint32_t proposed_placement, bool interruptible, bool no_wait) { - struct ttm_bo_device *bdev = bo->bdev; + struct ttm_bo_global *glob = bo->glob; int ret = 0; struct ttm_mem_reg mem; @@ -852,9 +892,9 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo, out_unlock: if (ret && mem.mm_node) { - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); drm_mm_put_block(mem.mm_node); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); } return ret; } @@ -990,6 +1030,7 @@ int ttm_buffer_object_init(struct ttm_bo_device *bdev, INIT_LIST_HEAD(&bo->ddestroy); INIT_LIST_HEAD(&bo->swap); bo->bdev = 
bdev; + bo->glob = bdev->glob; bo->type = type; bo->num_pages = num_pages; bo->mem.mem_type = TTM_PL_SYSTEM; @@ -1002,6 +1043,7 @@ int ttm_buffer_object_init(struct ttm_bo_device *bdev, bo->seq_valid = false; bo->persistant_swap_storage = persistant_swap_storage; bo->acc_size = acc_size; + atomic_inc(&bo->glob->bo_count); ret = ttm_bo_check_placement(bo, flags, 0ULL); if (unlikely(ret != 0)) @@ -1040,13 +1082,13 @@ out_err: } EXPORT_SYMBOL(ttm_buffer_object_init); -static inline size_t ttm_bo_size(struct ttm_bo_device *bdev, +static inline size_t ttm_bo_size(struct ttm_bo_global *glob, unsigned long num_pages) { size_t page_array_size = (num_pages * sizeof(void *) + PAGE_SIZE - 1) & PAGE_MASK; - return bdev->ttm_bo_size + 2 * page_array_size; + return glob->ttm_bo_size + 2 * page_array_size; } int ttm_buffer_object_create(struct ttm_bo_device *bdev, @@ -1061,10 +1103,10 @@ int ttm_buffer_object_create(struct ttm_bo_device *bdev, { struct ttm_buffer_object *bo; int ret; - struct ttm_mem_global *mem_glob = bdev->mem_glob; + struct ttm_mem_global *mem_glob = bdev->glob->mem_glob; size_t acc_size = - ttm_bo_size(bdev, (size + PAGE_SIZE - 1) >> PAGE_SHIFT); + ttm_bo_size(bdev->glob, (size + PAGE_SIZE - 1) >> PAGE_SHIFT); ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false); if (unlikely(ret != 0)) return ret; @@ -1118,6 +1160,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev, struct list_head *head, unsigned mem_type, bool allow_errors) { + struct ttm_bo_global *glob = bdev->glob; struct ttm_buffer_object *entry; int ret; int put_count; @@ -1126,30 +1169,31 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev, * Can't use standard list traversal since we're unlocking. */ - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); while (!list_empty(head)) { entry = list_first_entry(head, struct ttm_buffer_object, lru); kref_get(&entry->list_kref); ret = ttm_bo_reserve_locked(entry, false, false, false, 0); put_count = ttm_bo_del_from_lru(entry); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); while (put_count--) kref_put(&entry->list_kref, ttm_bo_ref_bug); BUG_ON(ret); ret = ttm_bo_leave_list(entry, mem_type, allow_errors); ttm_bo_unreserve(entry); kref_put(&entry->list_kref, ttm_bo_release_list); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); } - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); return 0; } int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) { + struct ttm_bo_global *glob = bdev->glob; struct ttm_mem_type_manager *man = &bdev->man[mem_type]; int ret = -EINVAL; @@ -1171,13 +1215,13 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type) if (mem_type > 0) { ttm_bo_force_list_clean(bdev, &man->lru, mem_type, false); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (drm_mm_clean(&man->manager)) drm_mm_takedown(&man->manager); else ret = -EBUSY; - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); } return ret; @@ -1251,11 +1295,83 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type, } EXPORT_SYMBOL(ttm_bo_init_mm); +static void ttm_bo_global_kobj_release(struct kobject *kobj) +{ + struct ttm_bo_global *glob = + container_of(kobj, struct ttm_bo_global, kobj); + + printk(KERN_INFO TTM_PFX "Freeing bo global.\n"); + ttm_mem_unregister_shrink(glob->mem_glob, &glob->shrink); + __free_page(glob->dummy_read_page); + kfree(glob); +} + +void ttm_bo_global_release(struct ttm_global_reference *ref) +{ + struct ttm_bo_global *glob = ref->object; + + 
kobject_del(&glob->kobj); + kobject_put(&glob->kobj); +} +EXPORT_SYMBOL(ttm_bo_global_release); + +int ttm_bo_global_init(struct ttm_global_reference *ref) +{ + struct ttm_bo_global_ref *bo_ref = + container_of(ref, struct ttm_bo_global_ref, ref); + struct ttm_bo_global *glob = ref->object; + int ret; + + mutex_init(&glob->device_list_mutex); + spin_lock_init(&glob->lru_lock); + glob->mem_glob = bo_ref->mem_glob; + glob->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); + + if (unlikely(glob->dummy_read_page == NULL)) { + ret = -ENOMEM; + goto out_no_drp; + } + + INIT_LIST_HEAD(&glob->swap_lru); + INIT_LIST_HEAD(&glob->device_list); + + ttm_mem_init_shrink(&glob->shrink, ttm_bo_swapout); + ret = ttm_mem_register_shrink(glob->mem_glob, &glob->shrink); + if (unlikely(ret != 0)) { + printk(KERN_ERR TTM_PFX + "Could not register buffer object swapout.\n"); + goto out_no_shrink; + } + + glob->ttm_bo_extra_size = + ttm_round_pot(sizeof(struct ttm_tt)) + + ttm_round_pot(sizeof(struct ttm_backend)); + + glob->ttm_bo_size = glob->ttm_bo_extra_size + + ttm_round_pot(sizeof(struct ttm_buffer_object)); + + atomic_set(&glob->bo_count, 0); + + kobject_init(&glob->kobj, &ttm_bo_glob_kobj_type); + ret = kobject_add(&glob->kobj, ttm_get_kobj(), "buffer_objects"); + if (unlikely(ret != 0)) + kobject_put(&glob->kobj); + return ret; +out_no_shrink: + __free_page(glob->dummy_read_page); +out_no_drp: + kfree(glob); + return ret; +} +EXPORT_SYMBOL(ttm_bo_global_init); + + int ttm_bo_device_release(struct ttm_bo_device *bdev) { int ret = 0; unsigned i = TTM_NUM_MEM_TYPES; struct ttm_mem_type_manager *man; + struct ttm_bo_global *glob = bdev->glob; while (i--) { man = &bdev->man[i]; @@ -1271,98 +1387,74 @@ int ttm_bo_device_release(struct ttm_bo_device *bdev) } } + mutex_lock(&glob->device_list_mutex); + list_del(&bdev->device_list); + mutex_unlock(&glob->device_list_mutex); + if (!cancel_delayed_work(&bdev->wq)) flush_scheduled_work(); while (ttm_bo_delayed_delete(bdev, true)) ; - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); if (list_empty(&bdev->ddestroy)) TTM_DEBUG("Delayed destroy list was clean\n"); if (list_empty(&bdev->man[0].lru)) TTM_DEBUG("Swap list was clean\n"); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); - ttm_mem_unregister_shrink(bdev->mem_glob, &bdev->shrink); BUG_ON(!drm_mm_clean(&bdev->addr_space_mm)); write_lock(&bdev->vm_lock); drm_mm_takedown(&bdev->addr_space_mm); write_unlock(&bdev->vm_lock); - __free_page(bdev->dummy_read_page); return ret; } EXPORT_SYMBOL(ttm_bo_device_release); -/* - * This function is intended to be called on drm driver load. - * If you decide to call it from firstopen, you must protect the call - * from a potentially racing ttm_bo_driver_finish in lastclose. - * (This may happen on X server restart). - */ - int ttm_bo_device_init(struct ttm_bo_device *bdev, - struct ttm_mem_global *mem_glob, - struct ttm_bo_driver *driver, uint64_t file_page_offset) + struct ttm_bo_global *glob, + struct ttm_bo_driver *driver, + uint64_t file_page_offset) { int ret = -EINVAL; - bdev->dummy_read_page = NULL; rwlock_init(&bdev->vm_lock); - spin_lock_init(&bdev->lru_lock); + spin_lock_init(&glob->lru_lock); bdev->driver = driver; - bdev->mem_glob = mem_glob; memset(bdev->man, 0, sizeof(bdev->man)); - bdev->dummy_read_page = alloc_page(__GFP_ZERO | GFP_DMA32); - if (unlikely(bdev->dummy_read_page == NULL)) { - ret = -ENOMEM; - goto out_err0; - } - /* * Initialize the system memory buffer type. * Other types need to be driver / IOCTL initialized. 
*/ ret = ttm_bo_init_mm(bdev, TTM_PL_SYSTEM, 0, 0); if (unlikely(ret != 0)) - goto out_err1; + goto out_no_sys; bdev->addr_space_rb = RB_ROOT; ret = drm_mm_init(&bdev->addr_space_mm, file_page_offset, 0x10000000); if (unlikely(ret != 0)) - goto out_err2; + goto out_no_addr_mm; INIT_DELAYED_WORK(&bdev->wq, ttm_bo_delayed_workqueue); bdev->nice_mode = true; INIT_LIST_HEAD(&bdev->ddestroy); - INIT_LIST_HEAD(&bdev->swap_lru); bdev->dev_mapping = NULL; - ttm_mem_init_shrink(&bdev->shrink, ttm_bo_swapout); - ret = ttm_mem_register_shrink(mem_glob, &bdev->shrink); - if (unlikely(ret != 0)) { - printk(KERN_ERR TTM_PFX - "Could not register buffer object swapout.\n"); - goto out_err2; - } + bdev->glob = glob; - bdev->ttm_bo_extra_size = - ttm_round_pot(sizeof(struct ttm_tt)) + - ttm_round_pot(sizeof(struct ttm_backend)); - - bdev->ttm_bo_size = bdev->ttm_bo_extra_size + - ttm_round_pot(sizeof(struct ttm_buffer_object)); + mutex_lock(&glob->device_list_mutex); + list_add_tail(&bdev->device_list, &glob->device_list); + mutex_unlock(&glob->device_list_mutex); return 0; -out_err2: +out_no_addr_mm: ttm_bo_clean_mm(bdev, 0); -out_err1: - __free_page(bdev->dummy_read_page); -out_err0: +out_no_sys: return ret; } EXPORT_SYMBOL(ttm_bo_device_init); @@ -1607,21 +1699,21 @@ void ttm_bo_synccpu_write_release(struct ttm_buffer_object *bo) static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) { - struct ttm_bo_device *bdev = - container_of(shrink, struct ttm_bo_device, shrink); + struct ttm_bo_global *glob = + container_of(shrink, struct ttm_bo_global, shrink); struct ttm_buffer_object *bo; int ret = -EBUSY; int put_count; uint32_t swap_placement = (TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); while (ret == -EBUSY) { - if (unlikely(list_empty(&bdev->swap_lru))) { - spin_unlock(&bdev->lru_lock); + if (unlikely(list_empty(&glob->swap_lru))) { + spin_unlock(&glob->lru_lock); return -EBUSY; } - bo = list_first_entry(&bdev->swap_lru, + bo = list_first_entry(&glob->swap_lru, struct ttm_buffer_object, swap); kref_get(&bo->list_kref); @@ -1633,16 +1725,16 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink) ret = ttm_bo_reserve_locked(bo, false, true, false, 0); if (unlikely(ret == -EBUSY)) { - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); ttm_bo_wait_unreserved(bo, false); kref_put(&bo->list_kref, ttm_bo_release_list); - spin_lock(&bdev->lru_lock); + spin_lock(&glob->lru_lock); } } BUG_ON(ret != 0); put_count = ttm_bo_del_from_lru(bo); - spin_unlock(&bdev->lru_lock); + spin_unlock(&glob->lru_lock); while (put_count--) kref_put(&bo->list_kref, ttm_bo_ref_bug); @@ -1696,6 +1788,6 @@ out: void ttm_bo_swapout_all(struct ttm_bo_device *bdev) { - while (ttm_bo_swapout(&bdev->shrink) == 0) + while (ttm_bo_swapout(&bdev->glob->shrink) == 0) ; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index bdec583901eb..12cd47aa18ce 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -41,9 +41,9 @@ void ttm_bo_free_old_node(struct ttm_buffer_object *bo) struct ttm_mem_reg *old_mem = &bo->mem; if (old_mem->mm_node) { - spin_lock(&bo->bdev->lru_lock); + spin_lock(&bo->glob->lru_lock); drm_mm_put_block(old_mem->mm_node); - spin_unlock(&bo->bdev->lru_lock); + spin_unlock(&bo->glob->lru_lock); } old_mem->mm_node = NULL; } diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 4e1e2566d519..b0f73096d372 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ 
b/drivers/gpu/drm/ttm/ttm_tt.c @@ -166,7 +166,7 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm) set_page_dirty_lock(page); ttm->pages[i] = NULL; - ttm_mem_global_free(ttm->bdev->mem_glob, PAGE_SIZE); + ttm_mem_global_free(ttm->glob->mem_glob, PAGE_SIZE); put_page(page); } ttm->state = tt_unpopulated; @@ -177,8 +177,7 @@ static void ttm_tt_free_user_pages(struct ttm_tt *ttm) static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index) { struct page *p; - struct ttm_bo_device *bdev = ttm->bdev; - struct ttm_mem_global *mem_glob = bdev->mem_glob; + struct ttm_mem_global *mem_glob = ttm->glob->mem_glob; int ret; while (NULL == (p = ttm->pages[index])) { @@ -348,7 +347,7 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm) printk(KERN_ERR TTM_PFX "Erroneous page count. " "Leaking pages.\n"); - ttm_mem_global_free_page(ttm->bdev->mem_glob, + ttm_mem_global_free_page(ttm->glob->mem_glob, cur_page); __free_page(cur_page); } @@ -394,7 +393,7 @@ int ttm_tt_set_user(struct ttm_tt *ttm, struct mm_struct *mm = tsk->mm; int ret; int write = (ttm->page_flags & TTM_PAGE_FLAG_WRITE) != 0; - struct ttm_mem_global *mem_glob = ttm->bdev->mem_glob; + struct ttm_mem_global *mem_glob = ttm->glob->mem_glob; BUG_ON(num_pages != ttm->num_pages); BUG_ON((ttm->page_flags & TTM_PAGE_FLAG_USER) == 0); @@ -439,8 +438,7 @@ struct ttm_tt *ttm_tt_create(struct ttm_bo_device *bdev, unsigned long size, if (!ttm) return NULL; - ttm->bdev = bdev; - + ttm->glob = bdev->glob; ttm->num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; ttm->first_himem_page = ttm->num_pages; ttm->last_lomem_page = -1; diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index 99dc521aa1a9..491146170522 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -155,6 +155,7 @@ struct ttm_buffer_object { * Members constant at init. */ + struct ttm_bo_global *glob; struct ttm_bo_device *bdev; unsigned long buffer_start; enum ttm_bo_type type; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 62ed733c52a2..9dc32f70b9a2 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -32,6 +32,7 @@ #include "ttm/ttm_bo_api.h" #include "ttm/ttm_memory.h" +#include "ttm/ttm_module.h" #include "drm_mm.h" #include "linux/workqueue.h" #include "linux/fs.h" @@ -160,7 +161,7 @@ struct ttm_tt { long last_lomem_page; uint32_t page_flags; unsigned long num_pages; - struct ttm_bo_device *bdev; + struct ttm_bo_global *glob; struct ttm_backend *be; struct task_struct *tsk; unsigned long start; @@ -355,24 +356,73 @@ struct ttm_bo_driver { void *(*sync_obj_ref) (void *sync_obj); }; -#define TTM_NUM_MEM_TYPES 8 +/** + * struct ttm_bo_global_ref - Argument to initialize a struct ttm_bo_global. + */ + +struct ttm_bo_global_ref { + struct ttm_global_reference ref; + struct ttm_mem_global *mem_glob; +}; -#define TTM_BO_PRIV_FLAG_MOVING 0 /* Buffer object is moving and needs - idling before CPU mapping */ -#define TTM_BO_PRIV_FLAG_MAX 1 /** - * struct ttm_bo_device - Buffer object driver device-specific data. + * struct ttm_bo_global - Buffer object driver global data. * * @mem_glob: Pointer to a struct ttm_mem_global object for accounting. - * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. - * @count: Current number of buffer object. - * @pages: Current number of pinned pages. * @dummy_read_page: Pointer to a dummy page used for mapping requests * of unpopulated pages. 
- * @shrink: A shrink callback object used for buffre object swap. + * @shrink: A shrink callback object used for buffer object swap. * @ttm_bo_extra_size: Extra size (sizeof(struct ttm_buffer_object) excluded) * used by a buffer object. This is excluding page arrays and backing pages. * @ttm_bo_size: This is @ttm_bo_extra_size + sizeof(struct ttm_buffer_object). + * @device_list_mutex: Mutex protecting the device list. + * This mutex is held while traversing the device list for pm options. + * @lru_lock: Spinlock protecting the bo subsystem lru lists. + * @device_list: List of buffer object devices. + * @swap_lru: Lru list of buffer objects used for swapping. + */ + +struct ttm_bo_global { + + /** + * Constant after init. + */ + + struct kobject kobj; + struct ttm_mem_global *mem_glob; + struct page *dummy_read_page; + struct ttm_mem_shrink shrink; + size_t ttm_bo_extra_size; + size_t ttm_bo_size; + struct mutex device_list_mutex; + spinlock_t lru_lock; + + /** + * Protected by device_list_mutex. + */ + struct list_head device_list; + + /** + * Protected by the lru_lock. + */ + struct list_head swap_lru; + + /** + * Internal protection. + */ + atomic_t bo_count; +}; + + +#define TTM_NUM_MEM_TYPES 8 + +#define TTM_BO_PRIV_FLAG_MOVING 0 /* Buffer object is moving and needs + idling before CPU mapping */ +#define TTM_BO_PRIV_FLAG_MAX 1 +/** + * struct ttm_bo_device - Buffer object driver device-specific data. + * + * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. * @addr_space_mm: Range manager for the device address space. * lru_lock: Spinlock that protects the buffer+device lru lists and @@ -390,32 +440,21 @@ struct ttm_bo_device { /* * Constant after bo device init / atomic. */ - - struct ttm_mem_global *mem_glob; + struct list_head device_list; + struct ttm_bo_global *glob; struct ttm_bo_driver *driver; - struct page *dummy_read_page; - struct ttm_mem_shrink shrink; - - size_t ttm_bo_extra_size; - size_t ttm_bo_size; - rwlock_t vm_lock; + struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; /* * Protected by the vm lock. */ - struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; struct rb_root addr_space_rb; struct drm_mm addr_space_mm; /* - * Might want to change this to one lock per manager. - */ - spinlock_t lru_lock; - /* - * Protected by the lru lock. + * Protected by the global:lru lock. */ struct list_head ddestroy; - struct list_head swap_lru; /* * Protected by load / firstopen / lastclose /unload sync. @@ -629,6 +668,9 @@ extern int ttm_bo_pci_offset(struct ttm_bo_device *bdev, unsigned long *bus_offset, unsigned long *bus_size); +extern void ttm_bo_global_release(struct ttm_global_reference *ref); +extern int ttm_bo_global_init(struct ttm_global_reference *ref); + extern int ttm_bo_device_release(struct ttm_bo_device *bdev); /** @@ -646,7 +688,7 @@ extern int ttm_bo_device_release(struct ttm_bo_device *bdev); * !0: Failure. 
*/ extern int ttm_bo_device_init(struct ttm_bo_device *bdev, - struct ttm_mem_global *mem_glob, + struct ttm_bo_global *glob, struct ttm_bo_driver *driver, uint64_t file_page_offset); -- cgit v1.2.3 From 10a5b66f625904ad5a2867cf7a28073e1236ff32 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Aug 2009 15:53:05 +0800 Subject: tracing/syscalls: Add fields format for exit events Add "format" file for syscall exit events: # cat events/syscalls/sys_exit_open/format name: sys_exit_open ID: 344 format: field:unsigned short common_type; offset:0; size:2; field:unsigned char common_flags; offset:2; size:1; field:unsigned char common_preempt_count; offset:3; size:1; field:int common_pid; offset:4; size:4; field:int common_tgid; offset:8; size:4; field:int nr; offset:12; size:4; field:unsigned long ret; offset:16; size:4; Signed-off-by: Li Zefan Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A8BAF61.3060307@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/syscalls.h | 3 ++- include/trace/syscall.h | 6 ++++-- kernel/trace/trace_syscalls.c | 18 +++++++++++++++++- 3 files changed, 23 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 87d06c173ddc..8d57f77794ee 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -189,7 +189,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .system = "syscalls", \ .event = &event_syscall_enter, \ .raw_init = init_enter_##sname, \ - .show_format = ftrace_format_syscall, \ + .show_format = syscall_enter_format, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = "sys"#sname, \ @@ -225,6 +225,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .system = "syscalls", \ .event = &event_syscall_exit, \ .raw_init = init_exit_##sname, \ + .show_format = syscall_exit_format, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ .data = "sys"#sname, \ diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 0cb03625edd9..5ce85d75d31b 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -55,8 +55,10 @@ extern int reg_event_syscall_enter(void *ptr); extern void unreg_event_syscall_enter(void *ptr); extern int reg_event_syscall_exit(void *ptr); extern void unreg_event_syscall_exit(void *ptr); -extern int -ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s); +extern int syscall_enter_format(struct ftrace_event_call *call, + struct trace_seq *s); +extern int syscall_exit_format(struct ftrace_event_call *call, + struct trace_seq *s); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index d10daf0922b5..7336b6c265d7 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -97,7 +97,7 @@ extern char *__bad_type_size(void); __bad_type_size() : \ #type, #name, offsetof(typeof(trace), name), sizeof(trace.name) -int ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s) +int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s) { int i; int nr; @@ -149,6 +149,22 @@ int ftrace_format_syscall(struct ftrace_event_call *call, struct trace_seq *s) return ret; } +int syscall_exit_format(struct ftrace_event_call *call, struct 
trace_seq *s) +{ + int ret; + struct syscall_trace_exit trace; + + ret = trace_seq_printf(s, + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" + "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n", + SYSCALL_FIELD(int, nr), + SYSCALL_FIELD(unsigned long, ret)); + if (!ret) + return 0; + + return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); +} + void ftrace_syscall_enter(struct pt_regs *regs, long id) { struct syscall_trace_enter *entry; -- cgit v1.2.3 From 14be96c9716cb8c46dca94bd890defd7856e0734 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Aug 2009 15:53:52 +0800 Subject: tracing/events: Add ftrace_event_call param to define_fields() This parameter is needed by syscall events to add define_fields() handler. Signed-off-by: Li Zefan Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A8BAF90.6060801@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 2 +- include/trace/ftrace.h | 3 +-- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_export.c | 5 ++--- 4 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 189806b6e69e..35b3a4a5ba86 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -118,7 +118,7 @@ struct ftrace_event_call { int (*raw_init)(void); int (*show_format)(struct ftrace_event_call *call, struct trace_seq *s); - int (*define_fields)(void); + int (*define_fields)(struct ftrace_event_call *); struct list_head fields; int filter_active; struct event_filter *filter; diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index b250b0616571..4e81c9b37515 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -294,10 +294,9 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ int \ -ftrace_define_fields_##call(void) \ +ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ struct ftrace_raw_##call field; \ - struct ftrace_event_call *event_call = &event_##call; \ int ret; \ \ __common_field(int, type, 1); \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index b568ade8f453..af8fb8ebef0b 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -941,7 +941,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, id); if (call->define_fields) { - ret = call->define_fields(); + ret = call->define_fields(call); if (ret < 0) { pr_warning("Could not initialize trace point" " events/%s\n", call->name); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 956d4bc675e5..cf2c752a25bf 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -119,7 +119,7 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ -int ftrace_define_fields_##call(void); \ +int ftrace_define_fields_##call(struct ftrace_event_call *event_call); \ static int ftrace_raw_init_event_##call(void); \ \ struct ftrace_event_call __used \ @@ -184,9 +184,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ int \ -ftrace_define_fields_##call(void) \ +ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ - struct ftrace_event_call *event_call = &event_##call; \ struct args field; \ 
int ret; \ \ -- cgit v1.2.3 From e647d6b314266adb904d4b84973eda0afa856946 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Aug 2009 15:54:32 +0800 Subject: tracing/events: Add trace_define_common_fields() Extract duplicate code. Also prepare for the later patch. Signed-off-by: Li Zefan Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A8BAFB8.1010304@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 8 +------- include/trace/ftrace.h | 8 +++----- kernel/trace/trace_events.c | 22 ++++++++++++++++++++++ kernel/trace/trace_export.c | 8 +++----- 4 files changed, 29 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 35b3a4a5ba86..427cbae47f84 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -142,6 +142,7 @@ extern int filter_current_check_discard(struct ftrace_event_call *call, extern int trace_define_field(struct ftrace_event_call *call, char *type, char *name, int offset, int size, int is_signed); +extern int trace_define_common_fields(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) @@ -166,11 +167,4 @@ do { \ __trace_printk(ip, fmt, ##args); \ } while (0) -#define __common_field(type, item, is_signed) \ - ret = trace_define_field(event_call, #type, "common_" #item, \ - offsetof(typeof(field.ent), item), \ - sizeof(field.ent.item), is_signed); \ - if (ret) \ - return ret; - #endif /* _LINUX_FTRACE_EVENT_H */ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 4e81c9b37515..127400255e4c 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -299,11 +299,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ struct ftrace_raw_##call field; \ int ret; \ \ - __common_field(int, type, 1); \ - __common_field(unsigned char, flags, 0); \ - __common_field(unsigned char, preempt_count, 0); \ - __common_field(int, pid, 1); \ - __common_field(int, tgid, 1); \ + ret = trace_define_common_fields(event_call); \ + if (ret) \ + return ret; \ \ tstruct; \ \ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index af8fb8ebef0b..9c7ecfb3416f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -62,6 +62,28 @@ err: } EXPORT_SYMBOL_GPL(trace_define_field); +#define __common_field(type, item) \ + ret = trace_define_field(call, #type, "common_" #item, \ + offsetof(typeof(ent), item), \ + sizeof(ent.item), \ + is_signed_type(type)); \ + if (ret) \ + return ret; + +int trace_define_common_fields(struct ftrace_event_call *call) +{ + int ret; + struct trace_entry ent; + + __common_field(unsigned short, type); + __common_field(unsigned char, flags); + __common_field(unsigned char, preempt_count); + __common_field(int, pid); + __common_field(int, tgid); + + return ret; +} + #ifdef CONFIG_MODULES static void trace_destroy_fields(struct ftrace_event_call *call) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index cf2c752a25bf..70875303ae46 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -189,11 +189,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ struct args field; \ int ret; \ \ - __common_field(unsigned char, type, 0); \ - __common_field(unsigned char, flags, 0); \ - __common_field(unsigned char, preempt_count, 0); \ - __common_field(int, pid, 1); \ - __common_field(int, tgid, 1); \ + ret = trace_define_common_fields(event_call); \ + if (ret) \ + return 
ret; \ \ tstruct; \ \ -- cgit v1.2.3 From 540b7b8d65575c80162f2a0f38e1d313c92a6042 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 19 Aug 2009 15:54:51 +0800 Subject: tracing/syscalls: Add filtering support Add filtering support for syscall events: # echo 'mode == 0666' > events/syscalls/sys_enter_open # echo 'ret == 0' > events/syscalls/sys_exit_open # echo 1 > events/syscalls/sys_enter_open # echo 1 > events/syscalls/sys_exit_open # cat trace ... modprobe-3084 [001] 117.463140: sys_open(filename: 917d3e8, flags: 0, mode: 1b6) modprobe-3084 [001] 117.463176: sys_open -> 0x0 less-3086 [001] 117.510455: sys_open(filename: 9c6bdb8, flags: 8000, mode: 1b6) sendmail-2574 [001] 122.145840: sys_open(filename: b807a365, flags: 0, mode: 1b6) ... Signed-off-by: Li Zefan Cc: Jason Baron Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <4A8BAFCB.1040006@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 5 +++-- include/linux/syscalls.h | 16 +++++++++----- include/trace/syscall.h | 7 ++++++ kernel/trace/trace_events.c | 5 +++-- kernel/trace/trace_syscalls.c | 51 +++++++++++++++++++++++++++++++++++++++---- 5 files changed, 71 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 427cbae47f84..df5b085c4150 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -140,8 +140,9 @@ extern int filter_current_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event); -extern int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size, int is_signed); +extern int trace_define_field(struct ftrace_event_call *call, + const char *type, const char *name, + int offset, int size, int is_signed); extern int trace_define_common_fields(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 8d57f77794ee..f124c8995555 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -190,6 +190,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .event = &event_syscall_enter, \ .raw_init = init_enter_##sname, \ .show_format = syscall_enter_format, \ + .define_fields = syscall_enter_define_fields, \ .regfunc = reg_event_syscall_enter, \ .unregfunc = unreg_event_syscall_enter, \ .data = "sys"#sname, \ @@ -226,6 +227,7 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .event = &event_syscall_exit, \ .raw_init = init_exit_##sname, \ .show_format = syscall_exit_format, \ + .define_fields = syscall_exit_define_fields, \ .regfunc = reg_event_syscall_exit, \ .unregfunc = unreg_event_syscall_exit, \ .data = "sys"#sname, \ @@ -233,6 +235,8 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ } #define SYSCALL_METADATA(sname, nb) \ + SYSCALL_TRACE_ENTER_EVENT(sname); \ + SYSCALL_TRACE_EXIT_EVENT(sname); \ static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ @@ -241,20 +245,22 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ .nb_args = nb, \ .types = types_##sname, \ .args = args_##sname, \ - }; \ - SYSCALL_TRACE_ENTER_EVENT(sname); \ - SYSCALL_TRACE_EXIT_EVENT(sname); + .enter_event = &event_enter_##sname, \ + .exit_event = &event_exit_##sname, \ + }; #define SYSCALL_DEFINE0(sname) \ + 
SYSCALL_TRACE_ENTER_EVENT(_##sname); \ + SYSCALL_TRACE_EXIT_EVENT(_##sname); \ static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ __syscall_meta_##sname = { \ .name = "sys_"#sname, \ .nb_args = 0, \ + .enter_event = &event_enter__##sname, \ + .exit_event = &event_exit__##sname, \ }; \ - SYSCALL_TRACE_ENTER_EVENT(_##sname); \ - SYSCALL_TRACE_EXIT_EVENT(_##sname); \ asmlinkage long sys_##sname(void) #else #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5ce85d75d31b..9661dd406b93 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -34,6 +34,8 @@ DECLARE_TRACE_WITH_CALLBACK(syscall_exit, * @args: list of args as strings (args[i] matches types[i]) * @enter_id: associated ftrace enter event id * @exit_id: associated ftrace exit event id + * @enter_event: associated syscall_enter trace event + * @exit_event: associated syscall_exit trace event */ struct syscall_metadata { const char *name; @@ -42,6 +44,9 @@ struct syscall_metadata { const char **args; int enter_id; int exit_id; + + struct ftrace_event_call *enter_event; + struct ftrace_event_call *exit_event; }; #ifdef CONFIG_FTRACE_SYSCALLS @@ -59,6 +64,8 @@ extern int syscall_enter_format(struct ftrace_event_call *call, struct trace_seq *s); extern int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s); +extern int syscall_enter_define_fields(struct ftrace_event_call *call); +extern int syscall_exit_define_fields(struct ftrace_event_call *call); enum print_line_t print_syscall_enter(struct trace_iterator *iter, int flags); enum print_line_t print_syscall_exit(struct trace_iterator *iter, int flags); #endif diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 9c7ecfb3416f..79d352027a61 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,8 +27,8 @@ DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); -int trace_define_field(struct ftrace_event_call *call, char *type, - char *name, int offset, int size, int is_signed) +int trace_define_field(struct ftrace_event_call *call, const char *type, + const char *name, int offset, int size, int is_signed) { struct ftrace_event_field *field; @@ -83,6 +83,7 @@ int trace_define_common_fields(struct ftrace_event_call *call) return ret; } +EXPORT_SYMBOL_GPL(trace_define_common_fields); #ifdef CONFIG_MODULES diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 7336b6c265d7..28e4dae4af21 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -165,6 +165,49 @@ int syscall_exit_format(struct ftrace_event_call *call, struct trace_seq *s) return trace_seq_printf(s, "\nprint fmt: \"0x%%lx\", REC->ret\n"); } +int syscall_enter_define_fields(struct ftrace_event_call *call) +{ + struct syscall_trace_enter trace; + struct syscall_metadata *meta; + int ret; + int nr; + int i; + int offset = offsetof(typeof(trace), args); + + nr = syscall_name_to_nr(call->data); + meta = syscall_nr_to_meta(nr); + + if (!meta) + return 0; + + ret = trace_define_common_fields(call); + if (ret) + return ret; + + for (i = 0; i < meta->nb_args; i++) { + ret = trace_define_field(call, meta->types[i], + meta->args[i], offset, + sizeof(unsigned long), 0); + offset += sizeof(unsigned long); + } + + return ret; +} + +int syscall_exit_define_fields(struct ftrace_event_call *call) +{ + struct syscall_trace_exit trace; + int ret; + + ret = 
trace_define_common_fields(call); + if (ret) + return ret; + + ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0); + + return ret; +} + void ftrace_syscall_enter(struct pt_regs *regs, long id) { struct syscall_trace_enter *entry; @@ -192,8 +235,8 @@ void ftrace_syscall_enter(struct pt_regs *regs, long id) entry->nr = syscall_nr; syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); - trace_current_buffer_unlock_commit(event, 0, 0); - trace_wake_up(); + if (!filter_current_check_discard(sys_data->enter_event, entry, event)) + trace_current_buffer_unlock_commit(event, 0, 0); } void ftrace_syscall_exit(struct pt_regs *regs, long ret) @@ -220,8 +263,8 @@ void ftrace_syscall_exit(struct pt_regs *regs, long ret) entry->nr = syscall_nr; entry->ret = syscall_get_return_value(current, regs); - trace_current_buffer_unlock_commit(event, 0, 0); - trace_wake_up(); + if (!filter_current_check_discard(sys_data->exit_event, entry, event)) + trace_current_buffer_unlock_commit(event, 0, 0); } int reg_event_syscall_enter(void *ptr) -- cgit v1.2.3 From 2bfb1070ba1fdb8cbc2b0b9ff61a3b0701ab40de Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 14 Aug 2009 16:13:12 +0400 Subject: ieee802154: add a sysfs representation of WPAN master devices Add a sysfs/in-kernel representation of LR-WPAN master devices. Signed-off-by: Dmitry Eremin-Solenikov --- include/net/wpan-phy.h | 63 ++++++++++++++++++ net/ieee802154/Makefile | 2 +- net/ieee802154/wpan-class.c | 159 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 223 insertions(+), 1 deletion(-) create mode 100644 include/net/wpan-phy.h create mode 100644 net/ieee802154/wpan-class.c (limited to 'include') diff --git a/include/net/wpan-phy.h b/include/net/wpan-phy.h new file mode 100644 index 000000000000..547b1e271ac9 --- /dev/null +++ b/include/net/wpan-phy.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2007, 2008, 2009 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Written by: + * Dmitry Eremin-Solenikov + */ + +#ifndef WPAN_PHY_H +#define WPAN_PHY_H + +#include +#include + +struct wpan_phy { + struct mutex pib_lock; + + /* + * This is a PIB acording to 802.15.4-2006. 
+ * We do not provide timing-related variables, as they + * aren't used outside of driver + */ + u8 current_channel; + u8 current_page; + u32 channels_supported; + u8 transmit_power; + u8 cca_mode; + + struct device dev; + int idx; + + char priv[0] __attribute__((__aligned__(NETDEV_ALIGN))); +}; + +struct wpan_phy *wpan_phy_alloc(size_t priv_size); +int wpan_phy_register(struct device *parent, struct wpan_phy *phy); +void wpan_phy_unregister(struct wpan_phy *phy); +void wpan_phy_free(struct wpan_phy *phy); + +static inline void *wpan_phy_priv(struct wpan_phy *phy) +{ + BUG_ON(!phy); + return &phy->priv; +} + +struct wpan_phy *wpan_phy_find(const char *str); +static inline const char *wpan_phy_name(struct wpan_phy *phy) +{ + return dev_name(&phy->dev); +} +#endif diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index f99338a26100..4068a9f5113e 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_IEEE802154) += nl802154.o af_802154.o +obj-$(CONFIG_IEEE802154) += nl802154.o af_802154.o wpan-class.o nl802154-y := netlink.o nl_policy.o af_802154-y := af_ieee802154.o raw.o dgram.o diff --git a/net/ieee802154/wpan-class.c b/net/ieee802154/wpan-class.c new file mode 100644 index 000000000000..f306604da67a --- /dev/null +++ b/net/ieee802154/wpan-class.c @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2007, 2008, 2009 Siemens AG + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +#include +#include +#include + +#include + +#define MASTER_SHOW_COMPLEX(name, format_string, args...) \ +static ssize_t name ## _show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct wpan_phy *phy = container_of(dev, struct wpan_phy, dev); \ + int ret; \ + \ + mutex_lock(&phy->pib_lock); \ + ret = sprintf(buf, format_string "\n", args); \ + mutex_unlock(&phy->pib_lock); \ + return ret; \ +} + +#define MASTER_SHOW(field, format_string) \ + MASTER_SHOW_COMPLEX(field, format_string, phy->field) + +MASTER_SHOW(current_channel, "%d"); +MASTER_SHOW(current_page, "%d"); +MASTER_SHOW(channels_supported, "%#x"); +MASTER_SHOW_COMPLEX(transmit_power, "%d +- %d dB", + ((signed char) (phy->transmit_power << 2)) >> 2, + (phy->transmit_power >> 6) ? 
(phy->transmit_power >> 6) * 3 : 1 ); +MASTER_SHOW(cca_mode, "%d"); + +static struct device_attribute pmib_attrs[] = { + __ATTR_RO(current_channel), + __ATTR_RO(current_page), + __ATTR_RO(channels_supported), + __ATTR_RO(transmit_power), + __ATTR_RO(cca_mode), + {}, +}; + +static void wpan_phy_release(struct device *d) +{ + struct wpan_phy *phy = container_of(d, struct wpan_phy, dev); + kfree(phy); +} + +static struct class wpan_phy_class = { + .name = "ieee802154", + .dev_release = wpan_phy_release, + .dev_attrs = pmib_attrs, +}; + +static DEFINE_MUTEX(wpan_phy_mutex); +static int wpan_phy_idx; + +static int wpan_phy_match(struct device *dev, void *data) +{ + return !strcmp(dev_name(dev), (const char *)data); +} + +struct wpan_phy *wpan_phy_find(const char *str) +{ + struct device *dev; + + if (WARN_ON(!str)) + return NULL; + + dev = class_find_device(&wpan_phy_class, NULL, + (void *)str, wpan_phy_match); + if (!dev) + return NULL; + + return container_of(dev, struct wpan_phy, dev); +} +EXPORT_SYMBOL(wpan_phy_find); + +static int wpan_phy_idx_valid(int idx) +{ + return idx >= 0; +} + +struct wpan_phy *wpan_phy_alloc(size_t priv_size) +{ + struct wpan_phy *phy = kzalloc(sizeof(*phy) + priv_size, + GFP_KERNEL); + + mutex_lock(&wpan_phy_mutex); + phy->idx = wpan_phy_idx++; + if (unlikely(!wpan_phy_idx_valid(phy->idx))) { + wpan_phy_idx--; + mutex_unlock(&wpan_phy_mutex); + kfree(phy); + return NULL; + } + mutex_unlock(&wpan_phy_mutex); + + mutex_init(&phy->pib_lock); + + device_initialize(&phy->dev); + dev_set_name(&phy->dev, "wpan-phy%d", phy->idx); + + phy->dev.class = &wpan_phy_class; + + return phy; +} +EXPORT_SYMBOL(wpan_phy_alloc); + +int wpan_phy_register(struct device *parent, struct wpan_phy *phy) +{ + phy->dev.parent = parent; + + return device_add(&phy->dev); +} +EXPORT_SYMBOL(wpan_phy_register); + +void wpan_phy_unregister(struct wpan_phy *phy) +{ + device_del(&phy->dev); +} +EXPORT_SYMBOL(wpan_phy_unregister); + +void wpan_phy_free(struct wpan_phy *phy) +{ + put_device(&phy->dev); +} +EXPORT_SYMBOL(wpan_phy_free); + +static int __init wpan_phy_class_init(void) +{ + return class_register(&wpan_phy_class); +} +subsys_initcall(wpan_phy_class_init); + +static void __exit wpan_phy_class_exit(void) +{ + class_unregister(&wpan_phy_class); +} +module_exit(wpan_phy_class_exit); + +MODULE_DESCRIPTION("IEEE 802.15.4 device class"); +MODULE_LICENSE("GPL v2"); + -- cgit v1.2.3 From 16eea493da563b5a3356a77c6d8776dffc29d3b6 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Wed, 19 Aug 2009 19:32:24 +0400 Subject: ieee802154: add support for channel pages from IEEE 802.15.4-2006 IEEE 802.15.4-2006 adds new concept: channel pages, which can contain several channels. Add support for channel pages in the API and in the fakehard driver. Signed-off-by: Dmitry Eremin-Solenikov --- drivers/ieee802154/fakehard.c | 10 ++++++---- include/linux/nl802154.h | 2 ++ include/net/ieee802154_netdev.h | 6 +++--- include/net/nl802154.h | 2 +- net/ieee802154/netlink.c | 28 +++++++++++++++++++++++++--- net/ieee802154/nl_policy.c | 1 + 6 files changed, 38 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c index 22a93bc764c5..c1c9697f9fde 100644 --- a/drivers/ieee802154/fakehard.c +++ b/drivers/ieee802154/fakehard.c @@ -119,12 +119,13 @@ static u8 fake_get_bsn(struct net_device *dev) * 802.15.4-2006 document. 
*/ static int fake_assoc_req(struct net_device *dev, - struct ieee802154_addr *addr, u8 channel, u8 cap) + struct ieee802154_addr *addr, u8 channel, u8 page, u8 cap) { struct wpan_phy *phy = net_to_phy(dev); mutex_lock(&phy->pib_lock); phy->current_channel = channel; + phy->current_page = page; mutex_unlock(&phy->pib_lock); /* We simply emulate it here */ @@ -191,7 +192,7 @@ static int fake_disassoc_req(struct net_device *dev, * document, with 7.3.8 describing coordinator realignment. */ static int fake_start_req(struct net_device *dev, struct ieee802154_addr *addr, - u8 channel, + u8 channel, u8 page, u8 bcn_ord, u8 sf_ord, u8 pan_coord, u8 blx, u8 coord_realign) { @@ -199,6 +200,7 @@ static int fake_start_req(struct net_device *dev, struct ieee802154_addr *addr, mutex_lock(&phy->pib_lock); phy->current_channel = channel; + phy->current_page = page; mutex_unlock(&phy->pib_lock); /* We don't emulate beacons here at all, so START should fail */ @@ -222,11 +224,11 @@ static int fake_start_req(struct net_device *dev, struct ieee802154_addr *addr, * Note: This is in section 7.5.2.1 of the IEEE 802.15.4-2006 document. */ static int fake_scan_req(struct net_device *dev, u8 type, u32 channels, - u8 duration) + u8 page, u8 duration) { u8 edl[27] = {}; return ieee802154_nl_scan_confirm(dev, IEEE802154_SUCCESS, type, - channels, + channels, page, type == IEEE802154_MAC_SCAN_ED ? edl : NULL); } diff --git a/include/linux/nl802154.h b/include/linux/nl802154.h index 9a1af5f871a3..b7d9435d5a9f 100644 --- a/include/linux/nl802154.h +++ b/include/linux/nl802154.h @@ -64,6 +64,8 @@ enum { IEEE802154_ATTR_COORD_REALIGN, IEEE802154_ATTR_SEC, + IEEE802154_ATTR_PAGE, + __IEEE802154_ATTR_MAX, }; diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index e2506af3e7c8..5dc6a61952de 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -80,7 +80,7 @@ static inline int mac_cb_type(struct sk_buff *skb) struct ieee802154_mlme_ops { int (*assoc_req)(struct net_device *dev, struct ieee802154_addr *addr, - u8 channel, u8 cap); + u8 channel, u8 page, u8 cap); int (*assoc_resp)(struct net_device *dev, struct ieee802154_addr *addr, u16 short_addr, u8 status); @@ -89,10 +89,10 @@ struct ieee802154_mlme_ops { u8 reason); int (*start_req)(struct net_device *dev, struct ieee802154_addr *addr, - u8 channel, u8 bcn_ord, u8 sf_ord, + u8 channel, u8 page, u8 bcn_ord, u8 sf_ord, u8 pan_coord, u8 blx, u8 coord_realign); int (*scan_req)(struct net_device *dev, - u8 type, u32 channels, u8 duration); + u8 type, u32 channels, u8 page, u8 duration); /* * FIXME: these should become the part of PIB/MIB interface. diff --git a/include/net/nl802154.h b/include/net/nl802154.h index e554ecd3727a..99d2ba1c7e03 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -95,7 +95,7 @@ int ieee802154_nl_disassoc_confirm(struct net_device *dev, * Note: This API does not permit the return of an active scan result. 
*/ int ieee802154_nl_scan_confirm(struct net_device *dev, - u8 status, u8 scan_type, u32 unscanned, + u8 status, u8 scan_type, u32 unscanned, u8 page, u8 *edl/*, struct list_head *pan_desc_list */); /** diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index cd0567f06716..2106ecbf0308 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -232,7 +232,7 @@ nla_put_failure: EXPORT_SYMBOL(ieee802154_nl_beacon_indic); int ieee802154_nl_scan_confirm(struct net_device *dev, - u8 status, u8 scan_type, u32 unscanned, + u8 status, u8 scan_type, u32 unscanned, u8 page, u8 *edl/* , struct list_head *pan_desc_list */) { struct sk_buff *msg; @@ -251,6 +251,7 @@ int ieee802154_nl_scan_confirm(struct net_device *dev, NLA_PUT_U8(msg, IEEE802154_ATTR_STATUS, status); NLA_PUT_U8(msg, IEEE802154_ATTR_SCAN_TYPE, scan_type); NLA_PUT_U32(msg, IEEE802154_ATTR_CHANNELS, unscanned); + NLA_PUT_U8(msg, IEEE802154_ATTR_PAGE, page); if (edl) NLA_PUT(msg, IEEE802154_ATTR_ED_LIST, 27, edl); @@ -349,6 +350,7 @@ static int ieee802154_associate_req(struct sk_buff *skb, { struct net_device *dev; struct ieee802154_addr addr; + u8 page; int ret = -EINVAL; if (!info->attrs[IEEE802154_ATTR_CHANNEL] || @@ -374,8 +376,14 @@ static int ieee802154_associate_req(struct sk_buff *skb, } addr.pan_id = nla_get_u16(info->attrs[IEEE802154_ATTR_COORD_PAN_ID]); + if (info->attrs[IEEE802154_ATTR_PAGE]) + page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); + else + page = 0; + ret = ieee802154_mlme_ops(dev)->assoc_req(dev, &addr, nla_get_u8(info->attrs[IEEE802154_ATTR_CHANNEL]), + page, nla_get_u8(info->attrs[IEEE802154_ATTR_CAPABILITY])); dev_put(dev); @@ -458,6 +466,7 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) struct ieee802154_addr addr; u8 channel, bcn_ord, sf_ord; + u8 page; int pan_coord, blx, coord_realign; int ret; @@ -488,13 +497,19 @@ static int ieee802154_start_req(struct sk_buff *skb, struct genl_info *info) blx = nla_get_u8(info->attrs[IEEE802154_ATTR_BAT_EXT]); coord_realign = nla_get_u8(info->attrs[IEEE802154_ATTR_COORD_REALIGN]); + if (info->attrs[IEEE802154_ATTR_PAGE]) + page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); + else + page = 0; + + if (addr.short_addr == IEEE802154_ADDR_BROADCAST) { ieee802154_nl_start_confirm(dev, IEEE802154_NO_SHORT_ADDRESS); dev_put(dev); return -EINVAL; } - ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, + ret = ieee802154_mlme_ops(dev)->start_req(dev, &addr, channel, page, bcn_ord, sf_ord, pan_coord, blx, coord_realign); dev_put(dev); @@ -508,6 +523,7 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) u8 type; u32 channels; u8 duration; + u8 page; if (!info->attrs[IEEE802154_ATTR_SCAN_TYPE] || !info->attrs[IEEE802154_ATTR_CHANNELS] || @@ -522,7 +538,13 @@ static int ieee802154_scan_req(struct sk_buff *skb, struct genl_info *info) channels = nla_get_u32(info->attrs[IEEE802154_ATTR_CHANNELS]); duration = nla_get_u8(info->attrs[IEEE802154_ATTR_DURATION]); - ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, + if (info->attrs[IEEE802154_ATTR_PAGE]) + page = nla_get_u8(info->attrs[IEEE802154_ATTR_PAGE]); + else + page = 0; + + + ret = ieee802154_mlme_ops(dev)->scan_req(dev, type, channels, page, duration); dev_put(dev); diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 83cb4ccef90d..2363ebee02e7 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -33,6 +33,7 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 
1] = { [IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, }, [IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_PAGE] = { .type = NLA_U8, }, [IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, }, [IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, }, -- cgit v1.2.3 From 929122cdd5d4c344e59f9b55f870a8fcf7aa0d27 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 14 Aug 2009 20:00:20 +0400 Subject: Drop ARPHRD_IEEE802154_PHY There are not maste devices in mac802154 anymore, so drop ARPHRD_IEEE802154_PHY definition. Signed-off-by: Dmitry Eremin-Solenikov --- include/linux/if_arp.h | 1 - net/core/dev.c | 4 ++-- net/ieee802154/af_ieee802154.c | 4 +--- net/ieee802154/raw.c | 3 +-- 4 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index b554300ef8bf..282eb37e2dec 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -87,7 +87,6 @@ #define ARPHRD_IEEE80211_PRISM 802 /* IEEE 802.11 + Prism2 header */ #define ARPHRD_IEEE80211_RADIOTAP 803 /* IEEE 802.11 + radiotap header */ #define ARPHRD_IEEE802154 804 -#define ARPHRD_IEEE802154_PHY 805 #define ARPHRD_PHONET 820 /* PhoNet media type */ #define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */ diff --git a/net/core/dev.c b/net/core/dev.c index 09fb03fa1ae6..4b837896ebd2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -269,7 +269,7 @@ static const unsigned short netdev_lock_type[] = ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL, ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, - ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_IEEE802154_PHY, + ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE}; static const char *const netdev_lock_name[] = @@ -287,7 +287,7 @@ static const char *const netdev_lock_name[] = "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL", "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", - "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_IEEE802154_PHY", + "_xmit_PHONET_PIPE", "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"}; static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)]; diff --git a/net/ieee802154/af_ieee802154.c b/net/ieee802154/af_ieee802154.c index d504c349cb0c..cd949d5e451b 100644 --- a/net/ieee802154/af_ieee802154.c +++ b/net/ieee802154/af_ieee802154.c @@ -147,9 +147,7 @@ static int ieee802154_dev_ioctl(struct sock *sk, struct ifreq __user *arg, dev_load(sock_net(sk), ifr.ifr_name); dev = dev_get_by_name(sock_net(sk), ifr.ifr_name); - if ((dev->type == ARPHRD_IEEE802154 || - dev->type == ARPHRD_IEEE802154_PHY) && - dev->netdev_ops->ndo_do_ioctl) + if (dev->type == ARPHRD_IEEE802154 && dev->netdev_ops->ndo_do_ioctl) ret = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, cmd); if (!ret && copy_to_user(arg, &ifr, sizeof(struct ifreq))) diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c index 60dee69a1d04..4681501aae93 100644 --- a/net/ieee802154/raw.c +++ b/net/ieee802154/raw.c @@ -74,8 +74,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int len) goto out; } - if (dev->type != ARPHRD_IEEE802154_PHY && - dev->type != ARPHRD_IEEE802154) { + if (dev->type != ARPHRD_IEEE802154) { err = -ENODEV; goto out_put; } -- cgit v1.2.3 From fc69f4a6af49ee69475dc4217924d9edf77760e0 Mon Sep 17 
00:00:00 2001 From: Tai-hwa Liang Date: Sun, 10 May 2009 18:15:39 -0700 Subject: Input: add new driver for Sentelic Finger Sensing Pad This is the driver for Sentelic Finger Sensing Pad which can be found on MSI WIND Netbook. Signed-off-by: Tai-hwa Liang Signed-off-by: Dmitry Torokhov --- Documentation/input/sentelic.txt | 475 ++++++++++++++++++++ drivers/input/mouse/Kconfig | 8 + drivers/input/mouse/Makefile | 1 + drivers/input/mouse/psmouse-base.c | 26 +- drivers/input/mouse/psmouse.h | 1 + drivers/input/mouse/sentelic.c | 867 +++++++++++++++++++++++++++++++++++++ drivers/input/mouse/sentelic.h | 98 +++++ drivers/input/serio/libps2.c | 15 +- include/linux/libps2.h | 1 + 9 files changed, 1488 insertions(+), 4 deletions(-) create mode 100644 Documentation/input/sentelic.txt create mode 100644 drivers/input/mouse/sentelic.c create mode 100644 drivers/input/mouse/sentelic.h (limited to 'include') diff --git a/Documentation/input/sentelic.txt b/Documentation/input/sentelic.txt new file mode 100644 index 000000000000..f7160a2fb6a2 --- /dev/null +++ b/Documentation/input/sentelic.txt @@ -0,0 +1,475 @@ +Copyright (C) 2002-2008 Sentelic Corporation. +Last update: Oct-31-2008 + +============================================================================== +* Finger Sensing Pad Intellimouse Mode(scrolling wheel, 4th and 5th buttons) +============================================================================== +A) MSID 4: Scrolling wheel mode plus Forward page(4th button) and Backward + page (5th button) +@1. Set sample rate to 200; +@2. Set sample rate to 200; +@3. Set sample rate to 80; +@4. Issuing the "Get device ID" command (0xF2) and waits for the response; +@5. FSP will respond 0x04. + +Packet 1 + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |Y|X|y|x|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 | | |B|F|W|W|W|W| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7 => Y overflow + Bit6 => X overflow + Bit5 => Y sign bit + Bit4 => X sign bit + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. +Byte 2: X Movement(9-bit 2's complement integers) +Byte 3: Y Movement(9-bit 2's complement integers) +Byte 4: Bit3~Bit0 => the scrolling wheel's movement since the last data report. + valid values, -8 ~ +7 + Bit4 => 1 = 4th mouse button is pressed, Forward one page. + 0 = 4th mouse button is not pressed. + Bit5 => 1 = 5th mouse button is pressed, Backward one page. + 0 = 5th mouse button is not pressed. + +B) MSID 6: Horizontal and Vertical scrolling. +@ Set bit 1 in register 0x40 to 1 + +# FSP replaces scrolling wheel's movement as 4 bits to show horizontal and + vertical scrolling. + +Packet 1 + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |Y|X|y|x|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 | | |B|F|l|r|u|d| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7 => Y overflow + Bit6 => X overflow + Bit5 => Y sign bit + Bit4 => X sign bit + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. 
+Byte 2: X Movement(9-bit 2's complement integers) +Byte 3: Y Movement(9-bit 2's complement integers) +Byte 4: Bit0 => the Vertical scrolling movement downward. + Bit1 => the Vertical scrolling movement upward. + Bit2 => the Vertical scrolling movement rightward. + Bit3 => the Vertical scrolling movement leftward. + Bit4 => 1 = 4th mouse button is pressed, Forward one page. + 0 = 4th mouse button is not pressed. + Bit5 => 1 = 5th mouse button is pressed, Backward one page. + 0 = 5th mouse button is not pressed. + +C) MSID 7: +# FSP uses 2 packets(8 Bytes) data to represent Absolute Position + so we have PACKET NUMBER to identify packets. + If PACKET NUMBER is 0, the packet is Packet 1. + If PACKET NUMBER is 1, the packet is Packet 2. + Please count this number in program. + +# MSID6 special packet will be enable at the same time when enable MSID 7. + +============================================================================== +* Absolute position for STL3886-G0. +============================================================================== +@ Set bit 2 or 3 in register 0x40 to 1 +@ Set bit 6 in register 0x40 to 1 + +Packet 1 (ABSOLUTE POSITION) + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|V|1|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|d|u|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + Bit5 => valid bit + Bit4 => 1 + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. +Byte 2: X coordinate (xpos[9:2]) +Byte 3: Y coordinate (ypos[9:2]) +Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit4 => scroll up + Bit5 => scroll down + Bit6 => scroll left + Bit7 => scroll right + +Notify Packet for G0 + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |1|0|0|1|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |M|M|M|M|M|M|M|M| 4 |0|0|0|0|0|0|0|0| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + Bit5 => 0 + Bit4 => 1 + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. +Byte 2: Message Type => 0x5A (Enable/Disable status packet) + Mode Type => 0xA5 (Normal/Icon mode status) +Byte 3: Message Type => 0x00 (Disabled) + => 0x01 (Enabled) + Mode Type => 0x00 (Normal) + => 0x01 (Icon) +Byte 4: Bit7~Bit0 => Don't Care + +============================================================================== +* Absolute position for STL3888-A0. 
+============================================================================== +Packet 1 (ABSOLUTE POSITION) + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|V|A|1|L|0|1| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |x|x|y|y|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. + When both fingers are up, the last two reports have zero valid + bit. + Bit4 => arc + Bit3 => 1 + Bit2 => Left Button, 1 is pressed, 0 is released. + Bit1 => 0 + Bit0 => 1 +Byte 2: X coordinate (xpos[9:2]) +Byte 3: Y coordinate (ypos[9:2]) +Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit5~Bit4 => y1_g + Bit7~Bit6 => x1_g + +Packet 2 (ABSOLUTE POSITION) + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |0|1|V|A|1|R|1|0| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |x|x|y|y|X|X|Y|Y| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordinates packet + => 10, Notify packet + Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up. + When both fingers are up, the last two reports have zero valid + bit. + Bit4 => arc + Bit3 => 1 + Bit2 => Right Button, 1 is pressed, 0 is released. + Bit1 => 1 + Bit0 => 0 +Byte 2: X coordinate (xpos[9:2]) +Byte 3: Y coordinate (ypos[9:2]) +Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0]) + Bit3~Bit2 => X coordinate (ypos[1:0]) + Bit5~Bit4 => y2_g + Bit7~Bit6 => x2_g + +Notify Packet for STL3888-A0 + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |1|0|1|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|d|u|0|0|0|0| + |---------------| |---------------| |---------------| |---------------| + +Byte 1: Bit7~Bit6 => 00, Normal data packet + => 01, Absolute coordination packet + => 10, Notify packet + Bit5 => 1 + Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1): + 0: left button is generated by the on-pad command + 1: left button is generated by the external button + Bit3 => 1 + Bit2 => Middle Button, 1 is pressed, 0 is not pressed. + Bit1 => Right Button, 1 is pressed, 0 is not pressed. + Bit0 => Left Button, 1 is pressed, 0 is not pressed. +Byte 2: Message Type => 0xB7 (Multi Finger, Multi Coordinate mode) +Byte 3: Bit7~Bit6 => Don't care + Bit5~Bit4 => Number of fingers + Bit3~Bit1 => Reserved + Bit0 => 1: enter gesture mode; 0: leaving gesture mode +Byte 4: Bit7 => scroll right button + Bit6 => scroll left button + Bit5 => scroll down button + Bit4 => scroll up button + * Note that if gesture and additional button (Bit4~Bit7) + happen at the same time, the button information will not + be sent. 
+ Bit3~Bit0 => Reserved + +Sample sequence of Multi-finger, Multi-coordinate mode: + + notify packet (valid bit == 1), abs pkt 1, abs pkt 2, abs pkt 1, + abs pkt 2, ..., notify packet(valid bit == 0) + +============================================================================== +* FSP Enable/Disable packet +============================================================================== + Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 +BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------| + 1 |Y|X|0|0|1|M|R|L| 2 |0|1|0|1|1|0|1|E| 3 | | | | | | | | | 4 | | | | | | | | | + |---------------| |---------------| |---------------| |---------------| + +FSP will send out enable/disable packet when FSP receive PS/2 enable/disable +command. Host will receive the packet which Middle, Right, Left button will +be set. The packet only use byte 0 and byte 1 as a pattern of original packet. +Ignore the other bytes of the packet. + +Byte 1: Bit7 => 0, Y overflow + Bit6 => 0, X overflow + Bit5 => 0, Y sign bit + Bit4 => 0, X sign bit + Bit3 => 1 + Bit2 => 1, Middle Button + Bit1 => 1, Right Button + Bit0 => 1, Left Button +Byte 2: Bit7~1 => (0101101b) + Bit0 => 1 = Enable + 0 = Disable +Byte 3: Don't care +Byte 4: Don't care (MOUSE ID 3, 4) +Byte 5~8: Don't care (Absolute packet) + +============================================================================== +* PS/2 Command Set +============================================================================== + +FSP supports basic PS/2 commanding set and modes, refer to following URL for +details about PS/2 commands: + +http://www.computer-engineering.org/index.php?title=PS/2_Mouse_Interface + +============================================================================== +* Programming Sequence for Determining Packet Parsing Flow +============================================================================== +1. Identify FSP by reading device ID(0x00) and version(0x01) register + +2. Determine number of buttons by reading status2 (0x0b) register + + buttons = reg[0x0b] & 0x30 + + if buttons == 0x30 or buttons == 0x20: + # two/four buttons + Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' + section A for packet parsing detail(ignore byte 4, bit ~ 7) + elif buttons == 0x10: + # 6 buttons + Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' + section B for packet parsing detail + elif buttons == 0x00: + # 6 buttons + Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse' + section A for packet parsing detail + +============================================================================== +* Programming Sequence for Register Reading/Writing +============================================================================== + +Register inversion requirement: + + Following values needed to be inverted(the '~' operator in C) before being +sent to FSP: + + 0xe9, 0xee, 0xf2 and 0xff. + +Register swapping requirement: + + Following values needed to have their higher 4 bits and lower 4 bits being +swapped before being sent to FSP: + + 10, 20, 40, 60, 80, 100 and 200. + +Register reading sequence: + + 1. send 0xf3 PS/2 command to FSP; + + 2. send 0x66 PS/2 command to FSP; + + 3. send 0x88 PS/2 command to FSP; + + 4. send 0xf3 PS/2 command to FSP; + + 5. if the register address being to read is not required to be + inverted(refer to the 'Register inversion requirement' section), + goto step 6 + + 5a. send 0x68 PS/2 command to FSP; + + 5b. send the inverted register address to FSP and goto step 8; + + 6. 
+==============================================================================
+* Register Listing
+==============================================================================
+
+offset  width       default  r/w  name
+0x00    bit7~bit0   0x01     RO   device ID
+
+0x01    bit7~bit0   0xc0     RW   version ID
+
+0x02    bit7~bit0   0x01     RO   vendor ID
+
+0x03    bit7~bit0   0x01     RO   product ID
+
+0x04    bit3~bit0   0x01     RW   revision ID
+
+0x0b                         RO   test mode status 1
+        bit3        1        RO   0: rotated 180 degrees, 1: no rotation
+
+        bit5~bit4            RO   number of buttons
+                    11 => 2, lbtn/rbtn
+                    10 => 4, lbtn/rbtn/scru/scrd
+                    01 => 6, lbtn/rbtn/scru/scrd/scrl/scrr
+                    00 => 6, lbtn/rbtn/scru/scrd/fbtn/bbtn
+
+0x0f                         RW   register file page control
+        bit0        0        RW   1 to enable page 1 register files
+
+0x10                         RW   system control 1
+        bit0        1        RW   Reserved, must be 1
+        bit1        0        RW   Reserved, must be 0
+        bit4        1        RW   Reserved, must be 0
+        bit5        0        RW   register clock gating enable
+                    0: read only, 1: read/write enable
+        (Note that the following registers do not require clock gating to be
+        enabled prior to writing: 05 06 07 08 09 0c 0f 10 11 12 16 17 18 23
+        2e 40 41 42 43.)
+ +0x31 RW on-pad command detection + bit7 0 RW on-pad command left button down tag + enable + 0: disable, 1: enable + +0x34 RW on-pad command control 5 + bit4~bit0 0x05 RW XLO in 0s/4/1, so 03h = 0010.1b = 2.5 + (Note that position unit is in 0.5 scanline) + + bit7 0 RW on-pad tap zone enable + 0: disable, 1: enable + +0x35 RW on-pad command control 6 + bit4~bit0 0x1d RW XHI in 0s/4/1, so 19h = 1100.1b = 12.5 + (Note that position unit is in 0.5 scanline) + +0x36 RW on-pad command control 7 + bit4~bit0 0x04 RW YLO in 0s/4/1, so 03h = 0010.1b = 2.5 + (Note that position unit is in 0.5 scanline) + +0x37 RW on-pad command control 8 + bit4~bit0 0x13 RW YHI in 0s/4/1, so 11h = 1000.1b = 8.5 + (Note that position unit is in 0.5 scanline) + +0x40 RW system control 5 + bit1 0 RW FSP Intellimouse mode enable + 0: disable, 1: enable + + bit2 0 RW movement + abs. coordinate mode enable + 0: disable, 1: enable + (Note that this function has the functionality of bit 1 even when + bit 1 is not set. However, the format is different from that of bit 1. + In addition, when bit 1 and bit 2 are set at the same time, bit 2 will + override bit 1.) + + bit3 0 RW abs. coordinate only mode enable + 0: disable, 1: enable + (Note that this function has the functionality of bit 1 even when + bit 1 is not set. However, the format is different from that of bit 1. + In addition, when bit 1, bit 2 and bit 3 are set at the same time, + bit 3 will override bit 1 and 2.) + + bit5 0 RW auto switch enable + 0: disable, 1: enable + + bit6 0 RW G0 abs. + notify packet format enable + 0: disable, 1: enable + (Note that the absolute/relative coordinate output still depends on + bit 2 and 3. That is, if any of those bit is 1, host will receive + absolute coordinates; otherwise, host only receives packets with + relative coordinate.) + +0x43 RW on-pad control + bit0 0 RW on-pad control enable + 0: disable, 1: enable + (Note that if this bit is cleared, bit 3/5 will be ineffective) + + bit3 0 RW on-pad fix vertical scrolling enable + 0: disable, 1: enable + + bit5 0 RW on-pad fix horizontal scrolling enable + 0: disable, 1: enable diff --git a/drivers/input/mouse/Kconfig b/drivers/input/mouse/Kconfig index 90bef5d498f0..3feeb3af8abd 100644 --- a/drivers/input/mouse/Kconfig +++ b/drivers/input/mouse/Kconfig @@ -107,6 +107,14 @@ config MOUSE_PS2_ELANTECH entries. For further information, see . +config MOUSE_PS2_SENTELIC + bool "Sentelic Finger Sensing Pad PS/2 protocol extension" + depends on MOUSE_PS2 + help + Say Y here if you have a laptop (such as MSI WIND Netbook) + with Sentelic Finger Sensing Pad touchpad. + + If unsure, say N. 
config MOUSE_PS2_TOUCHKIT bool "eGalax TouchKit PS/2 protocol extension" diff --git a/drivers/input/mouse/Makefile b/drivers/input/mouse/Makefile index ea58c9a372b6..570c84a4a654 100644 --- a/drivers/input/mouse/Makefile +++ b/drivers/input/mouse/Makefile @@ -27,5 +27,6 @@ psmouse-$(CONFIG_MOUSE_PS2_ELANTECH) += elantech.o psmouse-$(CONFIG_MOUSE_PS2_OLPC) += hgpk.o psmouse-$(CONFIG_MOUSE_PS2_LOGIPS2PP) += logips2pp.o psmouse-$(CONFIG_MOUSE_PS2_LIFEBOOK) += lifebook.o +psmouse-$(CONFIG_MOUSE_PS2_SENTELIC) += sentelic.o psmouse-$(CONFIG_MOUSE_PS2_TRACKPOINT) += trackpoint.o psmouse-$(CONFIG_MOUSE_PS2_TOUCHKIT) += touchkit_ps2.o diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index b407b355dceb..df318887ca09 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -30,6 +30,7 @@ #include "trackpoint.h" #include "touchkit_ps2.h" #include "elantech.h" +#include "sentelic.h" #define DRIVER_DESC "PS/2 mouse driver" @@ -666,6 +667,20 @@ static int psmouse_extensions(struct psmouse *psmouse, max_proto = PSMOUSE_IMEX; } +/* + * Try Finger Sensing Pad + */ + if (max_proto > PSMOUSE_IMEX) { + if (fsp_detect(psmouse, set_properties) == 0) { + if (!set_properties || fsp_init(psmouse) == 0) + return PSMOUSE_FSP; +/* + * Init failed, try basic relative protocols + */ + max_proto = PSMOUSE_IMEX; + } + } + if (max_proto > PSMOUSE_IMEX) { if (genius_detect(psmouse, set_properties) == 0) return PSMOUSE_GENPS; @@ -813,7 +828,16 @@ static const struct psmouse_protocol psmouse_protocols[] = { .detect = elantech_detect, .init = elantech_init, }, - #endif +#endif +#ifdef CONFIG_MOUSE_PS2_SENTELIC + { + .type = PSMOUSE_FSP, + .name = "FSPPS/2", + .alias = "fsp", + .detect = fsp_detect, + .init = fsp_init, + }, +#endif { .type = PSMOUSE_CORTRON, .name = "CortronPS/2", diff --git a/drivers/input/mouse/psmouse.h b/drivers/input/mouse/psmouse.h index e3562daeb2ed..cca1744c2a08 100644 --- a/drivers/input/mouse/psmouse.h +++ b/drivers/input/mouse/psmouse.h @@ -91,6 +91,7 @@ enum psmouse_type { PSMOUSE_CORTRON, PSMOUSE_HGPK, PSMOUSE_ELANTECH, + PSMOUSE_FSP, PSMOUSE_AUTO /* This one should always be last */ }; diff --git a/drivers/input/mouse/sentelic.c b/drivers/input/mouse/sentelic.c new file mode 100644 index 000000000000..97b1e72855a0 --- /dev/null +++ b/drivers/input/mouse/sentelic.c @@ -0,0 +1,867 @@ +/*- + * Finger Sensing Pad PS/2 mouse driver. + * + * Copyright (C) 2005-2007 Asia Vital Components Co., Ltd. + * Copyright (C) 2005-2009 Tai-hwa Liang, Sentelic Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "psmouse.h" +#include "sentelic.h" + +/* + * Timeout for FSP PS/2 command only (in milliseconds). + */ +#define FSP_CMD_TIMEOUT 200 +#define FSP_CMD_TIMEOUT2 30 + +/** Driver version. 
*/ +static const char fsp_drv_ver[] = "1.0.0-K"; + +/* + * Make sure that the value being sent to FSP will not conflict with + * possible sample rate values. + */ +static unsigned char fsp_test_swap_cmd(unsigned char reg_val) +{ + switch (reg_val) { + case 10: case 20: case 40: case 60: case 80: case 100: case 200: + /* + * The requested value being sent to FSP matched to possible + * sample rates, swap the given value such that the hardware + * wouldn't get confused. + */ + return (reg_val >> 4) | (reg_val << 4); + default: + return reg_val; /* swap isn't necessary */ + } +} + +/* + * Make sure that the value being sent to FSP will not conflict with certain + * commands. + */ +static unsigned char fsp_test_invert_cmd(unsigned char reg_val) +{ + switch (reg_val) { + case 0xe9: case 0xee: case 0xf2: case 0xff: + /* + * The requested value being sent to FSP matched to certain + * commands, inverse the given value such that the hardware + * wouldn't get confused. + */ + return ~reg_val; + default: + return reg_val; /* inversion isn't necessary */ + } +} + +static int fsp_reg_read(struct psmouse *psmouse, int reg_addr, int *reg_val) +{ + struct ps2dev *ps2dev = &psmouse->ps2dev; + unsigned char param[3]; + unsigned char addr; + int rc = -1; + + /* + * We need to shut off the device and switch it into command + * mode so we don't confuse our protocol handler. We don't need + * to do that for writes because sysfs set helper does this for + * us. + */ + ps2_command(ps2dev, NULL, PSMOUSE_CMD_DISABLE); + psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); + mutex_lock(&ps2dev->cmd_mutex); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + goto out; + + /* should return 0xfe(request for resending) */ + ps2_sendbyte(ps2dev, 0x66, FSP_CMD_TIMEOUT2); + /* should return 0xfc(failed) */ + ps2_sendbyte(ps2dev, 0x88, FSP_CMD_TIMEOUT2); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + goto out; + + if ((addr = fsp_test_invert_cmd(reg_addr)) != reg_addr) { + ps2_sendbyte(ps2dev, 0x68, FSP_CMD_TIMEOUT2); + } else if ((addr = fsp_test_swap_cmd(reg_addr)) != reg_addr) { + /* swapping is required */ + ps2_sendbyte(ps2dev, 0xcc, FSP_CMD_TIMEOUT2); + /* expect 0xfe */ + } else { + /* swapping isn't necessary */ + ps2_sendbyte(ps2dev, 0x66, FSP_CMD_TIMEOUT2); + /* expect 0xfe */ + } + /* should return 0xfc(failed) */ + ps2_sendbyte(ps2dev, addr, FSP_CMD_TIMEOUT); + + if (__ps2_command(ps2dev, param, PSMOUSE_CMD_GETINFO) < 0) + goto out; + + *reg_val = param[2]; + rc = 0; + + out: + mutex_unlock(&ps2dev->cmd_mutex); + ps2_command(ps2dev, NULL, PSMOUSE_CMD_ENABLE); + psmouse_set_state(psmouse, PSMOUSE_ACTIVATED); + dev_dbg(&ps2dev->serio->dev, "READ REG: 0x%02x is 0x%02x (rc = %d)\n", + reg_addr, *reg_val, rc); + return rc; +} + +static int fsp_reg_write(struct psmouse *psmouse, int reg_addr, int reg_val) +{ + struct ps2dev *ps2dev = &psmouse->ps2dev; + unsigned char v; + int rc = -1; + + mutex_lock(&ps2dev->cmd_mutex); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + goto out; + + if ((v = fsp_test_invert_cmd(reg_addr)) != reg_addr) { + /* inversion is required */ + ps2_sendbyte(ps2dev, 0x74, FSP_CMD_TIMEOUT2); + } else { + if ((v = fsp_test_swap_cmd(reg_addr)) != reg_addr) { + /* swapping is required */ + ps2_sendbyte(ps2dev, 0x77, FSP_CMD_TIMEOUT2); + } else { + /* swapping isn't necessary */ + ps2_sendbyte(ps2dev, 0x55, FSP_CMD_TIMEOUT2); + } + } + /* write the register address in correct order */ + ps2_sendbyte(ps2dev, v, FSP_CMD_TIMEOUT2); + + if (ps2_sendbyte(ps2dev, 0xf3, 
FSP_CMD_TIMEOUT) < 0) + return -1; + + if ((v = fsp_test_invert_cmd(reg_val)) != reg_val) { + /* inversion is required */ + ps2_sendbyte(ps2dev, 0x47, FSP_CMD_TIMEOUT2); + } else if ((v = fsp_test_swap_cmd(reg_val)) != reg_val) { + /* swapping is required */ + ps2_sendbyte(ps2dev, 0x44, FSP_CMD_TIMEOUT2); + } else { + /* swapping isn't necessary */ + ps2_sendbyte(ps2dev, 0x33, FSP_CMD_TIMEOUT2); + } + + /* write the register value in correct order */ + ps2_sendbyte(ps2dev, v, FSP_CMD_TIMEOUT2); + rc = 0; + + out: + mutex_unlock(&ps2dev->cmd_mutex); + dev_dbg(&ps2dev->serio->dev, "WRITE REG: 0x%02x to 0x%02x (rc = %d)\n", + reg_addr, reg_val, rc); + return rc; +} + +/* Enable register clock gating for writing certain registers */ +static int fsp_reg_write_enable(struct psmouse *psmouse, bool enable) +{ + int v, nv; + + if (fsp_reg_read(psmouse, FSP_REG_SYSCTL1, &v) == -1) + return -1; + + if (enable) + nv = v | FSP_BIT_EN_REG_CLK; + else + nv = v & ~FSP_BIT_EN_REG_CLK; + + /* only write if necessary */ + if (nv != v) + if (fsp_reg_write(psmouse, FSP_REG_SYSCTL1, nv) == -1) + return -1; + + return 0; +} + +static int fsp_page_reg_read(struct psmouse *psmouse, int *reg_val) +{ + struct ps2dev *ps2dev = &psmouse->ps2dev; + unsigned char param[3]; + int rc = -1; + + ps2_command(ps2dev, NULL, PSMOUSE_CMD_DISABLE); + psmouse_set_state(psmouse, PSMOUSE_CMD_MODE); + mutex_lock(&ps2dev->cmd_mutex); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + goto out; + + ps2_sendbyte(ps2dev, 0x66, FSP_CMD_TIMEOUT2); + ps2_sendbyte(ps2dev, 0x88, FSP_CMD_TIMEOUT2); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + goto out; + + ps2_sendbyte(ps2dev, 0x83, FSP_CMD_TIMEOUT2); + ps2_sendbyte(ps2dev, 0x88, FSP_CMD_TIMEOUT2); + + /* get the returned result */ + if (__ps2_command(ps2dev, param, PSMOUSE_CMD_GETINFO)) + goto out; + + *reg_val = param[2]; + rc = 0; + + out: + mutex_unlock(&ps2dev->cmd_mutex); + ps2_command(ps2dev, NULL, PSMOUSE_CMD_ENABLE); + psmouse_set_state(psmouse, PSMOUSE_ACTIVATED); + dev_dbg(&ps2dev->serio->dev, "READ PAGE REG: 0x%02x (rc = %d)\n", + *reg_val, rc); + return rc; +} + +static int fsp_page_reg_write(struct psmouse *psmouse, int reg_val) +{ + struct ps2dev *ps2dev = &psmouse->ps2dev; + unsigned char v; + int rc = -1; + + mutex_lock(&ps2dev->cmd_mutex); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + goto out; + + ps2_sendbyte(ps2dev, 0x38, FSP_CMD_TIMEOUT2); + ps2_sendbyte(ps2dev, 0x88, FSP_CMD_TIMEOUT2); + + if (ps2_sendbyte(ps2dev, 0xf3, FSP_CMD_TIMEOUT) < 0) + return -1; + + if ((v = fsp_test_invert_cmd(reg_val)) != reg_val) { + ps2_sendbyte(ps2dev, 0x47, FSP_CMD_TIMEOUT2); + } else if ((v = fsp_test_swap_cmd(reg_val)) != reg_val) { + /* swapping is required */ + ps2_sendbyte(ps2dev, 0x44, FSP_CMD_TIMEOUT2); + } else { + /* swapping isn't necessary */ + ps2_sendbyte(ps2dev, 0x33, FSP_CMD_TIMEOUT2); + } + + ps2_sendbyte(ps2dev, v, FSP_CMD_TIMEOUT2); + rc = 0; + + out: + mutex_unlock(&ps2dev->cmd_mutex); + dev_dbg(&ps2dev->serio->dev, "WRITE PAGE REG: to 0x%02x (rc = %d)\n", + reg_val, rc); + return rc; +} + +static int fsp_get_version(struct psmouse *psmouse, int *version) +{ + if (fsp_reg_read(psmouse, FSP_REG_VERSION, version)) + return -EIO; + + return 0; +} + +static int fsp_get_revision(struct psmouse *psmouse, int *rev) +{ + if (fsp_reg_read(psmouse, FSP_REG_REVISION, rev)) + return -EIO; + + return 0; +} + +static int fsp_get_buttons(struct psmouse *psmouse, int *btn) +{ + static const int buttons[] = { + 0x16, /* 
Left/Middle/Right/Forward/Backward & Scroll Up/Down */ + 0x06, /* Left/Middle/Right & Scroll Up/Down/Right/Left */ + 0x04, /* Left/Middle/Right & Scroll Up/Down */ + 0x02, /* Left/Middle/Right */ + }; + int val; + + if (fsp_reg_read(psmouse, FSP_REG_TMOD_STATUS1, &val) == -1) + return -EIO; + + *btn = buttons[(val & 0x30) >> 4]; + return 0; +} + +/* Enable on-pad command tag output */ +static int fsp_opc_tag_enable(struct psmouse *psmouse, bool enable) +{ + int v, nv; + int res = 0; + + if (fsp_reg_read(psmouse, FSP_REG_OPC_QDOWN, &v) == -1) { + dev_err(&psmouse->ps2dev.serio->dev, "Unable get OPC state.\n"); + return -EIO; + } + + if (enable) + nv = v | FSP_BIT_EN_OPC_TAG; + else + nv = v & ~FSP_BIT_EN_OPC_TAG; + + /* only write if necessary */ + if (nv != v) { + fsp_reg_write_enable(psmouse, true); + res = fsp_reg_write(psmouse, FSP_REG_OPC_QDOWN, nv); + fsp_reg_write_enable(psmouse, false); + } + + if (res != 0) { + dev_err(&psmouse->ps2dev.serio->dev, + "Unable to enable OPC tag.\n"); + res = -EIO; + } + + return res; +} + +static int fsp_onpad_vscr(struct psmouse *psmouse, bool enable) +{ + struct fsp_data *pad = psmouse->private; + int val; + + if (fsp_reg_read(psmouse, FSP_REG_ONPAD_CTL, &val)) + return -EIO; + + pad->vscroll = enable; + + if (enable) + val |= (FSP_BIT_FIX_VSCR | FSP_BIT_ONPAD_ENABLE); + else + val &= ~FSP_BIT_FIX_VSCR; + + if (fsp_reg_write(psmouse, FSP_REG_ONPAD_CTL, val)) + return -EIO; + + return 0; +} + +static int fsp_onpad_hscr(struct psmouse *psmouse, bool enable) +{ + struct fsp_data *pad = psmouse->private; + int val, v2; + + if (fsp_reg_read(psmouse, FSP_REG_ONPAD_CTL, &val)) + return -EIO; + + if (fsp_reg_read(psmouse, FSP_REG_SYSCTL5, &v2)) + return -EIO; + + pad->hscroll = enable; + + if (enable) { + val |= (FSP_BIT_FIX_HSCR | FSP_BIT_ONPAD_ENABLE); + v2 |= FSP_BIT_EN_MSID6; + } else { + val &= ~FSP_BIT_FIX_HSCR; + v2 &= ~(FSP_BIT_EN_MSID6 | FSP_BIT_EN_MSID7 | FSP_BIT_EN_MSID8); + } + + if (fsp_reg_write(psmouse, FSP_REG_ONPAD_CTL, val)) + return -EIO; + + /* reconfigure horizontal scrolling packet output */ + if (fsp_reg_write(psmouse, FSP_REG_SYSCTL5, v2)) + return -EIO; + + return 0; +} + +/* + * Write device specific initial parameters. + * + * ex: 0xab 0xcd - write oxcd into register 0xab + */ +static ssize_t fsp_attr_set_setreg(struct psmouse *psmouse, void *data, + const char *buf, size_t count) +{ + unsigned long reg, val; + char *rest; + ssize_t retval; + + reg = simple_strtoul(buf, &rest, 16); + if (rest == buf || *rest != ' ' || reg > 0xff) + return -EINVAL; + + if (strict_strtoul(rest + 1, 16, &val) || val > 0xff) + return -EINVAL; + + if (fsp_reg_write_enable(psmouse, true)) + return -EIO; + + retval = fsp_reg_write(psmouse, reg, val) < 0 ? -EIO : count; + + fsp_reg_write_enable(psmouse, false); + + return count; +} + +PSMOUSE_DEFINE_WO_ATTR(setreg, S_IWUSR, NULL, fsp_attr_set_setreg); + +static ssize_t fsp_attr_show_getreg(struct psmouse *psmouse, + void *data, char *buf) +{ + struct fsp_data *pad = psmouse->private; + + return sprintf(buf, "%02x%02x\n", pad->last_reg, pad->last_val); +} + +/* + * Read a register from device. 
+ * + * ex: 0xab -- read content from register 0xab + */ +static ssize_t fsp_attr_set_getreg(struct psmouse *psmouse, void *data, + const char *buf, size_t count) +{ + struct fsp_data *pad = psmouse->private; + unsigned long reg; + int val; + + if (strict_strtoul(buf, 16, ®) || reg > 0xff) + return -EINVAL; + + if (fsp_reg_read(psmouse, reg, &val)) + return -EIO; + + pad->last_reg = reg; + pad->last_val = val; + + return count; +} + +PSMOUSE_DEFINE_ATTR(getreg, S_IWUSR | S_IRUGO, NULL, + fsp_attr_show_getreg, fsp_attr_set_getreg); + +static ssize_t fsp_attr_show_pagereg(struct psmouse *psmouse, + void *data, char *buf) +{ + int val = 0; + + if (fsp_page_reg_read(psmouse, &val)) + return -EIO; + + return sprintf(buf, "%02x\n", val); +} + +static ssize_t fsp_attr_set_pagereg(struct psmouse *psmouse, void *data, + const char *buf, size_t count) +{ + unsigned long val; + + if (strict_strtoul(buf, 16, &val) || val > 0xff) + return -EINVAL; + + if (fsp_page_reg_write(psmouse, val)) + return -EIO; + + return count; +} + +PSMOUSE_DEFINE_ATTR(page, S_IWUSR | S_IRUGO, NULL, + fsp_attr_show_pagereg, fsp_attr_set_pagereg); + +static ssize_t fsp_attr_show_vscroll(struct psmouse *psmouse, + void *data, char *buf) +{ + struct fsp_data *pad = psmouse->private; + + return sprintf(buf, "%d\n", pad->vscroll); +} + +static ssize_t fsp_attr_set_vscroll(struct psmouse *psmouse, void *data, + const char *buf, size_t count) +{ + unsigned long val; + + if (strict_strtoul(buf, 10, &val) || val > 1) + return -EINVAL; + + fsp_onpad_vscr(psmouse, val); + + return count; +} + +PSMOUSE_DEFINE_ATTR(vscroll, S_IWUSR | S_IRUGO, NULL, + fsp_attr_show_vscroll, fsp_attr_set_vscroll); + +static ssize_t fsp_attr_show_hscroll(struct psmouse *psmouse, + void *data, char *buf) +{ + struct fsp_data *pad = psmouse->private; + + return sprintf(buf, "%d\n", pad->hscroll); +} + +static ssize_t fsp_attr_set_hscroll(struct psmouse *psmouse, void *data, + const char *buf, size_t count) +{ + unsigned long val; + + if (strict_strtoul(buf, 10, &val) || val > 1) + return -EINVAL; + + fsp_onpad_hscr(psmouse, val); + + return count; +} + +PSMOUSE_DEFINE_ATTR(hscroll, S_IWUSR | S_IRUGO, NULL, + fsp_attr_show_hscroll, fsp_attr_set_hscroll); + +static ssize_t fsp_attr_show_flags(struct psmouse *psmouse, + void *data, char *buf) +{ + struct fsp_data *pad = psmouse->private; + + return sprintf(buf, "%c\n", + pad->flags & FSPDRV_FLAG_EN_OPC ? 
'C' : 'c'); +} + +static ssize_t fsp_attr_set_flags(struct psmouse *psmouse, void *data, + const char *buf, size_t count) +{ + struct fsp_data *pad = psmouse->private; + size_t i; + + for (i = 0; i < count; i++) { + switch (buf[i]) { + case 'C': + pad->flags |= FSPDRV_FLAG_EN_OPC; + break; + case 'c': + pad->flags &= ~FSPDRV_FLAG_EN_OPC; + break; + default: + return -EINVAL; + } + } + return count; +} + +PSMOUSE_DEFINE_ATTR(flags, S_IWUSR | S_IRUGO, NULL, + fsp_attr_show_flags, fsp_attr_set_flags); + +static ssize_t fsp_attr_show_ver(struct psmouse *psmouse, + void *data, char *buf) +{ + return sprintf(buf, "Sentelic FSP kernel module %s\n", fsp_drv_ver); +} + +PSMOUSE_DEFINE_RO_ATTR(ver, S_IRUGO, NULL, fsp_attr_show_ver); + +static struct attribute *fsp_attributes[] = { + &psmouse_attr_setreg.dattr.attr, + &psmouse_attr_getreg.dattr.attr, + &psmouse_attr_page.dattr.attr, + &psmouse_attr_vscroll.dattr.attr, + &psmouse_attr_hscroll.dattr.attr, + &psmouse_attr_flags.dattr.attr, + &psmouse_attr_ver.dattr.attr, + NULL +}; + +static struct attribute_group fsp_attribute_group = { + .attrs = fsp_attributes, +}; + +#ifdef FSP_DEBUG +static void fsp_packet_debug(unsigned char packet[]) +{ + static unsigned int ps2_packet_cnt; + static unsigned int ps2_last_second; + unsigned int jiffies_msec; + + ps2_packet_cnt++; + jiffies_msec = jiffies_to_msecs(jiffies); + printk(KERN_DEBUG "%08dms PS/2 packets: %02x, %02x, %02x, %02x\n", + jiffies_msec, packet[0], packet[1], packet[2], packet[3]); + + if (jiffies_msec - ps2_last_second > 1000) { + printk(KERN_DEBUG "PS/2 packets/sec = %d\n", ps2_packet_cnt); + ps2_packet_cnt = 0; + ps2_last_second = jiffies_msec; + } +} +#else +static void fsp_packet_debug(unsigned char packet[]) +{ +} +#endif + +static psmouse_ret_t fsp_process_byte(struct psmouse *psmouse) +{ + struct input_dev *dev = psmouse->dev; + struct fsp_data *ad = psmouse->private; + unsigned char *packet = psmouse->packet; + unsigned char button_status = 0, lscroll = 0, rscroll = 0; + int rel_x, rel_y; + + if (psmouse->pktcnt < 4) + return PSMOUSE_GOOD_DATA; + + /* + * Full packet accumulated, process it + */ + + switch (psmouse->packet[0] >> FSP_PKT_TYPE_SHIFT) { + case FSP_PKT_TYPE_ABS: + dev_warn(&psmouse->ps2dev.serio->dev, + "Unexpected absolute mode packet, ignored.\n"); + break; + + case FSP_PKT_TYPE_NORMAL_OPC: + /* on-pad click, filter it if necessary */ + if ((ad->flags & FSPDRV_FLAG_EN_OPC) != FSPDRV_FLAG_EN_OPC) + packet[0] &= ~BIT(0); + /* fall through */ + + case FSP_PKT_TYPE_NORMAL: + /* normal packet */ + /* special packet data translation from on-pad packets */ + if (packet[3] != 0) { + if (packet[3] & BIT(0)) + button_status |= 0x01; /* wheel down */ + if (packet[3] & BIT(1)) + button_status |= 0x0f; /* wheel up */ + if (packet[3] & BIT(2)) + button_status |= BIT(5);/* horizontal left */ + if (packet[3] & BIT(3)) + button_status |= BIT(4);/* horizontal right */ + /* push back to packet queue */ + if (button_status != 0) + packet[3] = button_status; + rscroll = (packet[3] >> 4) & 1; + lscroll = (packet[3] >> 5) & 1; + } + /* + * Processing wheel up/down and extra button events + */ + input_report_rel(dev, REL_WHEEL, + (int)(packet[3] & 8) - (int)(packet[3] & 7)); + input_report_rel(dev, REL_HWHEEL, lscroll - rscroll); + input_report_key(dev, BTN_BACK, lscroll); + input_report_key(dev, BTN_FORWARD, rscroll); + + /* + * Standard PS/2 Mouse + */ + input_report_key(dev, BTN_LEFT, packet[0] & 1); + input_report_key(dev, BTN_MIDDLE, (packet[0] >> 2) & 1); + input_report_key(dev, BTN_RIGHT, 
(packet[0] >> 1) & 1); + + rel_x = packet[1] ? (int)packet[1] - (int)((packet[0] << 4) & 0x100) : 0; + rel_y = packet[2] ? (int)((packet[0] << 3) & 0x100) - (int)packet[2] : 0; + + input_report_rel(dev, REL_X, rel_x); + input_report_rel(dev, REL_Y, rel_y); + break; + } + + input_sync(dev); + + fsp_packet_debug(packet); + + return PSMOUSE_FULL_PACKET; +} + +static int fsp_activate_protocol(struct psmouse *psmouse) +{ + struct fsp_data *pad = psmouse->private; + struct ps2dev *ps2dev = &psmouse->ps2dev; + unsigned char param[2]; + int val; + + /* + * Standard procedure to enter FSP Intellimouse mode + * (scrolling wheel, 4th and 5th buttons) + */ + param[0] = 200; + ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); + param[0] = 200; + ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); + param[0] = 80; + ps2_command(ps2dev, param, PSMOUSE_CMD_SETRATE); + + ps2_command(ps2dev, param, PSMOUSE_CMD_GETID); + if (param[0] != 0x04) { + dev_err(&psmouse->ps2dev.serio->dev, + "Unable to enable 4 bytes packet format.\n"); + return -EIO; + } + + if (fsp_reg_read(psmouse, FSP_REG_SYSCTL5, &val)) { + dev_err(&psmouse->ps2dev.serio->dev, + "Unable to read SYSCTL5 register.\n"); + return -EIO; + } + + val &= ~(FSP_BIT_EN_MSID7 | FSP_BIT_EN_MSID8 | FSP_BIT_EN_AUTO_MSID8); + /* Ensure we are not in absolute mode */ + val &= ~FSP_BIT_EN_PKT_G0; + if (pad->buttons == 0x06) { + /* Left/Middle/Right & Scroll Up/Down/Right/Left */ + val |= FSP_BIT_EN_MSID6; + } + + if (fsp_reg_write(psmouse, FSP_REG_SYSCTL5, val)) { + dev_err(&psmouse->ps2dev.serio->dev, + "Unable to set up required mode bits.\n"); + return -EIO; + } + + /* + * Enable OPC tags such that driver can tell the difference between + * on-pad and real button click + */ + if (fsp_opc_tag_enable(psmouse, true)) + dev_warn(&psmouse->ps2dev.serio->dev, + "Failed to enable OPC tag mode.\n"); + + /* Enable on-pad vertical and horizontal scrolling */ + fsp_onpad_vscr(psmouse, true); + fsp_onpad_hscr(psmouse, true); + + return 0; +} + +int fsp_detect(struct psmouse *psmouse, int set_properties) +{ + int id; + + if (fsp_reg_read(psmouse, FSP_REG_DEVICE_ID, &id)) + return -EIO; + + if (id != 0x01) + return -ENODEV; + + if (set_properties) { + psmouse->vendor = "Sentelic"; + psmouse->name = "FingerSensingPad"; + } + + return 0; +} + +static void fsp_reset(struct psmouse *psmouse) +{ + fsp_opc_tag_enable(psmouse, false); + fsp_onpad_vscr(psmouse, false); + fsp_onpad_hscr(psmouse, false); +} + +static void fsp_disconnect(struct psmouse *psmouse) +{ + sysfs_remove_group(&psmouse->ps2dev.serio->dev.kobj, + &fsp_attribute_group); + + fsp_reset(psmouse); + kfree(psmouse->private); +} + +static int fsp_reconnect(struct psmouse *psmouse) +{ + int version; + + if (fsp_detect(psmouse, 0)) + return -ENODEV; + + if (fsp_get_version(psmouse, &version)) + return -ENODEV; + + if (fsp_activate_protocol(psmouse)) + return -EIO; + + return 0; +} + +int fsp_init(struct psmouse *psmouse) +{ + struct fsp_data *priv; + int ver, rev, buttons; + int error; + + if (fsp_get_version(psmouse, &ver) || + fsp_get_revision(psmouse, &rev) || + fsp_get_buttons(psmouse, &buttons)) { + return -ENODEV; + } + + printk(KERN_INFO + "Finger Sensing Pad, hw: %d.%d.%d, sw: %s, buttons: %d\n", + ver >> 4, ver & 0x0F, rev, fsp_drv_ver, buttons & 7); + + psmouse->private = priv = kzalloc(sizeof(struct fsp_data), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->ver = ver; + priv->rev = rev; + priv->buttons = buttons; + + /* enable on-pad click by default */ + priv->flags |= FSPDRV_FLAG_EN_OPC; + + /* Set up 
various supported input event bits */ + __set_bit(BTN_BACK, psmouse->dev->keybit); + __set_bit(BTN_FORWARD, psmouse->dev->keybit); + __set_bit(REL_WHEEL, psmouse->dev->relbit); + __set_bit(REL_HWHEEL, psmouse->dev->relbit); + + psmouse->protocol_handler = fsp_process_byte; + psmouse->disconnect = fsp_disconnect; + psmouse->reconnect = fsp_reconnect; + psmouse->cleanup = fsp_reset; + psmouse->pktsize = 4; + + /* set default packet output based on number of buttons we found */ + error = fsp_activate_protocol(psmouse); + if (error) + goto err_out; + + error = sysfs_create_group(&psmouse->ps2dev.serio->dev.kobj, + &fsp_attribute_group); + if (error) { + dev_err(&psmouse->ps2dev.serio->dev, + "Failed to create sysfs attributes (%d)", error); + goto err_out; + } + + return 0; + + err_out: + kfree(psmouse->private); + psmouse->private = NULL; + return error; +} diff --git a/drivers/input/mouse/sentelic.h b/drivers/input/mouse/sentelic.h new file mode 100644 index 000000000000..083559c7282b --- /dev/null +++ b/drivers/input/mouse/sentelic.h @@ -0,0 +1,98 @@ +/*- + * Finger Sensing Pad PS/2 mouse driver. + * + * Copyright (C) 2005-2007 Asia Vital Components Co., Ltd. + * Copyright (C) 2005-2009 Tai-hwa Liang, Sentelic Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __SENTELIC_H +#define __SENTELIC_H + +/* Finger-sensing Pad information registers */ +#define FSP_REG_DEVICE_ID 0x00 +#define FSP_REG_VERSION 0x01 +#define FSP_REG_REVISION 0x04 +#define FSP_REG_TMOD_STATUS1 0x0B +#define FSP_BIT_NO_ROTATION BIT(3) +#define FSP_REG_PAGE_CTRL 0x0F + +/* Finger-sensing Pad control registers */ +#define FSP_REG_SYSCTL1 0x10 +#define FSP_BIT_EN_REG_CLK BIT(5) +#define FSP_REG_OPC_QDOWN 0x31 +#define FSP_BIT_EN_OPC_TAG BIT(7) +#define FSP_REG_OPTZ_XLO 0x34 +#define FSP_REG_OPTZ_XHI 0x35 +#define FSP_REG_OPTZ_YLO 0x36 +#define FSP_REG_OPTZ_YHI 0x37 +#define FSP_REG_SYSCTL5 0x40 +#define FSP_BIT_90_DEGREE BIT(0) +#define FSP_BIT_EN_MSID6 BIT(1) +#define FSP_BIT_EN_MSID7 BIT(2) +#define FSP_BIT_EN_MSID8 BIT(3) +#define FSP_BIT_EN_AUTO_MSID8 BIT(5) +#define FSP_BIT_EN_PKT_G0 BIT(6) + +#define FSP_REG_ONPAD_CTL 0x43 +#define FSP_BIT_ONPAD_ENABLE BIT(0) +#define FSP_BIT_ONPAD_FBBB BIT(1) +#define FSP_BIT_FIX_VSCR BIT(3) +#define FSP_BIT_FIX_HSCR BIT(5) +#define FSP_BIT_DRAG_LOCK BIT(6) + +/* Finger-sensing Pad packet formating related definitions */ + +/* absolute packet type */ +#define FSP_PKT_TYPE_NORMAL (0x00) +#define FSP_PKT_TYPE_ABS (0x01) +#define FSP_PKT_TYPE_NOTIFY (0x02) +#define FSP_PKT_TYPE_NORMAL_OPC (0x03) +#define FSP_PKT_TYPE_SHIFT (6) + +#ifdef __KERNEL__ + +struct fsp_data { + unsigned char ver; /* hardware version */ + unsigned char rev; /* hardware revison */ + unsigned char buttons; /* Number of buttons */ + unsigned int flags; +#define FSPDRV_FLAG_EN_OPC (0x001) /* enable on-pad clicking */ + + bool vscroll; /* Vertical scroll zone enabled */ + bool hscroll; /* Horizontal scroll zone enabled */ + + unsigned char last_reg; /* Last register we requested read from */ + unsigned char last_val; +}; + +#ifdef CONFIG_MOUSE_PS2_SENTELIC +extern int fsp_detect(struct psmouse *psmouse, int set_properties); +extern int fsp_init(struct psmouse *psmouse); +#else +inline int fsp_detect(struct psmouse *psmouse, int set_properties) +{ + return -ENOSYS; +} +inline int fsp_init(struct psmouse *psmouse) +{ + return -ENOSYS; +} +#endif + +#endif /* __KERNEL__ */ + +#endif /* !__SENTELIC_H */ diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index be5bbbb8ae4e..3a95b508bf27 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -161,7 +161,7 @@ static int ps2_adjust_timeout(struct ps2dev *ps2dev, int command, int timeout) * ps2_command() can only be called from a process context */ -int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) +int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) { int timeout; int send = (command >> 12) & 0xf; @@ -179,8 +179,6 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) return -1; } - mutex_lock(&ps2dev->cmd_mutex); - serio_pause_rx(ps2dev->serio); ps2dev->flags = command == PS2_CMD_GETID ? 
PS2_FLAG_WAITID : 0; ps2dev->cmdcnt = receive; @@ -231,7 +229,18 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) ps2dev->flags = 0; serio_continue_rx(ps2dev->serio); + return rc; +} +EXPORT_SYMBOL(__ps2_command); + +int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) +{ + int rc; + + mutex_lock(&ps2dev->cmd_mutex); + rc = __ps2_command(ps2dev, param, command); mutex_unlock(&ps2dev->cmd_mutex); + return rc; } EXPORT_SYMBOL(ps2_command); diff --git a/include/linux/libps2.h b/include/linux/libps2.h index b94534b7e266..fcf5fbe6a50c 100644 --- a/include/linux/libps2.h +++ b/include/linux/libps2.h @@ -44,6 +44,7 @@ struct ps2dev { void ps2_init(struct ps2dev *ps2dev, struct serio *serio); int ps2_sendbyte(struct ps2dev *ps2dev, unsigned char byte, int timeout); void ps2_drain(struct ps2dev *ps2dev, int maxbytes, int timeout); +int __ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command); int ps2_handle_ack(struct ps2dev *ps2dev, unsigned char data); int ps2_handle_response(struct ps2dev *ps2dev, unsigned char data); -- cgit v1.2.3 From 3ac64beecd27400d12cc7afb4108eef26c499f6a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Aug 2009 16:16:53 +0200 Subject: mac80211: allow configure_filter callback to sleep Over time, a whole bunch of drivers have come up with their own scheme to delay the configure_filter operation to a workqueue. To be able to simplify things, allow configure_filter to sleep, and add a new prepare_multicast callback that drivers that need the multicast address list implement. This new callback must be atomic, but most drivers either don't care or just calculate a hash which can be done atomically and then uploaded to the hardware non-atomically. A cursory look suggests that at76c50x-usb, ar9170, mwl8k (which is actually very broken now), rt2x00, wl1251, wl1271 and zd1211 should make use of this new capability. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/adm8211.c | 42 +++++++++++++------- drivers/net/wireless/at76c50x-usb.c | 7 ++-- drivers/net/wireless/ath/ar9170/main.c | 43 +++++++++++--------- drivers/net/wireless/ath/ath5k/base.c | 64 ++++++++++++++++++------------ drivers/net/wireless/ath/ath9k/main.c | 3 +- drivers/net/wireless/b43/main.c | 2 +- drivers/net/wireless/b43legacy/main.c | 4 +- drivers/net/wireless/iwlwifi/iwl-core.c | 2 +- drivers/net/wireless/iwlwifi/iwl-core.h | 3 +- drivers/net/wireless/libertas_tf/main.c | 37 +++++++++++------ drivers/net/wireless/mac80211_hwsim.c | 4 +- drivers/net/wireless/mwl8k.c | 34 ++++++++++++---- drivers/net/wireless/p54/main.c | 2 +- drivers/net/wireless/rt2x00/rt2x00.h | 2 +- drivers/net/wireless/rt2x00/rt2x00mac.c | 2 +- drivers/net/wireless/rtl818x/rtl8180_dev.c | 11 ++++- drivers/net/wireless/rtl818x/rtl8187_dev.c | 11 ++++- drivers/net/wireless/wl12xx/wl1251_main.c | 4 +- drivers/net/wireless/wl12xx/wl1271_main.c | 4 +- drivers/net/wireless/zd1211rw/zd_mac.c | 44 +++++++++++++------- include/net/mac80211.h | 21 +++++++--- net/mac80211/driver-ops.h | 24 +++++++++-- net/mac80211/driver-trace.h | 36 ++++++++++++++--- net/mac80211/ieee80211_i.h | 3 ++ net/mac80211/iface.c | 15 ++----- net/mac80211/main.c | 24 ++++++++--- net/mac80211/scan.c | 16 +------- net/mac80211/util.c | 2 - 28 files changed, 297 insertions(+), 169 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c index 5695911bc602..b80f514877d8 100644 --- a/drivers/net/wireless/adm8211.c +++ b/drivers/net/wireless/adm8211.c @@ -1328,16 +1328,39 @@ static void adm8211_bss_info_changed(struct ieee80211_hw *dev, } } +static u64 adm8211_prepare_multicast(struct ieee80211_hw *hw, + int mc_count, struct dev_addr_list *mclist) +{ + unsigned int bit_nr, i; + u32 mc_filter[2]; + + mc_filter[1] = mc_filter[0] = 0; + + for (i = 0; i < mc_count; i++) { + if (!mclist) + break; + bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26; + + bit_nr &= 0x3F; + mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31); + mclist = mclist->next; + } + + return mc_filter[0] | ((u64)(mc_filter[1]) << 32); +} + static void adm8211_configure_filter(struct ieee80211_hw *dev, unsigned int changed_flags, unsigned int *total_flags, - int mc_count, struct dev_mc_list *mclist) + u64 multicast) { static const u8 bcast[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; struct adm8211_priv *priv = dev->priv; - unsigned int bit_nr, new_flags; + unsigned int new_flags; u32 mc_filter[2]; - int i; + + mc_filter[0] = multicast; + mc_filter[1] = multicast >> 32; new_flags = 0; @@ -1346,23 +1369,13 @@ static void adm8211_configure_filter(struct ieee80211_hw *dev, priv->nar |= ADM8211_NAR_PR; priv->nar &= ~ADM8211_NAR_MM; mc_filter[1] = mc_filter[0] = ~0; - } else if ((*total_flags & FIF_ALLMULTI) || (mc_count > 32)) { + } else if (*total_flags & FIF_ALLMULTI || multicast == ~(0ULL)) { new_flags |= FIF_ALLMULTI; priv->nar &= ~ADM8211_NAR_PR; priv->nar |= ADM8211_NAR_MM; mc_filter[1] = mc_filter[0] = ~0; } else { priv->nar &= ~(ADM8211_NAR_MM | ADM8211_NAR_PR); - mc_filter[1] = mc_filter[0] = 0; - for (i = 0; i < mc_count; i++) { - if (!mclist) - break; - bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26; - - bit_nr &= 0x3F; - mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31); - mclist = mclist->next; - } } ADM8211_IDLE_RX(); @@ -1757,6 +1770,7 @@ static const struct ieee80211_ops adm8211_ops = { .remove_interface = adm8211_remove_interface, .config = adm8211_config, .bss_info_changed = 
adm8211_bss_info_changed, + .prepare_multicast = adm8211_prepare_multicast, .configure_filter = adm8211_configure_filter, .get_stats = adm8211_get_stats, .get_tx_stats = adm8211_get_tx_stats, diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index 7218dbabad3e..a6e19545ac6a 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -1997,15 +1997,14 @@ static void at76_bss_info_changed(struct ieee80211_hw *hw, /* must be atomic */ static void at76_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, - unsigned int *total_flags, int mc_count, - struct dev_addr_list *mc_list) + unsigned int *total_flags, u64 multicast) { struct at76_priv *priv = hw->priv; int flags; at76_dbg(DBG_MAC80211, "%s(): changed_flags=0x%08x " - "total_flags=0x%08x mc_count=%d", - __func__, changed_flags, *total_flags, mc_count); + "total_flags=0x%08x", + __func__, changed_flags, *total_flags); flags = changed_flags & AT76_SUPPORTED_FILTERS; *total_flags = AT76_SUPPORTED_FILTERS; diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index ea8c9419336d..6a9462e4fd87 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -2100,10 +2100,29 @@ unlock: mutex_unlock(&ar->mutex); } +static u64 ar9170_op_prepare_multicast(struct ieee80211_hw *hw, int mc_count, + struct dev_addr_list *mclist) +{ + u64 mchash; + int i; + + /* always get broadcast frames */ + mchash = 1ULL << (0xff >> 2); + + for (i = 0; i < mc_count; i++) { + if (WARN_ON(!mclist)) + break; + mchash |= 1ULL << (mclist->dmi_addr[5] >> 2); + mclist = mclist->next; + } + + return mchash; +} + static void ar9170_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, - int mc_count, struct dev_mc_list *mclist) + u64 multicast) { struct ar9170 *ar = hw->priv; @@ -2116,24 +2135,11 @@ static void ar9170_op_configure_filter(struct ieee80211_hw *hw, * then checking the error flags, later. 
*/ - if (changed_flags & FIF_ALLMULTI) { - if (*new_flags & FIF_ALLMULTI) { - ar->want_mc_hash = ~0ULL; - } else { - u64 mchash; - int i; - - /* always get broadcast frames */ - mchash = 1ULL << (0xff >> 2); + if (changed_flags & FIF_ALLMULTI && *new_flags & FIF_ALLMULTI) + multicast = ~0ULL; - for (i = 0; i < mc_count; i++) { - if (WARN_ON(!mclist)) - break; - mchash |= 1ULL << (mclist->dmi_addr[5] >> 2); - mclist = mclist->next; - } - ar->want_mc_hash = mchash; - } + if (multicast != ar->want_mc_hash) { + ar->want_mc_hash = multicast; set_bit(AR9170_FILTER_CHANGED_MULTICAST, &ar->filter_changed); } @@ -2543,6 +2549,7 @@ static const struct ieee80211_ops ar9170_ops = { .add_interface = ar9170_op_add_interface, .remove_interface = ar9170_op_remove_interface, .config = ar9170_op_config, + .prepare_multicast = ar9170_op_prepare_multicast, .configure_filter = ar9170_op_configure_filter, .conf_tx = ar9170_conf_tx, .bss_info_changed = ar9170_op_bss_info_changed, diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 2b3cf39dd4b1..3951b5b13424 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -229,10 +229,12 @@ static int ath5k_add_interface(struct ieee80211_hw *hw, static void ath5k_remove_interface(struct ieee80211_hw *hw, struct ieee80211_if_init_conf *conf); static int ath5k_config(struct ieee80211_hw *hw, u32 changed); +static u64 ath5k_prepare_multicast(struct ieee80211_hw *hw, + int mc_count, struct dev_addr_list *mc_list); static void ath5k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, - int mc_count, struct dev_mc_list *mclist); + u64 multicast); static int ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -260,6 +262,7 @@ static const struct ieee80211_ops ath5k_hw_ops = { .add_interface = ath5k_add_interface, .remove_interface = ath5k_remove_interface, .config = ath5k_config, + .prepare_multicast = ath5k_prepare_multicast, .configure_filter = ath5k_configure_filter, .set_key = ath5k_set_key, .get_stats = ath5k_get_stats, @@ -2853,6 +2856,37 @@ unlock: return ret; } +static u64 ath5k_prepare_multicast(struct ieee80211_hw *hw, + int mc_count, struct dev_addr_list *mclist) +{ + u32 mfilt[2], val; + int i; + u8 pos; + + mfilt[0] = 0; + mfilt[1] = 1; + + for (i = 0; i < mc_count; i++) { + if (!mclist) + break; + /* calculate XOR of eight 6-bit values */ + val = get_unaligned_le32(mclist->dmi_addr + 0); + pos = (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val; + val = get_unaligned_le32(mclist->dmi_addr + 3); + pos ^= (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val; + pos &= 0x3f; + mfilt[pos / 32] |= (1 << (pos % 32)); + /* XXX: we might be able to just do this instead, + * but not sure, needs testing, if we do use this we'd + * neet to inform below to not reset the mcast */ + /* ath5k_hw_set_mcast_filterindex(ah, + * mclist->dmi_addr[5]); */ + mclist = mclist->next; + } + + return ((u64)(mfilt[1]) << 32) | mfilt[0]; +} + #define SUPPORTED_FIF_FLAGS \ FIF_PROMISC_IN_BSS | FIF_ALLMULTI | FIF_FCSFAIL | \ FIF_PLCPFAIL | FIF_CONTROL | FIF_OTHER_BSS | \ @@ -2878,16 +2912,14 @@ unlock: static void ath5k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, - int mc_count, struct dev_mc_list *mclist) + u64 multicast) { struct ath5k_softc *sc = hw->priv; struct ath5k_hw *ah = sc->ah; - u32 mfilt[2], val, rfilt; - u8 pos; - int i; + u32 mfilt[2], rfilt; - 
mfilt[0] = 0; - mfilt[1] = 0; + mfilt[0] = multicast; + mfilt[1] = multicast >> 32; /* Only deal with supported flags */ changed_flags &= SUPPORTED_FIF_FLAGS; @@ -2913,24 +2945,6 @@ static void ath5k_configure_filter(struct ieee80211_hw *hw, if (*new_flags & FIF_ALLMULTI) { mfilt[0] = ~0; mfilt[1] = ~0; - } else { - for (i = 0; i < mc_count; i++) { - if (!mclist) - break; - /* calculate XOR of eight 6-bit values */ - val = get_unaligned_le32(mclist->dmi_addr + 0); - pos = (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val; - val = get_unaligned_le32(mclist->dmi_addr + 3); - pos ^= (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val; - pos &= 0x3f; - mfilt[pos / 32] |= (1 << (pos % 32)); - /* XXX: we might be able to just do this instead, - * but not sure, needs testing, if we do use this we'd - * neet to inform below to not reset the mcast */ - /* ath5k_hw_set_mcast_filterindex(ah, - * mclist->dmi_addr[5]); */ - mclist = mclist->next; - } } /* This is the best we can do */ diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 3e09b9ac165b..2f9c149fd481 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -2394,8 +2394,7 @@ skip_chan_change: static void ath9k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *total_flags, - int mc_count, - struct dev_mc_list *mclist) + u64 multicast) { struct ath_wiphy *aphy = hw->priv; struct ath_softc *sc = aphy->sc; diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index c5bece090420..78ddbc7f836b 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3679,7 +3679,7 @@ out_unlock: static void b43_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed, unsigned int *fflags, - int mc_count, struct dev_addr_list *mc_list) + u64 multicast) { struct b43_wl *wl = hw_to_b43_wl(hw); struct b43_wldev *dev = wl->current_dev; diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index b1435594921a..b166a6f9f055 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2836,9 +2836,7 @@ static void b43legacy_op_bss_info_changed(struct ieee80211_hw *hw, static void b43legacy_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed, - unsigned int *fflags, - int mc_count, - struct dev_addr_list *mc_list) + unsigned int *fflags,u64 multicast) { struct b43legacy_wl *wl = hw_to_b43legacy_wl(hw); struct b43legacy_wldev *dev = wl->current_dev; diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index c0efa6623c09..f1f6dabd8fbd 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -1514,7 +1514,7 @@ EXPORT_SYMBOL(iwl_irq_handle_error); void iwl_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *total_flags, - int mc_count, struct dev_addr_list *mc_list) + u64 multicast) { struct iwl_priv *priv = hw->priv; __le32 *filter_flags = &priv->staging_rxon.filter_flags; diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h index 4ca025a34daf..62d90364b61d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.h +++ b/drivers/net/wireless/iwlwifi/iwl-core.h @@ -282,8 +282,7 @@ int iwl_set_decrypted_flag(struct iwl_priv *priv, void iwl_irq_handle_error(struct iwl_priv *priv); void iwl_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, - unsigned 
int *total_flags, - int mc_count, struct dev_addr_list *mc_list); + unsigned int *total_flags, u64 multicast); int iwl_hw_nic_init(struct iwl_priv *priv); int iwl_setup_mac(struct iwl_priv *priv); int iwl_set_hw_params(struct iwl_priv *priv); diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index 4872345a2f61..019431d2f8a9 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -366,15 +366,35 @@ static int lbtf_op_config(struct ieee80211_hw *hw, u32 changed) return 0; } +static u64 lbtf_op_prepare_multicast(struct ieee80211_hw *hw, + int mc_count, struct dev_addr_list *mclist) +{ + struct lbtf_private *priv = hw->priv; + int i; + + if (!mc_count || mc_count > MRVDRV_MAX_MULTICAST_LIST_SIZE) + return mc_count; + + priv->nr_of_multicastmacaddr = mc_count; + for (i = 0; i < mc_count; i++) { + if (!mclist) + break; + memcpy(&priv->multicastlist[i], mclist->da_addr, + ETH_ALEN); + mclist = mclist->next; + } + + return mc_count; +} + #define SUPPORTED_FIF_FLAGS (FIF_PROMISC_IN_BSS | FIF_ALLMULTI) static void lbtf_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, - int mc_count, struct dev_mc_list *mclist) + u64 multicast) { struct lbtf_private *priv = hw->priv; int old_mac_control = priv->mac_control; - int i; changed_flags &= SUPPORTED_FIF_FLAGS; *new_flags &= SUPPORTED_FIF_FLAGS; @@ -386,20 +406,12 @@ static void lbtf_op_configure_filter(struct ieee80211_hw *hw, else priv->mac_control &= ~CMD_ACT_MAC_PROMISCUOUS_ENABLE; if (*new_flags & (FIF_ALLMULTI) || - mc_count > MRVDRV_MAX_MULTICAST_LIST_SIZE) { + multicast > MRVDRV_MAX_MULTICAST_LIST_SIZE) { priv->mac_control |= CMD_ACT_MAC_ALL_MULTICAST_ENABLE; priv->mac_control &= ~CMD_ACT_MAC_MULTICAST_ENABLE; - } else if (mc_count) { + } else if (multicast) { priv->mac_control |= CMD_ACT_MAC_MULTICAST_ENABLE; priv->mac_control &= ~CMD_ACT_MAC_ALL_MULTICAST_ENABLE; - priv->nr_of_multicastmacaddr = mc_count; - for (i = 0; i < mc_count; i++) { - if (!mclist) - break; - memcpy(&priv->multicastlist[i], mclist->da_addr, - ETH_ALEN); - mclist = mclist->next; - } lbtf_cmd_set_mac_multicast_addr(priv); } else { priv->mac_control &= ~(CMD_ACT_MAC_MULTICAST_ENABLE | @@ -461,6 +473,7 @@ static const struct ieee80211_ops lbtf_ops = { .add_interface = lbtf_op_add_interface, .remove_interface = lbtf_op_remove_interface, .config = lbtf_op_config, + .prepare_multicast = lbtf_op_prepare_multicast, .configure_filter = lbtf_op_configure_filter, .bss_info_changed = lbtf_op_bss_info_changed, }; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 930f5c7da4a6..6f6cd43592c8 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -582,9 +582,7 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed) static void mac80211_hwsim_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, - unsigned int *total_flags, - int mc_count, - struct dev_addr_list *mc_list) + unsigned int *total_flags,u64 multicast) { struct mac80211_hwsim_data *data = hw->priv; diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 8a6d3afe4122..f84387083e73 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -3251,31 +3251,50 @@ mwl8k_configure_filter_exit: return rc; } +static u64 mwl8k_prepare_multicast(struct ieee80211_hw *hw, + int mc_count, struct dev_addr_list *mclist) +{ + struct 
mwl8k_configure_filter_worker *worker; + + worker = kzalloc(sizeof(*worker), GFP_ATOMIC); + + if (!worker) + return 0; + + /* + * XXX: This is _HORRIBLY_ broken!! + * + * No locking, the mclist pointer might be invalid as soon as this + * function returns, something in the list might be invalidated + * once we get to the worker, etc... + */ + worker->mc_count = mc_count; + worker->mclist = mclist; + + return (u64)worker; +} + static void mwl8k_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *total_flags, - int mc_count, - struct dev_addr_list *mclist) + u64 multicast) { - struct mwl8k_configure_filter_worker *worker; + struct mwl8k_configure_filter_worker *worker = (void *)multicast; struct mwl8k_priv *priv = hw->priv; /* Clear unsupported feature flags */ *total_flags &= MWL8K_SUPPORTED_IF_FLAGS; - if (!(changed_flags & MWL8K_SUPPORTED_IF_FLAGS) && !mc_count) + if (!(changed_flags & MWL8K_SUPPORTED_IF_FLAGS)) return; - worker = kzalloc(sizeof(*worker), GFP_ATOMIC); if (worker == NULL) return; worker->header.options = MWL8K_WQ_QUEUE_ONLY | MWL8K_WQ_TX_WAIT_EMPTY; worker->changed_flags = changed_flags; worker->total_flags = total_flags; - worker->mc_count = mc_count; - worker->mclist = mclist; mwl8k_queue_work(hw, &worker->header, priv->config_wq, mwl8k_configure_filter_wt); @@ -3441,6 +3460,7 @@ static const struct ieee80211_ops mwl8k_ops = { .remove_interface = mwl8k_remove_interface, .config = mwl8k_config, .bss_info_changed = mwl8k_bss_info_changed, + .prepare_multicast = mwl8k_prepare_multicast, .configure_filter = mwl8k_configure_filter, .set_rts_threshold = mwl8k_set_rts_threshold, .conf_tx = mwl8k_conf_tx, diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 77203e346cda..4d486bf9f725 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -302,7 +302,7 @@ out: static void p54_configure_filter(struct ieee80211_hw *dev, unsigned int changed_flags, unsigned int *total_flags, - int mc_count, struct dev_mc_list *mclist) + u64 multicast) { struct p54_common *priv = dev->priv; diff --git a/drivers/net/wireless/rt2x00/rt2x00.h b/drivers/net/wireless/rt2x00/rt2x00.h index 99e89596cef6..39d7d9baafdd 100644 --- a/drivers/net/wireless/rt2x00/rt2x00.h +++ b/drivers/net/wireless/rt2x00/rt2x00.h @@ -978,7 +978,7 @@ int rt2x00mac_config(struct ieee80211_hw *hw, u32 changed); void rt2x00mac_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *total_flags, - int mc_count, struct dev_addr_list *mc_list); + u64 multicast); int rt2x00mac_set_tim(struct ieee80211_hw *hw, struct ieee80211_sta *sta, bool set); #ifdef CONFIG_RT2X00_LIB_CRYPTO diff --git a/drivers/net/wireless/rt2x00/rt2x00mac.c b/drivers/net/wireless/rt2x00/rt2x00mac.c index cb7b6d459331..602f12699718 100644 --- a/drivers/net/wireless/rt2x00/rt2x00mac.c +++ b/drivers/net/wireless/rt2x00/rt2x00mac.c @@ -379,7 +379,7 @@ EXPORT_SYMBOL_GPL(rt2x00mac_config); void rt2x00mac_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *total_flags, - int mc_count, struct dev_addr_list *mc_list) + u64 multicast) { struct rt2x00_dev *rt2x00dev = hw->priv; diff --git a/drivers/net/wireless/rtl818x/rtl8180_dev.c b/drivers/net/wireless/rtl818x/rtl8180_dev.c index 09f46abc730a..16429c49139c 100644 --- a/drivers/net/wireless/rtl818x/rtl8180_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8180_dev.c @@ -728,10 +728,16 @@ static void rtl8180_bss_info_changed(struct ieee80211_hw *dev, priv->rf->conf_erp(dev, 
info); } +static u64 rtl8180_prepare_multicast(struct ieee80211_hw *dev, int mc_count, + struct dev_addr_list *mc_list) +{ + return mc_count; +} + static void rtl8180_configure_filter(struct ieee80211_hw *dev, unsigned int changed_flags, unsigned int *total_flags, - int mc_count, struct dev_addr_list *mclist) + u64 multicast) { struct rtl8180_priv *priv = dev->priv; @@ -741,7 +747,7 @@ static void rtl8180_configure_filter(struct ieee80211_hw *dev, priv->rx_conf ^= RTL818X_RX_CONF_CTRL; if (changed_flags & FIF_OTHER_BSS) priv->rx_conf ^= RTL818X_RX_CONF_MONITOR; - if (*total_flags & FIF_ALLMULTI || mc_count > 0) + if (*total_flags & FIF_ALLMULTI || multicast > 0) priv->rx_conf |= RTL818X_RX_CONF_MULTICAST; else priv->rx_conf &= ~RTL818X_RX_CONF_MULTICAST; @@ -768,6 +774,7 @@ static const struct ieee80211_ops rtl8180_ops = { .remove_interface = rtl8180_remove_interface, .config = rtl8180_config, .bss_info_changed = rtl8180_bss_info_changed, + .prepare_multicast = rtl8180_prepare_multicast, .configure_filter = rtl8180_configure_filter, }; diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c index 53f57dc52226..90f38357393c 100644 --- a/drivers/net/wireless/rtl818x/rtl8187_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c @@ -1192,10 +1192,16 @@ static void rtl8187_bss_info_changed(struct ieee80211_hw *dev, info->use_short_preamble); } +static u64 rtl8187_prepare_multicast(struct ieee80211_hw *dev, + int mc_count, struct dev_addr_list *mc_list) +{ + return mc_count; +} + static void rtl8187_configure_filter(struct ieee80211_hw *dev, unsigned int changed_flags, unsigned int *total_flags, - int mc_count, struct dev_addr_list *mclist) + u64 multicast) { struct rtl8187_priv *priv = dev->priv; @@ -1205,7 +1211,7 @@ static void rtl8187_configure_filter(struct ieee80211_hw *dev, priv->rx_conf ^= RTL818X_RX_CONF_CTRL; if (changed_flags & FIF_OTHER_BSS) priv->rx_conf ^= RTL818X_RX_CONF_MONITOR; - if (*total_flags & FIF_ALLMULTI || mc_count > 0) + if (*total_flags & FIF_ALLMULTI || multicast > 0) priv->rx_conf |= RTL818X_RX_CONF_MULTICAST; else priv->rx_conf &= ~RTL818X_RX_CONF_MULTICAST; @@ -1268,6 +1274,7 @@ static const struct ieee80211_ops rtl8187_ops = { .remove_interface = rtl8187_remove_interface, .config = rtl8187_config, .bss_info_changed = rtl8187_bss_info_changed, + .prepare_multicast = rtl8187_prepare_multicast, .configure_filter = rtl8187_configure_filter, .conf_tx = rtl8187_conf_tx }; diff --git a/drivers/net/wireless/wl12xx/wl1251_main.c b/drivers/net/wireless/wl12xx/wl1251_main.c index 7148934e464d..5809ef5b18f8 100644 --- a/drivers/net/wireless/wl12xx/wl1251_main.c +++ b/drivers/net/wireless/wl12xx/wl1251_main.c @@ -652,9 +652,7 @@ out: static void wl1251_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed, - unsigned int *total, - int mc_count, - struct dev_addr_list *mc_list) + unsigned int *total,u64 multicast) { struct wl1251 *wl = hw->priv; diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c index 4102d590b798..754be8179307 100644 --- a/drivers/net/wireless/wl12xx/wl1271_main.c +++ b/drivers/net/wireless/wl12xx/wl1271_main.c @@ -793,9 +793,7 @@ out: static void wl1271_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed, - unsigned int *total, - int mc_count, - struct dev_addr_list *mc_list) + unsigned int *total,u64 multicast) { struct wl1271 *wl = hw->priv; diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index 
55b7fbdc85dc..6d666359a42f 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -796,18 +796,40 @@ static void set_rx_filter_handler(struct work_struct *work) dev_err(zd_mac_dev(mac), "set_rx_filter_handler error %d\n", r); } +static u64 zd_op_prepare_multicast(struct ieee80211_hw *hw, + int mc_count, struct dev_addr_list *mclist) +{ + struct zd_mac *mac = zd_hw_mac(hw); + struct zd_mc_hash hash; + int i; + + zd_mc_clear(&hash); + + for (i = 0; i < mc_count; i++) { + if (!mclist) + break; + dev_dbg_f(zd_mac_dev(mac), "mc addr %pM\n", mclist->dmi_addr); + zd_mc_add_addr(&hash, mclist->dmi_addr); + mclist = mclist->next; + } + + return hash.low | ((u64)hash.high << 32); +} + #define SUPPORTED_FIF_FLAGS \ (FIF_PROMISC_IN_BSS | FIF_ALLMULTI | FIF_FCSFAIL | FIF_CONTROL | \ FIF_OTHER_BSS | FIF_BCN_PRBRESP_PROMISC) static void zd_op_configure_filter(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *new_flags, - int mc_count, struct dev_mc_list *mclist) + u64 multicast) { - struct zd_mc_hash hash; + struct zd_mc_hash hash = { + .low = multicast, + .high = multicast >> 32, + }; struct zd_mac *mac = zd_hw_mac(hw); unsigned long flags; - int i; /* Only deal with supported flags */ changed_flags &= SUPPORTED_FIF_FLAGS; @@ -819,25 +841,16 @@ static void zd_op_configure_filter(struct ieee80211_hw *hw, if (!changed_flags) return; - if (*new_flags & (FIF_PROMISC_IN_BSS | FIF_ALLMULTI)) { + if (*new_flags & (FIF_PROMISC_IN_BSS | FIF_ALLMULTI)) zd_mc_add_all(&hash); - } else { - zd_mc_clear(&hash); - for (i = 0; i < mc_count; i++) { - if (!mclist) - break; - dev_dbg_f(zd_mac_dev(mac), "mc addr %pM\n", - mclist->dmi_addr); - zd_mc_add_addr(&hash, mclist->dmi_addr); - mclist = mclist->next; - } - } spin_lock_irqsave(&mac->lock, flags); mac->pass_failed_fcs = !!(*new_flags & FIF_FCSFAIL); mac->pass_ctrl = !!(*new_flags & FIF_CONTROL); mac->multicast_hash = hash; spin_unlock_irqrestore(&mac->lock, flags); + + /* XXX: these can be called here now, can sleep now! */ queue_work(zd_workqueue, &mac->set_multicast_hash_work); if (changed_flags & FIF_CONTROL) @@ -940,6 +953,7 @@ static const struct ieee80211_ops zd_ops = { .add_interface = zd_op_add_interface, .remove_interface = zd_op_remove_interface, .config = zd_op_config, + .prepare_multicast = zd_op_prepare_multicast, .configure_filter = zd_op_configure_filter, .bss_info_changed = zd_op_bss_info_changed, .get_tsf = zd_op_get_tsf, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 76d43e12cc29..bc865206e5f1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1219,10 +1219,13 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * the driver's configure_filter() function which frames should be * passed to mac80211 and which should be filtered out. * - * The configure_filter() callback is invoked with the parameters - * @mc_count and @mc_list for the combined multicast address list - * of all virtual interfaces, @changed_flags telling which flags - * were changed and @total_flags with the new flag states. + * Before configure_filter() is invoked, the prepare_multicast() + * callback is invoked with the parameters @mc_count and @mc_list + * for the combined multicast address list of all virtual interfaces. + * It's use is optional, and it returns a u64 that is passed to + * configure_filter(). Additionally, configure_filter() has the + * arguments @changed_flags telling which flags were changed and + * @total_flags with the new flag states. 
* * If your device has no multicast address filters your driver will * need to check both the %FIF_ALLMULTI flag and the @mc_count @@ -1375,9 +1378,13 @@ enum ieee80211_ampdu_mlme_action { * for association indication. The @changed parameter indicates which * of the bss parameters has changed when a call is made. * + * @prepare_multicast: Prepare for multicast filter configuration. + * This callback is optional, and its return value is passed + * to configure_filter(). This callback must be atomic. + * * @configure_filter: Configure the device's RX filter. * See the section "Frame filtering" for more information. - * This callback must be implemented and atomic. + * This callback must be implemented. * * @set_tim: Set TIM bit. mac80211 calls this function when a TIM bit * must be set or cleared for a given STA. Must be atomic. @@ -1479,10 +1486,12 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_bss_conf *info, u32 changed); + u64 (*prepare_multicast)(struct ieee80211_hw *hw, + int mc_count, struct dev_addr_list *mc_list); void (*configure_filter)(struct ieee80211_hw *hw, unsigned int changed_flags, unsigned int *total_flags, - int mc_count, struct dev_addr_list *mc_list); + u64 multicast); int (*set_tim)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, bool set); int (*set_key)(struct ieee80211_hw *hw, enum set_key_cmd cmd, diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 4100c361a99d..d231c9323ad1 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -55,16 +55,32 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, trace_drv_bss_info_changed(local, vif, info, changed); } +static inline u64 drv_prepare_multicast(struct ieee80211_local *local, + int mc_count, + struct dev_addr_list *mc_list) +{ + u64 ret = 0; + + if (local->ops->prepare_multicast) + ret = local->ops->prepare_multicast(&local->hw, mc_count, + mc_list); + + trace_drv_prepare_multicast(local, mc_count, ret); + + return ret; +} + static inline void drv_configure_filter(struct ieee80211_local *local, unsigned int changed_flags, unsigned int *total_flags, - int mc_count, - struct dev_addr_list *mc_list) + u64 multicast) { + might_sleep(); + local->ops->configure_filter(&local->hw, changed_flags, total_flags, - mc_count, mc_list); + multicast); trace_drv_configure_filter(local, changed_flags, total_flags, - mc_count); + multicast); } static inline int drv_set_tim(struct ieee80211_local *local, diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 5a10da2d70fd..37b9051afcf3 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -191,31 +191,55 @@ TRACE_EVENT(drv_bss_info_changed, ) ); +TRACE_EVENT(drv_prepare_multicast, + TP_PROTO(struct ieee80211_local *local, int mc_count, u64 ret), + + TP_ARGS(local, mc_count, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(int, mc_count) + __field(u64, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->mc_count = mc_count; + __entry->ret = ret; + ), + + TP_printk( + LOCAL_PR_FMT " prepare mc (%d): %llx", + LOCAL_PR_ARG, __entry->mc_count, + (unsigned long long) __entry->ret + ) +); + TRACE_EVENT(drv_configure_filter, TP_PROTO(struct ieee80211_local *local, unsigned int changed_flags, unsigned int *total_flags, - int mc_count), + u64 multicast), - TP_ARGS(local, changed_flags, total_flags, mc_count), + TP_ARGS(local, changed_flags, total_flags, multicast), TP_STRUCT__entry( LOCAL_ENTRY __field(unsigned int, changed) __field(unsigned int, total) - 
__field(int, mc) + __field(u64, multicast) ), TP_fast_assign( LOCAL_ASSIGN; __entry->changed = changed_flags; __entry->total = *total_flags; - __entry->mc = mc_count; + __entry->multicast = multicast; ), TP_printk( - LOCAL_PR_FMT " changed:%#x total:%#x mc:%d", - LOCAL_PR_ARG, __entry->changed, __entry->total, __entry->mc + LOCAL_PR_FMT " changed:%#x total:%#x", + LOCAL_PR_ARG, __entry->changed, __entry->total ) ); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a6abc7dfd903..a07f01736a91 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -636,6 +636,9 @@ struct ieee80211_local { /* protects the aggregated multicast list and filter calls */ spinlock_t filter_lock; + /* used for uploading changed mc list */ + struct work_struct reconfig_filter; + /* aggregated multicast list */ struct dev_addr_list *mc_list; int mc_count; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index e8fb03b91a44..b161301056df 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -227,9 +227,7 @@ static int ieee80211_open(struct net_device *dev) if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) local->fif_other_bss++; - spin_lock_bh(&local->filter_lock); ieee80211_configure_filter(local); - spin_unlock_bh(&local->filter_lock); break; default: conf.vif = &sdata->vif; @@ -241,17 +239,13 @@ static int ieee80211_open(struct net_device *dev) if (ieee80211_vif_is_mesh(&sdata->vif)) { local->fif_other_bss++; - spin_lock_bh(&local->filter_lock); ieee80211_configure_filter(local); - spin_unlock_bh(&local->filter_lock); ieee80211_start_mesh(sdata); } else if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll++; - spin_lock_bh(&local->filter_lock); ieee80211_configure_filter(local); - spin_unlock_bh(&local->filter_lock); } changed |= ieee80211_reset_erp_info(sdata); @@ -404,10 +398,11 @@ static int ieee80211_stop(struct net_device *dev) spin_lock_bh(&local->filter_lock); __dev_addr_unsync(&local->mc_list, &local->mc_count, &dev->mc_list, &dev->mc_count); - ieee80211_configure_filter(local); spin_unlock_bh(&local->filter_lock); netif_addr_unlock_bh(dev); + ieee80211_configure_filter(local); + del_timer_sync(&local->dynamic_ps_timer); cancel_work_sync(&local->dynamic_ps_enable_work); @@ -458,9 +453,7 @@ static int ieee80211_stop(struct net_device *dev) if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) local->fif_other_bss--; - spin_lock_bh(&local->filter_lock); ieee80211_configure_filter(local); - spin_unlock_bh(&local->filter_lock); break; case NL80211_IFTYPE_STATION: del_timer_sync(&sdata->u.mgd.chswitch_timer); @@ -503,9 +496,7 @@ static int ieee80211_stop(struct net_device *dev) local->fif_other_bss--; atomic_dec(&local->iff_allmultis); - spin_lock_bh(&local->filter_lock); ieee80211_configure_filter(local); - spin_unlock_bh(&local->filter_lock); ieee80211_stop_mesh(sdata); } @@ -622,8 +613,8 @@ static void ieee80211_set_multicast_list(struct net_device *dev) spin_lock_bh(&local->filter_lock); __dev_addr_sync(&local->mc_list, &local->mc_count, &dev->mc_list, &dev->mc_count); - ieee80211_configure_filter(local); spin_unlock_bh(&local->filter_lock); + ieee80211_queue_work(&local->hw, &local->reconfig_filter); } /* diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b03fd84777fa..05f923575fee 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -50,9 +50,9 @@ struct ieee80211_tx_status_rtap_hdr { } __attribute__ ((packed)); -/* must be called under mdev tx lock */ void ieee80211_configure_filter(struct ieee80211_local *local) { + u64 
mc; unsigned int changed_flags; unsigned int new_flags = 0; @@ -62,7 +62,7 @@ void ieee80211_configure_filter(struct ieee80211_local *local) if (atomic_read(&local->iff_allmultis)) new_flags |= FIF_ALLMULTI; - if (local->monitors) + if (local->monitors || local->scanning) new_flags |= FIF_BCN_PRBRESP_PROMISC; if (local->fif_fcsfail) @@ -80,20 +80,30 @@ void ieee80211_configure_filter(struct ieee80211_local *local) if (local->fif_pspoll) new_flags |= FIF_PSPOLL; + spin_lock_bh(&local->filter_lock); changed_flags = local->filter_flags ^ new_flags; + mc = drv_prepare_multicast(local, local->mc_count, local->mc_list); + spin_unlock_bh(&local->filter_lock); + /* be a bit nasty */ new_flags |= (1<<31); - drv_configure_filter(local, changed_flags, &new_flags, - local->mc_count, - local->mc_list); + drv_configure_filter(local, changed_flags, &new_flags, mc); WARN_ON(new_flags & (1<<31)); local->filter_flags = new_flags & ~(1<<31); } +static void ieee80211_reconfig_filter(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, reconfig_filter); + + ieee80211_configure_filter(local); +} + int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) { struct ieee80211_channel *chan, *scan_chan; @@ -692,6 +702,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, INIT_WORK(&local->restart_work, ieee80211_restart_work); + INIT_WORK(&local->reconfig_filter, ieee80211_reconfig_filter); + INIT_WORK(&local->dynamic_ps_enable_work, ieee80211_dynamic_ps_enable_work); INIT_WORK(&local->dynamic_ps_disable_work, @@ -946,6 +958,8 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) rtnl_unlock(); + cancel_work_sync(&local->reconfig_filter); + ieee80211_clear_tx_pending(local); sta_info_stop(local); rate_control_deinitialize(local); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index e091cbc3434f..1e04be6b9129 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -292,13 +292,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) if (was_hw_scan) goto done; - spin_lock_bh(&local->filter_lock); - local->filter_flags &= ~FIF_BCN_PRBRESP_PROMISC; - drv_configure_filter(local, FIF_BCN_PRBRESP_PROMISC, - &local->filter_flags, - local->mc_count, - local->mc_list); - spin_unlock_bh(&local->filter_lock); + ieee80211_configure_filter(local); drv_sw_scan_complete(local); @@ -376,13 +370,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local) local->next_scan_state = SCAN_DECISION; local->scan_channel_idx = 0; - spin_lock_bh(&local->filter_lock); - local->filter_flags |= FIF_BCN_PRBRESP_PROMISC; - drv_configure_filter(local, FIF_BCN_PRBRESP_PROMISC, - &local->filter_flags, - local->mc_count, - local->mc_list); - spin_unlock_bh(&local->filter_lock); + ieee80211_configure_filter(local); /* TODO: start scan as soon as all nullfunc frames are ACKed */ ieee80211_queue_delayed_work(&local->hw, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e55d57f559ec..5eb306377c63 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1076,9 +1076,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* reconfigure hardware */ ieee80211_hw_config(local, ~0); - spin_lock_bh(&local->filter_lock); ieee80211_configure_filter(local); - spin_unlock_bh(&local->filter_lock); /* Finally also reconfigure all the BSS information */ list_for_each_entry(sdata, &local->interfaces, list) { -- cgit v1.2.3 From f424afa17899408cbd267a4c4534ca6fc9d8f71c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 
Aug 2009 16:18:07 +0200 Subject: mac80211: remove deprecated API All but two drivers have now stopped using the two deprecated members radio_enabled and beacon_int, so it's about time to remove them for good. Signed-off-by: Johannes Berg Acked-by: Kalle Valo Signed-off-by: John W. Linville --- drivers/net/wireless/at76c50x-usb.c | 5 ++--- drivers/net/wireless/mwl8k.c | 7 ------- include/net/mac80211.h | 15 --------------- net/mac80211/main.c | 4 ---- 4 files changed, 2 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index a6e19545ac6a..8e1a55dec351 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -1950,9 +1950,8 @@ static int at76_config(struct ieee80211_hw *hw, u32 changed) { struct at76_priv *priv = hw->priv; - at76_dbg(DBG_MAC80211, "%s(): channel %d radio %d", - __func__, hw->conf.channel->hw_value, - hw->conf.radio_enabled); + at76_dbg(DBG_MAC80211, "%s(): channel %d", + __func__, hw->conf.channel->hw_value); at76_dbg_dump(DBG_MAC80211, priv->bssid, ETH_ALEN, "bssid:"); mutex_lock(&priv->mtx); diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index f84387083e73..c32e93c8c410 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -3031,13 +3031,6 @@ static int mwl8k_config_wt(struct work_struct *wt) struct mwl8k_priv *priv = hw->priv; int rc = 0; - if (!conf->radio_enabled) { - mwl8k_cmd_802_11_radio_control(hw, MWL8K_RADIO_DISABLE); - priv->current_channel = NULL; - rc = 0; - goto mwl8k_config_exit; - } - if (mwl8k_cmd_802_11_radio_control(hw, MWL8K_RADIO_ENABLE)) { rc = -EINVAL; goto mwl8k_config_exit; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bc865206e5f1..aac84d7bd46e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -583,7 +583,6 @@ enum ieee80211_conf_flags { /** * enum ieee80211_conf_changed - denotes which configuration changed * - * @_IEEE80211_CONF_CHANGE_RADIO_ENABLED: DEPRECATED * @IEEE80211_CONF_CHANGE_LISTEN_INTERVAL: the listen interval changed * @IEEE80211_CONF_CHANGE_RADIOTAP: the radiotap flag changed * @IEEE80211_CONF_CHANGE_PS: the PS flag or dynamic PS timeout changed @@ -593,7 +592,6 @@ enum ieee80211_conf_flags { * @IEEE80211_CONF_CHANGE_IDLE: Idle flag changed */ enum ieee80211_conf_changed { - _IEEE80211_CONF_CHANGE_RADIO_ENABLED = BIT(0), IEEE80211_CONF_CHANGE_LISTEN_INTERVAL = BIT(2), IEEE80211_CONF_CHANGE_RADIOTAP = BIT(3), IEEE80211_CONF_CHANGE_PS = BIT(4), @@ -603,14 +601,6 @@ enum ieee80211_conf_changed { IEEE80211_CONF_CHANGE_IDLE = BIT(8), }; -static inline __deprecated enum ieee80211_conf_changed -__IEEE80211_CONF_CHANGE_RADIO_ENABLED(void) -{ - return _IEEE80211_CONF_CHANGE_RADIO_ENABLED; -} -#define IEEE80211_CONF_CHANGE_RADIO_ENABLED \ - __IEEE80211_CONF_CHANGE_RADIO_ENABLED() - /** * struct ieee80211_conf - configuration of the device * @@ -618,9 +608,6 @@ __IEEE80211_CONF_CHANGE_RADIO_ENABLED(void) * * @flags: configuration flags defined above * - * @radio_enabled: when zero, driver is required to switch off the radio. 
- * @beacon_int: DEPRECATED, DO NOT USE - * * @listen_interval: listen interval in units of beacon interval * @max_sleep_period: the maximum number of beacon intervals to sleep for * before checking the beacon for a TIM bit (managed mode only); this @@ -644,13 +631,11 @@ __IEEE80211_CONF_CHANGE_RADIO_ENABLED(void) * number of transmissions not the number of retries */ struct ieee80211_conf { - int __deprecated beacon_int; u32 flags; int power_level, dynamic_ps_timeout; int max_sleep_period; u16 listen_interval; - bool __deprecated radio_enabled; u8 long_frame_max_tx_count, short_frame_max_tx_count; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 05f923575fee..3302df96f8d4 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -241,9 +241,6 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, drv_bss_info_changed(local, &sdata->vif, &sdata->vif.bss_conf, changed); - - /* DEPRECATED */ - local->hw.conf.beacon_int = sdata->vif.bss_conf.beacon_int; } u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) @@ -687,7 +684,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.max_rates = 1; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; - local->hw.conf.radio_enabled = true; local->user_power_level = -1; INIT_LIST_HEAD(&local->interfaces); -- cgit v1.2.3 From ad002395fd230528281083f4be71855ed7e35b04 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 18 Aug 2009 19:51:57 +0200 Subject: cfg80211: fix dangling scan request checking My patch "cfg80211: fix deadlock" broke the code it was supposed to fix, the scan request checking. But it's not trivial to put it back the way it was, since the original patch had a deadlock. Now do it in a completely new way: queue the check off to a work struct, where we can freely lock. But that has some more complications, like needing to wait for it to be done before the wiphy/rdev can be destroyed, so some code is required to handle that. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 ++ net/wireless/core.c | 76 +++++++++++++++++++++++++++++++++++++++++--------- net/wireless/core.h | 2 ++ 3 files changed, 67 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0b146bb2dd14..3d874c620219 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1325,6 +1325,8 @@ struct wireless_dev { struct mutex mtx; + struct work_struct cleanup_work; + /* currently used for IBSS and SME - might be rearranged later */ u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; diff --git a/net/wireless/core.c b/net/wireless/core.c index 69a185ba9ff1..c150071b6f29 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -430,6 +430,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) INIT_WORK(&rdev->conn_work, cfg80211_conn_work); INIT_WORK(&rdev->event_work, cfg80211_event_work); + init_waitqueue_head(&rdev->dev_wait); + /* * Initialize wiphy parameters to IEEE 802.11 MIB default values. 
* Fragmentation and RTS threshold are disabled by default with the @@ -574,7 +576,23 @@ void wiphy_unregister(struct wiphy *wiphy) /* protect the device list */ mutex_lock(&cfg80211_mutex); + wait_event(rdev->dev_wait, ({ + int __count; + mutex_lock(&rdev->devlist_mtx); + __count = rdev->opencount; + mutex_unlock(&rdev->devlist_mtx); + __count == 0;})); + + mutex_lock(&rdev->devlist_mtx); BUG_ON(!list_empty(&rdev->netdev_list)); + mutex_unlock(&rdev->devlist_mtx); + + /* + * First remove the hardware from everywhere, this makes + * it impossible to find from userspace. + */ + cfg80211_debugfs_rdev_del(rdev); + list_del(&rdev->list); /* * Try to grab rdev->mtx. If a command is still in progress, @@ -582,26 +600,18 @@ void wiphy_unregister(struct wiphy *wiphy) * down the device already. We wait for this command to complete * before unlinking the item from the list. * Note: as codified by the BUG_ON above we cannot get here if - * a virtual interface is still associated. Hence, we can only - * get to lock contention here if userspace issues a command - * that identified the hardware by wiphy index. + * a virtual interface is still present. Hence, we can only get + * to lock contention here if userspace issues a command that + * identified the hardware by wiphy index. */ cfg80211_lock_rdev(rdev); - - if (WARN_ON(rdev->scan_req)) { - rdev->scan_req->aborted = true; - ___cfg80211_scan_done(rdev); - } - + /* nothing */ cfg80211_unlock_rdev(rdev); - cfg80211_debugfs_rdev_del(rdev); - /* If this device got a regulatory hint tell core its * free to listen now to a new shiny device regulatory hint */ reg_device_remove(wiphy); - list_del(&rdev->list); cfg80211_rdev_list_generation++; device_del(&rdev->wiphy.dev); debugfs_remove(rdev->wiphy.debugfsdir); @@ -640,6 +650,31 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); +static void wdev_cleanup_work(struct work_struct *work) +{ + struct wireless_dev *wdev; + struct cfg80211_registered_device *rdev; + + wdev = container_of(work, struct wireless_dev, cleanup_work); + rdev = wiphy_to_dev(wdev->wiphy); + + cfg80211_lock_rdev(rdev); + + if (WARN_ON(rdev->scan_req && rdev->scan_req->dev == wdev->netdev)) { + rdev->scan_req->aborted = true; + ___cfg80211_scan_done(rdev); + } + + cfg80211_unlock_rdev(rdev); + + mutex_lock(&rdev->devlist_mtx); + rdev->opencount--; + mutex_unlock(&rdev->devlist_mtx); + wake_up(&rdev->dev_wait); + + dev_put(wdev->netdev); +} + static int cfg80211_netdev_notifier_call(struct notifier_block * nb, unsigned long state, void *ndev) @@ -663,6 +698,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, * are added with nl80211. */ mutex_init(&wdev->mtx); + INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); mutex_lock(&rdev->devlist_mtx); @@ -717,8 +753,22 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, default: break; } + dev_hold(dev); + schedule_work(&wdev->cleanup_work); break; case NETDEV_UP: + /* + * If we have a really quick DOWN/UP succession we may + * have this work still pending ... cancel it and see + * if it was pending, in which case we need to account + * for some of the work it would have done. 
+ */ + if (cancel_work_sync(&wdev->cleanup_work)) { + mutex_lock(&rdev->devlist_mtx); + rdev->opencount--; + mutex_unlock(&rdev->devlist_mtx); + dev_put(dev); + } #ifdef CONFIG_WIRELESS_EXT cfg80211_lock_rdev(rdev); mutex_lock(&rdev->devlist_mtx); @@ -734,6 +784,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, break; } wdev_unlock(wdev); + rdev->opencount++; mutex_unlock(&rdev->devlist_mtx); cfg80211_unlock_rdev(rdev); #endif @@ -756,7 +807,6 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_init(&wdev->list); rdev->devlist_generation++; - mutex_destroy(&wdev->mtx); #ifdef CONFIG_WIRELESS_EXT kfree(wdev->wext.keys); #endif diff --git a/net/wireless/core.h b/net/wireless/core.h index c603f5286326..f565432ae22f 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -50,6 +50,8 @@ struct cfg80211_registered_device { struct mutex devlist_mtx; struct list_head netdev_list; int devlist_generation; + int opencount; /* also protected by devlist_mtx */ + wait_queue_head_t dev_wait; /* BSSes/scanning */ spinlock_t bss_lock; -- cgit v1.2.3 From 4dceef96756b667360741712a8e37490f8458516 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 20 Aug 2009 17:08:39 -0400 Subject: nfs: fix compile error in rpc_pipefs.h This include is needed for the definition of delayed_work. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- include/linux/sunrpc/rpc_pipe_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h index a92571a34556..cf14db975da0 100644 --- a/include/linux/sunrpc/rpc_pipe_fs.h +++ b/include/linux/sunrpc/rpc_pipe_fs.h @@ -3,6 +3,8 @@ #ifdef __KERNEL__ +#include + struct rpc_pipe_msg { struct list_head list; void *data; -- cgit v1.2.3 From e3b2415e281a97ade36d88404094a90cfea838c0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 21 Aug 2009 09:47:45 +1000 Subject: drm/radeon/kms: implement the bo busy ioctl properly. The previous patch assumes the ioctl already existed, when it actually didn't. It also didn't return the correct error code. 
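For illustration only (not part of this patch): once DRM_RADEON_GEM_BUSY is wired into the ioctl table, userspace can poll a buffer object's status. The sketch below assumes the struct drm_radeon_gem_busy field layout (a handle member) and that the kernel reports an in-flight BO as -EBUSY; neither is spelled out in the hunks here, only the struct name and the ioctl number are.

#include <errno.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/radeon_drm.h>

/* Illustrative helper: 1 = BO still busy, 0 = idle, -1 = error. */
static int radeon_bo_is_busy(int fd, uint32_t handle)
{
	struct drm_radeon_gem_busy args = { .handle = handle };	/* field name assumed */

	if (ioctl(fd, DRM_IOCTL_RADEON_GEM_BUSY, &args) == 0)
		return 0;			/* idle */
	if (errno == EBUSY)
		return 1;			/* presumably still referenced by the GPU */
	return -1;				/* genuine error, now propagated by the kernel */
}
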
Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- drivers/gpu/drm/radeon/radeon_kms.c | 1 + include/drm/radeon_drm.h | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index d4ceff13bbb1..14c199802920 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -283,7 +283,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, mutex_lock(&dev->struct_mutex); drm_gem_object_unreference(gobj); mutex_unlock(&dev->struct_mutex); - return 0; + return r; } int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index d2764bf6b2a2..11ed672543b1 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -318,5 +318,6 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index f81c3232accd..b43925586b29 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -508,6 +508,7 @@ typedef struct { #define DRM_RADEON_INFO 0x27 #define DRM_RADEON_GEM_SET_TILING 0x28 #define DRM_RADEON_GEM_GET_TILING 0x29 +#define DRM_RADEON_GEM_BUSY 0x2a #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) @@ -548,6 +549,7 @@ typedef struct { #define DRM_IOCTL_RADEON_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info) #define DRM_IOCTL_RADEON_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling) #define DRM_IOCTL_RADEON_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling) +#define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy) typedef struct drm_radeon_init { enum { -- cgit v1.2.3 From 1700f5fde88f9a251037bc86bde538ee32c59905 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 20 Aug 2009 22:05:53 -0700 Subject: Input: ucb1400_ts - enable ADC Filter This patch enables ADC filtering on UCB1400 codec by default. The benefit from this change is mostly on some Colibri boards where the ADCSYNC pin of the UCB1400 codec isn't connected causing the touchscreen to jitter very badly. This change has no visible effect on boards where the ADCSYNC pin is connected. 
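For reference (illustrative, not part of the patch): the change boils down to a read-modify-write of the codec's filter control register. The helper name below is made up and the snd_ac97 handle type is assumed; the accessors and the UCB_FCSR/UCB_FCSR_AVE definitions come from the hunks that follow.

/* Sketch of the probe-time register update this patch performs. */
static void ucb1400_enable_adc_filter(struct snd_ac97 *ac97)
{
	u16 fcsr = ucb1400_reg_read(ac97, UCB_FCSR);

	/* Setting the AVE bit turns on the codec's ADC averaging filter. */
	ucb1400_reg_write(ac97, UCB_FCSR, fcsr | UCB_FCSR_AVE);
}
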
Signed-off-by: Marek Vasut Tested-by: Palo Revak Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ucb1400_ts.c | 9 +++++++++ include/linux/ucb1400.h | 4 ++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c index 6954f5500108..3b345f9cf0c7 100644 --- a/drivers/input/touchscreen/ucb1400_ts.c +++ b/drivers/input/touchscreen/ucb1400_ts.c @@ -345,6 +345,7 @@ static int ucb1400_ts_detect_irq(struct ucb1400_ts *ucb) static int ucb1400_ts_probe(struct platform_device *dev) { int error, x_res, y_res; + u16 fcsr; struct ucb1400_ts *ucb = dev->dev.platform_data; ucb->ts_idev = input_allocate_device(); @@ -382,6 +383,14 @@ static int ucb1400_ts_probe(struct platform_device *dev) ucb->ts_idev->evbit[0] = BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY); ucb->ts_idev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); + /* + * Enable ADC filter to prevent horrible jitter on Colibri. + * This also further reduces jitter on boards where ADCSYNC + * pin is connected. + */ + fcsr = ucb1400_reg_read(ucb->ac97, UCB_FCSR); + ucb1400_reg_write(ucb->ac97, UCB_FCSR, fcsr | UCB_FCSR_AVE); + ucb1400_adc_enable(ucb->ac97); x_res = ucb1400_ts_read_xres(ucb); y_res = ucb1400_ts_read_yres(ucb); diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h index ed889f4168f3..ae779bb8cc0f 100644 --- a/include/linux/ucb1400.h +++ b/include/linux/ucb1400.h @@ -73,6 +73,10 @@ #define UCB_ADC_DATA 0x68 #define UCB_ADC_DAT_VALID (1 << 15) + +#define UCB_FCSR 0x6c +#define UCB_FCSR_AVE (1 << 12) + #define UCB_ADC_DAT_MASK 0x3ff #define UCB_ID 0x7e -- cgit v1.2.3 From f779b3e513478218cbaaaa0a506d7801cab6fd14 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Aug 2009 19:11:39 -0400 Subject: drm/radeon: add GET_PARAM/INFO support for Z pipes Needed for occlusion queries on rv530 chips. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r300.c | 4 +++- drivers/gpu/drm/radeon/r420.c | 13 ++++++++++++- drivers/gpu/drm/radeon/r520.c | 1 - drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_cp.c | 9 +++++++++ drivers/gpu/drm/radeon/radeon_drv.h | 5 ++++- drivers/gpu/drm/radeon/radeon_kms.c | 3 +++ drivers/gpu/drm/radeon/radeon_reg.h | 2 ++ drivers/gpu/drm/radeon/radeon_state.c | 3 +++ include/drm/radeon_drm.h | 2 ++ 10 files changed, 39 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index c47579dcafa1..053f4ec397f7 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -448,6 +448,7 @@ void r300_gpu_init(struct radeon_device *rdev) /* rv350,rv370,rv380 */ rdev->num_gb_pipes = 1; } + rdev->num_z_pipes = 1; gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16); switch (rdev->num_gb_pipes) { case 2: @@ -486,7 +487,8 @@ void r300_gpu_init(struct radeon_device *rdev) printk(KERN_WARNING "Failed to wait MC idle while " "programming pipes. 
Bad things might happen.\n"); } - DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes); + DRM_INFO("radeon: %d quad pipes, %d Z pipes initialized.\n", + rdev->num_gb_pipes, rdev->num_z_pipes); } int r300_ga_reset(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index dea497a979f2..97426a6f370f 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -165,7 +165,18 @@ void r420_pipes_init(struct radeon_device *rdev) printk(KERN_WARNING "Failed to wait GUI idle while " "programming pipes. Bad things might happen.\n"); } - DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes); + + if (rdev->family == CHIP_RV530) { + tmp = RREG32(RV530_GB_PIPE_SELECT2); + if ((tmp & 3) == 3) + rdev->num_z_pipes = 2; + else + rdev->num_z_pipes = 1; + } else + rdev->num_z_pipes = 1; + + DRM_INFO("radeon: %d quad pipes, %d z pipes initialized.\n", + rdev->num_gb_pipes, rdev->num_z_pipes); } void r420_gpu_init(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index 09fb0b6ec7dd..ebd6b0f7bdff 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -177,7 +177,6 @@ void r520_gpu_init(struct radeon_device *rdev) */ /* workaround for RV530 */ if (rdev->family == CHIP_RV530) { - WREG32(0x4124, 1); WREG32(0x4128, 0xFF); } r420_pipes_init(rdev); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 79ad98264e33..b519fb2fecbb 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -655,6 +655,7 @@ struct radeon_device { int usec_timeout; enum radeon_pll_errata pll_errata; int num_gb_pipes; + int num_z_pipes; int disp_priority; /* BIOS */ uint8_t *bios; diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c index d8356827ef17..7a52c461145c 100644 --- a/drivers/gpu/drm/radeon/radeon_cp.c +++ b/drivers/gpu/drm/radeon/radeon_cp.c @@ -406,6 +406,15 @@ static void radeon_init_pipes(drm_radeon_private_t *dev_priv) { uint32_t gb_tile_config, gb_pipe_sel = 0; + if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530) { + uint32_t z_pipe_sel = RADEON_READ(RV530_GB_PIPE_SELECT2); + if ((z_pipe_sel & 3) == 3) + dev_priv->num_z_pipes = 2; + else + dev_priv->num_z_pipes = 1; + } else + dev_priv->num_z_pipes = 1; + /* RS4xx/RS6xx/R4xx/R5xx */ if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R420) { gb_pipe_sel = RADEON_READ(R400_GB_PIPE_SELECT); diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 3933f8216a34..6fa32dac4e97 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -100,9 +100,10 @@ * 1.28- Add support for VBL on CRTC2 * 1.29- R500 3D cmd buffer support * 1.30- Add support for occlusion queries + * 1.31- Add support for num Z pipes from GET_PARAM */ #define DRIVER_MAJOR 1 -#define DRIVER_MINOR 30 +#define DRIVER_MINOR 31 #define DRIVER_PATCHLEVEL 0 /* @@ -329,6 +330,7 @@ typedef struct drm_radeon_private { resource_size_t fb_aper_offset; int num_gb_pipes; + int num_z_pipes; int track_flush; drm_local_map_t *mmio; @@ -689,6 +691,7 @@ extern void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pciga /* pipe config regs */ #define R400_GB_PIPE_SELECT 0x402c +#define RV530_GB_PIPE_SELECT2 0x4124 #define R500_DYN_SCLK_PWMEM_PIPE 0x000d /* PLL */ #define R300_GB_TILE_CONFIG 0x4018 # define R300_ENABLE_TILING (1 << 0) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c 
b/drivers/gpu/drm/radeon/radeon_kms.c index 11ed672543b1..dce09ada32bc 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -95,6 +95,9 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) case RADEON_INFO_NUM_GB_PIPES: value = rdev->num_gb_pipes; break; + case RADEON_INFO_NUM_Z_PIPES: + value = rdev->num_z_pipes; + break; default: DRM_DEBUG("Invalid request %d\n", info->request); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h index 5834497b366d..4df43f62c678 100644 --- a/drivers/gpu/drm/radeon/radeon_reg.h +++ b/drivers/gpu/drm/radeon/radeon_reg.h @@ -3574,4 +3574,6 @@ #define RADEON_SCRATCH_REG4 0x15f0 #define RADEON_SCRATCH_REG5 0x15f4 +#define RV530_GB_PIPE_SELECT2 0x4124 + #endif diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 46645f3e0328..2882f40d5ec5 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -3081,6 +3081,9 @@ static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_fil case RADEON_PARAM_NUM_GB_PIPES: value = dev_priv->num_gb_pipes; break; + case RADEON_PARAM_NUM_Z_PIPES: + value = dev_priv->num_z_pipes; + break; default: DRM_DEBUG("Invalid parameter %d\n", param->param); return -EINVAL; diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index b43925586b29..2ba61e18fc8b 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -709,6 +709,7 @@ typedef struct drm_radeon_indirect { #define RADEON_PARAM_FB_LOCATION 14 /* FB location */ #define RADEON_PARAM_NUM_GB_PIPES 15 /* num GB pipes */ #define RADEON_PARAM_DEVICE_ID 16 +#define RADEON_PARAM_NUM_Z_PIPES 17 /* num Z pipes */ typedef struct drm_radeon_getparam { int param; @@ -897,6 +898,7 @@ struct drm_radeon_cs { #define RADEON_INFO_DEVICE_ID 0x00 #define RADEON_INFO_NUM_GB_PIPES 0x01 +#define RADEON_INFO_NUM_Z_PIPES 0x02 struct drm_radeon_info { uint32_t request; -- cgit v1.2.3 From 4516fc0454e7ffe2f369e80045b23c2b32155004 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:54 -0400 Subject: sunrpc: add routine for comparing addresses lockd needs these sort of routines, as does the NFSv4 callback code. Move lockd's routines into common code and rename them so that they can be used by others. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/lockd/clntlock.c | 2 +- fs/lockd/host.c | 4 ++-- fs/lockd/mon.c | 2 +- fs/lockd/svcsubs.c | 2 +- include/linux/lockd/lockd.h | 43 ---------------------------------------- include/linux/sunrpc/clnt.h | 48 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 53 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 1f3b0fc0d351..fc9032dc8862 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -166,7 +166,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) */ if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) continue; - if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) + if (!rpc_cmp_addr(nlm_addr(block->b_host), addr)) continue; if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) continue; diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 7cb076ac6b45..4600c2037b8b 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -111,7 +111,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) */ chain = &nlm_hosts[nlm_hash_address(ni->sap)]; hlist_for_each_entry(host, pos, chain, h_hash) { - if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) + if (!rpc_cmp_addr(nlm_addr(host), ni->sap)) continue; /* See if we have an NSM handle for this client */ @@ -125,7 +125,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) if (host->h_server != ni->server) continue; if (ni->server && - !nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) + !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap)) continue; /* Move to head of hash chain. */ diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 30c933188dd7..f956651d0f65 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -209,7 +209,7 @@ static struct nsm_handle *nsm_lookup_addr(const struct sockaddr *sap) struct nsm_handle *nsm; list_for_each_entry(nsm, &nsm_handles, sm_link) - if (nlm_cmp_addr(nsm_addr(nsm), sap)) + if (rpc_cmp_addr(nsm_addr(nsm), sap)) return nsm; return NULL; } diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 9e4d6aab611b..ad478da7ca63 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); static int nlmsvc_match_ip(void *datap, struct nlm_host *host) { - return nlm_cmp_addr(nlm_srcaddr(host), datap); + return rpc_cmp_addr(nlm_srcaddr(host), datap); } /** diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index c325b187966b..e7a251a988c0 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -338,49 +338,6 @@ static inline int nlm_privileged_requester(const struct svc_rqst *rqstp) } } -static inline int __nlm_cmp_addr4(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; - const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; - return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; - const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; - return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); -} -#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ -static inline int __nlm_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - return 0; -} -#endif /* !(CONFIG_IPV6 || 
CONFIG_IPV6_MODULE) */ - -/* - * Compare two host addresses - * - * Return TRUE if the addresses are the same; otherwise FALSE. - */ -static inline int nlm_cmp_addr(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - if (sap1->sa_family == sap2->sa_family) { - switch (sap1->sa_family) { - case AF_INET: - return __nlm_cmp_addr4(sap1, sap2); - case AF_INET6: - return __nlm_cmp_addr6(sap1, sap2); - } - } - return 0; -} - /* * Compare two NLM locks. * When the second lock is of type F_UNLCK, this acts like a wildcard. diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index ab3f6e90caa5..b17df361be82 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -22,6 +22,7 @@ #include #include #include +#include struct rpc_inode; @@ -188,5 +189,52 @@ static inline void rpc_set_port(struct sockaddr *sap, #define IPV6_SCOPE_DELIMITER '%' #define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") +static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; + + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; + return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); +} +#else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + return false; +} +#endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ + +/** + * rpc_cmp_addr - compare the address portion of two sockaddrs. + * @sap1: first sockaddr + * @sap2: second sockaddr + * + * Just compares the family and address portion. Ignores port, scope, etc. + * Returns true if the addrs are equal, false if they aren't. + */ +static inline bool rpc_cmp_addr(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + if (sap1->sa_family == sap2->sa_family) { + switch (sap1->sa_family) { + case AF_INET: + return __rpc_cmp_addr4(sap1, sap2); + case AF_INET6: + return __rpc_cmp_addr6(sap1, sap2); + } + } + return false; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From be3ad6b0b675fd1d6b48362ca30bdee75fbef6b4 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:55 -0400 Subject: sunrpc: add common routine for copying address portion of a sockaddr Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/clnt.h | 50 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index b17df361be82..044f531aee70 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -198,6 +198,17 @@ static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; } +static inline bool __rpc_copy_addr4(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in *ssin = (struct sockaddr_in *) src; + struct sockaddr_in *dsin = (struct sockaddr_in *) dst; + + dsin->sin_family = ssin->sin_family; + dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; + return true; +} + #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) @@ -206,12 +217,29 @@ static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr); } + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; + struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; + + dsin6->sin6_family = ssin6->sin6_family; + ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr); + return true; +} #else /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, const struct sockaddr *sap2) { return false; } + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + return false; +} #endif /* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */ /** @@ -236,5 +264,27 @@ static inline bool rpc_cmp_addr(const struct sockaddr *sap1, return false; } +/** + * rpc_copy_addr - copy the address portion of one sockaddr to another + * @dst: destination sockaddr + * @src: source sockaddr + * + * Just copies the address portion and family. Ignores port, scope, etc. + * Caller is responsible for making certain that dst is large enough to hold + * the address in src. Returns true if address family is supported. Returns + * false otherwise. + */ +static inline bool rpc_copy_addr(struct sockaddr *dst, + const struct sockaddr *src) +{ + switch (src->sa_family) { + case AF_INET: + return __rpc_copy_addr4(dst, src); + case AF_INET6: + return __rpc_copy_addr6(dst, src); + } + return false; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From 363168b4ea8ec26aeb982ac6024a09f907ecd27e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:56 -0400 Subject: nfsd: make nfs4_client->cl_addr a struct sockaddr_storage It's currently a __be32, which isn't big enough to hold an IPv6 address. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 32 +++++++++++++++++++------------- include/linux/nfsd/state.h | 2 +- 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9295c4b56bce..bfc14d879ea1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -55,6 +55,7 @@ #include #include #include +#include #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -1220,13 +1221,15 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, int status; unsigned int strhashval; char dname[HEXDIR_LEN]; + char addr_str[INET6_ADDRSTRLEN]; nfs4_verifier verf = exid->verifier; - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct sockaddr *sa = svc_addr(rqstp); + rpc_ntop(sa, addr_str, sizeof(addr_str)); dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p " - " ip_addr=%u flags %x, spa_how %d\n", + "ip_addr=%s flags %x, spa_how %d\n", __func__, rqstp, exid, exid->clname.len, exid->clname.data, - ip_addr, exid->flags, exid->spa_how); + addr_str, exid->flags, exid->spa_how); if (!check_name(exid->clname) || (exid->flags & ~EXCHGID4_FLAG_MASK_A)) return nfserr_inval; @@ -1315,7 +1318,7 @@ out_new: copy_verf(new, &verf); copy_cred(&new->cl_cred, &rqstp->rq_cred); - new->cl_addr = ip_addr; + rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa); gen_clid(new); gen_confirm(new); add_to_unconfirmed(new, strhashval); @@ -1389,7 +1392,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_create_session *cr_ses) { - u32 ip_addr = svc_addr_in(rqstp)->sin_addr.s_addr; + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; struct nfsd4_clid_slot *cs_slot = NULL; int status = 0; @@ -1417,7 +1420,7 @@ nfsd4_create_session(struct svc_rqst *rqstp, cs_slot->sl_seqid++; } else if (unconf) { if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || - (ip_addr != unconf->cl_addr)) { + !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { status = nfserr_clid_inuse; goto out; } @@ -1564,7 +1567,7 @@ __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid *setclid) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr *sa = svc_addr(rqstp); struct xdr_netobj clname = { .len = setclid->se_namelen, .data = setclid->se_name, @@ -1596,8 +1599,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, /* RFC 3530 14.2.33 CASE 0: */ status = nfserr_clid_inuse; if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) { - dprintk("NFSD: setclientid: string in use by client" - " at %pI4\n", &conf->cl_addr); + char addr_str[INET6_ADDRSTRLEN]; + rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str, + sizeof(addr_str)); + dprintk("NFSD: setclientid: string in use by client " + "at %s\n", addr_str); goto out; } } @@ -1659,7 +1665,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, gen_clid(new); } copy_verf(new, &clverifier); - new->cl_addr = sin->sin_addr.s_addr; + rpc_copy_addr((struct sockaddr *) &new->cl_addr, sa); new->cl_flavor = rqstp->rq_flavor; princ = svc_gss_principal(rqstp); if (princ) { @@ -1693,7 +1699,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setclientid_confirm *setclientid_confirm) { - struct sockaddr_in *sin = svc_addr_in(rqstp); + struct sockaddr *sa = svc_addr(rqstp); struct nfs4_client *conf, *unconf; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = 
&setclientid_confirm->sc_clientid; @@ -1712,9 +1718,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, unconf = find_unconfirmed_client(clid); status = nfserr_clid_inuse; - if (conf && conf->cl_addr != sin->sin_addr.s_addr) + if (conf && !rpc_cmp_addr((struct sockaddr *) &conf->cl_addr, sa)) goto out; - if (unconf && unconf->cl_addr != sin->sin_addr.s_addr) + if (unconf && !rpc_cmp_addr((struct sockaddr *) &unconf->cl_addr, sa)) goto out; /* diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 58bb19784e12..3510ddd4be49 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -200,7 +200,7 @@ struct nfs4_client { char cl_recdir[HEXDIR_LEN]; /* recovery dir */ nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ - __be32 cl_addr; /* client ipaddress */ + struct sockaddr_storage cl_addr; /* client ipaddress */ u32 cl_flavor; /* setclientid pseudoflavor */ char *cl_principal; /* setclientid principal name */ struct svc_cred cl_cred; /* setclientid principal */ -- cgit v1.2.3 From aa9a4ec7707a5391cde556f3fa1b0eb4bca3bcf6 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:57 -0400 Subject: nfsd: convert nfs4_cb_conn struct to hold address in sockaddr_storage ...rather than as a separate address and port fields. This will be necessary for implementing callbacks over IPv6. Also, convert gen_callback to use the standard rpcuaddr2sockaddr routine rather than its own private one. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 11 ++----- fs/nfsd/nfs4state.c | 81 ++++++---------------------------------------- include/linux/nfsd/state.h | 4 +-- 3 files changed, 13 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 3fd23f7aceca..81d1c5285dcc 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -377,7 +377,6 @@ static int max_cb_time(void) int setup_callback_client(struct nfs4_client *clp) { - struct sockaddr_in addr; struct nfs4_cb_conn *cb = &clp->cl_cb_conn; struct rpc_timeout timeparms = { .to_initval = max_cb_time(), @@ -385,8 +384,8 @@ int setup_callback_client(struct nfs4_client *clp) }; struct rpc_create_args args = { .protocol = IPPROTO_TCP, - .address = (struct sockaddr *)&addr, - .addrsize = sizeof(addr), + .address = (struct sockaddr *) &cb->cb_addr, + .addrsize = cb->cb_addrlen, .timeout = &timeparms, .program = &cb_program, .prognumber = cb->cb_prog, @@ -400,12 +399,6 @@ int setup_callback_client(struct nfs4_client *clp) if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) return -EINVAL; - /* Initialize address */ - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_port = htons(cb->cb_port); - addr.sin_addr.s_addr = htonl(cb->cb_addr); - /* Create RPC client */ client = rpc_create(&args); if (IS_ERR(client)) { diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index bfc14d879ea1..96a742308cee 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -897,76 +897,6 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, return NULL; } -/* a helper function for parse_callback */ -static int -parse_octet(unsigned int *lenp, char **addrp) -{ - unsigned int len = *lenp; - char *p = *addrp; - int n = -1; - char c; - - for (;;) { - if (!len) - break; - len--; - c = *p++; - if (c == '.') - break; - if ((c < '0') || (c > '9')) { - n = -1; - break; - } - if (n < 0) - n = 0; - n = (n 
* 10) + (c - '0'); - if (n > 255) { - n = -1; - break; - } - } - *lenp = len; - *addrp = p; - return n; -} - -/* parse and set the setclientid ipv4 callback address */ -static int -parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp) -{ - int temp = 0; - u32 cbaddr = 0; - u16 cbport = 0; - u32 addrlen = addr_len; - char *addr = addr_val; - int i, shift; - - /* ipaddress */ - shift = 24; - for(i = 4; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbaddr |= (temp << shift); - if (shift > 0) - shift -= 8; - } - *cbaddrp = cbaddr; - - /* port */ - shift = 8; - for(i = 2; i > 0 ; i--) { - if ((temp = parse_octet(&addrlen, &addr)) < 0) { - return 0; - } - cbport |= (temp << shift); - if (shift > 0) - shift -= 8; - } - *cbportp = cbport; - return 1; -} - static void gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) { @@ -976,14 +906,21 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) if ((se->se_callback_netid_len != 3) || memcmp((char *)se->se_callback_netid_val, "tcp", 3)) goto out_err; - if ( !(parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val, - &cb->cb_addr, &cb->cb_port))) + cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, + se->se_callback_addr_len, + (struct sockaddr *) &cb->cb_addr, + sizeof(cb->cb_addr)); + + if (!cb->cb_addrlen || cb->cb_addr.ss_family != AF_INET) goto out_err; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; return; out_err: + cb->cb_addr.ss_family = AF_UNSPEC; + cb->cb_addrlen = 0; dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " "will not receive delegations\n", clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 3510ddd4be49..fb0c404c7c5c 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -81,8 +81,8 @@ struct nfs4_delegation { /* client delegation callback info */ struct nfs4_cb_conn { /* SETCLIENTID info */ - u32 cb_addr; - unsigned short cb_port; + struct sockaddr_storage cb_addr; + size_t cb_addrlen; u32 cb_prog; u32 cb_minorversion; u32 cb_ident; /* minorversion 0 only */ -- cgit v1.2.3 From fbf4665f41b02e757ab9d9198df65e319388e728 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 14 Aug 2009 12:57:59 -0400 Subject: nfsd: populate sin6_scope_id on callback address with scopeid from rq_addr on SETCLIENTID call When a SETCLIENTID call comes in, one of the args given is the svc_rqst. This struct contains an rq_addr field which holds the address that sent the call. If this is an IPv6 address, then we can use the sin6_scope_id field in this address to populate the sin6_scope_id field in the callback address. AFAICT, the rq_addr.sin6_scope_id is non-zero if and only if the client mounted the server's link-local address. Signed-off-by: Jeff Layton Acked-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4state.c | 7 +++++-- include/linux/sunrpc/clnt.h | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9ec0ca1ef4ea..d2a052480908 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -898,7 +898,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, } static void -gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) +gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) { struct nfs4_cb_conn *cb = &clp->cl_cb_conn; unsigned short expected_family; @@ -921,6 +921,9 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se) if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) goto out_err; + if (cb->cb_addr.ss_family == AF_INET6) + ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; + cb->cb_minorversion = 0; cb->cb_prog = se->se_callback_prog; cb->cb_ident = se->se_callback_ident; @@ -1621,7 +1624,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } copy_cred(&new->cl_cred, &rqstp->rq_cred); gen_confirm(new); - gen_callback(new, setclid); + gen_callback(new, setclid, rpc_get_scope_id(sa)); add_to_unconfirmed(new, strhashval); setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; setclid->se_clientid.cl_id = new->cl_clientid.cl_id; diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 044f531aee70..3d025588e56e 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -286,5 +286,20 @@ static inline bool rpc_copy_addr(struct sockaddr *dst, return false; } +/** + * rpc_get_scope_id - return scopeid for a given sockaddr + * @sa: sockaddr to get scopeid from + * + * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if + * not an AF_INET6 address. + */ +static inline u32 rpc_get_scope_id(const struct sockaddr *sa) +{ + if (sa->sa_family != AF_INET6) + return 0; + + return ((struct sockaddr_in6 *) sa)->sin6_scope_id; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ -- cgit v1.2.3 From f4b0373b26567cafd421d91101852ed7a34e9e94 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 21 Aug 2009 09:26:15 -0700 Subject: Make bitmask 'and' operators return a result code When 'and'ing two bitmasks (where 'andnot' is a variation on it), some cases want to know whether the result is the empty set or not. In particular, the TLB IPI sending code wants to do cpumask operations and determine if there are any CPU's left in the final set. So this just makes the bitmask (and cpumask) functions return a boolean for whether the result has any bits set. 
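For illustration only (this sketch is not part of the patch), a caller such as the TLB shootdown path can now combine the intersection and the emptiness test in one step; send_flush_ipi() and the dst/src cpumask pointers are hypothetical names standing in for the real helpers:

    /* dst = src & cpu_online_mask; the return value says whether any bit survived */
    if (cpumask_and(dst, src, cpu_online_mask))
        send_flush_ipi(dst);    /* at least one online CPU left to notify */
    /* otherwise the result is empty and the IPI can be skipped entirely */

The same pattern applies to cpus_and()/cpus_andnot() and the underlying bitmap_and()/bitmap_andnot(), since they now forward the same non-empty indication.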
Cc: stable@kernel.org (2.6.30, needed by TLB shootdown fix) Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 18 ++++++++---------- include/linux/cpumask.h | 20 ++++++++++---------- lib/bitmap.c | 12 ++++++++---- 3 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 2878811c6134..756d78b8c1c5 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -94,13 +94,13 @@ extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, int shift, int bits); extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, int shift, int bits); -extern void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, +extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); -extern void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, +extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); @@ -171,13 +171,12 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, } } -static inline void bitmap_and(unsigned long *dst, const unsigned long *src1, +static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, int nbits) { if (small_const_nbits(nbits)) - *dst = *src1 & *src2; - else - __bitmap_and(dst, src1, src2, nbits); + return (*dst = *src1 & *src2) != 0; + return __bitmap_and(dst, src1, src2, nbits); } static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, @@ -198,13 +197,12 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, __bitmap_xor(dst, src1, src2, nbits); } -static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1, +static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, int nbits) { if (small_const_nbits(nbits)) - *dst = *src1 & ~(*src2); - else - __bitmap_andnot(dst, src1, src2, nbits); + return (*dst = *src1 & ~(*src2)) != 0; + return __bitmap_andnot(dst, src1, src2, nbits); } static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c5ac87ca7bc6..796df12091b7 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -43,10 +43,10 @@ * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask * - * void cpus_and(dst, src1, src2) dst = src1 & src2 [intersection] + * int cpus_and(dst, src1, src2) dst = src1 & src2 [intersection] * void cpus_or(dst, src1, src2) dst = src1 | src2 [union] * void cpus_xor(dst, src1, src2) dst = src1 ^ src2 - * void cpus_andnot(dst, src1, src2) dst = src1 & ~src2 + * int cpus_andnot(dst, src1, src2) dst = src1 & ~src2 * void cpus_complement(dst, src) dst = ~src * * int cpus_equal(mask1, mask2) Does mask1 == mask2? 
@@ -179,10 +179,10 @@ static inline int __cpu_test_and_set(int cpu, cpumask_t *addr) } #define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) -static inline void __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, +static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, const cpumask_t *src2p, int nbits) { - bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); + return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); } #define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) @@ -201,10 +201,10 @@ static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, #define cpus_andnot(dst, src1, src2) \ __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) -static inline void __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, +static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, const cpumask_t *src2p, int nbits) { - bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); + return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } #define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS) @@ -738,11 +738,11 @@ static inline void cpumask_clear(struct cpumask *dstp) * @src1p: the first input * @src2p: the second input */ -static inline void cpumask_and(struct cpumask *dstp, +static inline int cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { - bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), + return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } @@ -779,11 +779,11 @@ static inline void cpumask_xor(struct cpumask *dstp, * @src1p: the first input * @src2p: the second input */ -static inline void cpumask_andnot(struct cpumask *dstp, +static inline int cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { - bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), + return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } diff --git a/lib/bitmap.c b/lib/bitmap.c index 35a1f7ff4149..702565821c99 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -179,14 +179,16 @@ void __bitmap_shift_left(unsigned long *dst, } EXPORT_SYMBOL(__bitmap_shift_left); -void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, +int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits) { int k; int nr = BITS_TO_LONGS(bits); + unsigned long result = 0; for (k = 0; k < nr; k++) - dst[k] = bitmap1[k] & bitmap2[k]; + result |= (dst[k] = bitmap1[k] & bitmap2[k]); + return result != 0; } EXPORT_SYMBOL(__bitmap_and); @@ -212,14 +214,16 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, } EXPORT_SYMBOL(__bitmap_xor); -void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, +int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits) { int k; int nr = BITS_TO_LONGS(bits); + unsigned long result = 0; for (k = 0; k < nr; k++) - dst[k] = bitmap1[k] & ~bitmap2[k]; + result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); + return result != 0; } EXPORT_SYMBOL(__bitmap_andnot); -- cgit v1.2.3 From da15cfdae03351c689736f8d142618592e3cebc3 Mon Sep 17 00:00:00 2001 From: john stultz Date: Wed, 19 Aug 2009 19:13:34 -0700 Subject: time: Introduce CLOCK_REALTIME_COARSE After talking with some application writers who want very fast, but not fine-grained timestamps, I decided to try to implement new clock_ids to 
clock_gettime(): CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE which returns the time at the last tick. This is very fast as we don't have to access any hardware (which can be very painful if you're using something like the acpi_pm clocksource), and we can even use the vdso clock_gettime() method to avoid the syscall. The only trade off is you only get low-res tick grained time resolution. This isn't a new idea, I know Ingo has a patch in the -rt tree that made the vsyscall gettimeofday() return coarse grained time when the vsyscall64 sysctrl was set to 2. However this affects all applications on a system. With this method, applications can choose the proper speed/granularity trade-off for themselves. Signed-off-by: John Stultz Cc: Andi Kleen Cc: nikolag@ca.ibm.com Cc: Darren Hart Cc: arjan@infradead.org Cc: jonathan@jonmasters.org LKML-Reference: <1250734414.6897.5.camel@localhost.localdomain> Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/vgtod.h | 1 + arch/x86/kernel/vsyscall_64.c | 1 + arch/x86/vdso/vclock_gettime.c | 39 ++++++++++++++++++++++++++++++++++++--- include/linux/time.h | 4 ++++ kernel/posix-timers.c | 35 +++++++++++++++++++++++++++++++++++ kernel/time/timekeeping.c | 21 +++++++++++++++++++++ 6 files changed, 98 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index dc27a69e5d2a..3d61e204826f 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h @@ -21,6 +21,7 @@ struct vsyscall_gtod_data { u32 shift; } clock; struct timespec wall_to_monotonic; + struct timespec wall_time_coarse; }; extern struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 25ee06a80aad..cf53a78e2dcf 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; + vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 6a40b78b46aa..ee55754cc3c5 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -86,14 +86,47 @@ notrace static noinline int do_monotonic(struct timespec *ts) return 0; } +notrace static noinline int do_realtime_coarse(struct timespec *ts) +{ + unsigned long seq; + do { + seq = read_seqbegin(&gtod->lock); + ts->tv_sec = gtod->wall_time_coarse.tv_sec; + ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; + } while (unlikely(read_seqretry(&gtod->lock, seq))); + return 0; +} + +notrace static noinline int do_monotonic_coarse(struct timespec *ts) +{ + unsigned long seq, ns, secs; + do { + seq = read_seqbegin(&gtod->lock); + secs = gtod->wall_time_coarse.tv_sec; + ns = gtod->wall_time_coarse.tv_nsec; + secs += gtod->wall_to_monotonic.tv_sec; + ns += gtod->wall_to_monotonic.tv_nsec; + } while (unlikely(read_seqretry(&gtod->lock, seq))); + vset_normalized_timespec(ts, secs, ns); + return 0; +} + notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) { - if (likely(gtod->sysctl_enabled && gtod->clock.vread)) + if (likely(gtod->sysctl_enabled)) switch (clock) { case CLOCK_REALTIME: - return do_realtime(ts); + if
(likely(gtod->clock.vread)) + return do_realtime(ts); + break; case CLOCK_MONOTONIC: - return do_monotonic(ts); + if (likely(gtod->clock.vread)) + return do_monotonic(ts); + break; + case CLOCK_REALTIME_COARSE: + return do_realtime_coarse(ts); + case CLOCK_MONOTONIC_COARSE: + return do_monotonic_coarse(ts); } return vdso_fallback_gettime(clock, ts); } diff --git a/include/linux/time.h b/include/linux/time.h index f505988398e6..256232f7e5e6 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -110,6 +110,8 @@ extern int timekeeping_suspended; unsigned long get_seconds(void); struct timespec current_kernel_time(void); +struct timespec __current_kernel_time(void); /* does not hold xtime_lock */ +struct timespec get_monotonic_coarse(void); #define CURRENT_TIME (current_kernel_time()) #define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) @@ -243,6 +245,8 @@ struct itimerval { #define CLOCK_PROCESS_CPUTIME_ID 2 #define CLOCK_THREAD_CPUTIME_ID 3 #define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 /* * The IDs of various hardware clocks: diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index d089d052c4a9..495440779ce3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -242,6 +242,25 @@ static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec *tp) return 0; } + +static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec *tp) +{ + *tp = current_kernel_time(); + return 0; +} + +static int posix_get_monotonic_coarse(clockid_t which_clock, + struct timespec *tp) +{ + *tp = get_monotonic_coarse(); + return 0; +} + +int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp) +{ + *tp = ktime_to_timespec(KTIME_LOW_RES); + return 0; +} /* * Initialize everything, well, just everything in Posix clocks/timers ;) */ @@ -262,10 +281,26 @@ static __init int init_posix_timers(void) .timer_create = no_timer_create, .nsleep = no_nsleep, }; + struct k_clock clock_realtime_coarse = { + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_realtime_coarse, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, + }; + struct k_clock clock_monotonic_coarse = { + .clock_getres = posix_get_coarse_res, + .clock_get = posix_get_monotonic_coarse, + .clock_set = do_posix_clock_nosettime, + .timer_create = no_timer_create, + .nsleep = no_nsleep, + }; register_posix_clock(CLOCK_REALTIME, &clock_realtime); register_posix_clock(CLOCK_MONOTONIC, &clock_monotonic); register_posix_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw); + register_posix_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse); + register_posix_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse); posix_timers_cache = kmem_cache_create("posix_timers_cache", sizeof (struct k_itimer), 0, SLAB_PANIC, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 15e06defca55..03cbeb34d141 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -847,6 +847,10 @@ unsigned long get_seconds(void) } EXPORT_SYMBOL(get_seconds); +struct timespec __current_kernel_time(void) +{ + return xtime_cache; +} struct timespec current_kernel_time(void) { @@ -862,3 +866,20 @@ struct timespec current_kernel_time(void) return now; } EXPORT_SYMBOL(current_kernel_time); + +struct timespec get_monotonic_coarse(void) +{ + struct timespec now, mono; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + + now = xtime_cache; + mono = wall_to_monotonic; + } 
while (read_seqretry(&xtime_lock, seq)); + + set_normalized_timespec(&now, now.tv_sec + mono.tv_sec, + now.tv_nsec + mono.tv_nsec); + return now; +} -- cgit v1.2.3 From 8b5a10fc6fd02289ea03480f93382b1a99006142 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 19 Aug 2009 08:40:48 +0100 Subject: x86: properly annotate alternatives.c Some of the NOPs tables aren't used on 64-bits, quite some code and data is needed post-init for module loading only, and a couple of functions aren't used outside that file (i.e. can be static, and don't need to be exported). The change to __INITDATA/__INITRODATA is needed to avoid an assembler warning. Signed-off-by: Jan Beulich LKML-Reference: <4A8BC8A00200007800010823@vpn.id2.novell.com> Acked-by: Sam Ravnborg Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/alternative.h | 7 ----- arch/x86/kernel/alternative.c | 56 ++++++++++++++++++++++---------------- include/linux/init.h | 12 ++++++-- 3 files changed, 42 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 1a37bcdc8606..c240efc74e00 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -73,8 +73,6 @@ static inline void alternatives_smp_module_del(struct module *mod) {} static inline void alternatives_smp_switch(int smp) {} #endif /* CONFIG_SMP */ -const unsigned char *const *find_nop_table(void); - /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, feature) \ \ @@ -144,8 +142,6 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define __parainstructions_end NULL #endif -extern void add_nops(void *insns, unsigned int len); - /* * Clear and restore the kernel write-protection flag on the local CPU. * Allows the kernel to edit read-only pages. @@ -161,10 +157,7 @@ extern void add_nops(void *insns, unsigned int len); * Intel's errata. * On the local CPU you need to be protected again NMI or MCE handlers seeing an * inconsistent instruction while you patch. - * The _early version expects the memory to already be RW. */ - extern void *text_poke(void *addr, const void *opcode, size_t len); -extern void *text_poke_early(void *addr, const void *opcode, size_t len); #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f57658702571..486935143e02 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -32,7 +33,7 @@ __setup("smp-alt-boot", bootonly); #define smp_alt_once 1 #endif -static int debug_alternative; +static int __initdata_or_module debug_alternative; static int __init debug_alt(char *str) { @@ -51,7 +52,7 @@ static int __init setup_noreplace_smp(char *str) __setup("noreplace-smp", setup_noreplace_smp); #ifdef CONFIG_PARAVIRT -static int noreplace_paravirt = 0; +static int __initdata_or_module noreplace_paravirt = 0; static int __init setup_noreplace_paravirt(char *str) { @@ -64,16 +65,17 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); #define DPRINTK(fmt, args...) if (debug_alternative) \ printk(KERN_DEBUG fmt, args) -#ifdef GENERIC_NOP1 +#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) /* Use inline assembly to define this because the nops are defined as inline assembly strings in the include files and we cannot get them easily into strings. 
*/ -asm("\t.section .rodata, \"a\"\nintelnops: " +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: " GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 GENERIC_NOP7 GENERIC_NOP8 "\t.previous"); extern const unsigned char intelnops[]; -static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +intel_nops[ASM_NOP_MAX+1] = { NULL, intelnops, intelnops + 1, @@ -87,12 +89,13 @@ static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { #endif #ifdef K8_NOP1 -asm("\t.section .rodata, \"a\"\nk8nops: " +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: " K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 K8_NOP7 K8_NOP8 "\t.previous"); extern const unsigned char k8nops[]; -static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +k8_nops[ASM_NOP_MAX+1] = { NULL, k8nops, k8nops + 1, @@ -105,13 +108,14 @@ static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { }; #endif -#ifdef K7_NOP1 -asm("\t.section .rodata, \"a\"\nk7nops: " +#if defined(K7_NOP1) && !defined(CONFIG_X86_64) +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: " K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 K7_NOP7 K7_NOP8 "\t.previous"); extern const unsigned char k7nops[]; -static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +k7_nops[ASM_NOP_MAX+1] = { NULL, k7nops, k7nops + 1, @@ -125,12 +129,13 @@ static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { #endif #ifdef P6_NOP1 -asm("\t.section .rodata, \"a\"\np6nops: " +asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: " P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 P6_NOP7 P6_NOP8 "\t.previous"); extern const unsigned char p6nops[]; -static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { +static const unsigned char *const __initconst_or_module +p6_nops[ASM_NOP_MAX+1] = { NULL, p6nops, p6nops + 1, @@ -146,7 +151,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { #ifdef CONFIG_X86_64 extern char __vsyscall_0; -const unsigned char *const *find_nop_table(void) +static const unsigned char *const *__init_or_module find_nop_table(void) { if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_has(X86_FEATURE_NOPL)) @@ -157,7 +162,7 @@ const unsigned char *const *find_nop_table(void) #else /* CONFIG_X86_64 */ -const unsigned char *const *find_nop_table(void) +static const unsigned char *const *__init_or_module find_nop_table(void) { if (boot_cpu_has(X86_FEATURE_K8)) return k8_nops; @@ -172,7 +177,7 @@ const unsigned char *const *find_nop_table(void) #endif /* CONFIG_X86_64 */ /* Use this to add nops to a buffer, then text_poke the whole buffer. */ -void add_nops(void *insns, unsigned int len) +static void __init_or_module add_nops(void *insns, unsigned int len) { const unsigned char *const *noptable = find_nop_table(); @@ -185,10 +190,10 @@ void add_nops(void *insns, unsigned int len) len -= noplen; } } -EXPORT_SYMBOL_GPL(add_nops); extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern u8 *__smp_locks[], *__smp_locks_end[]; +static void *text_poke_early(void *addr, const void *opcode, size_t len); /* Replace instructions with better alternatives for this CPU type. This runs before SMP is initialized to avoid SMP problems with @@ -196,7 +201,8 @@ extern u8 *__smp_locks[], *__smp_locks_end[]; APs have less capabilities than the boot processor are not handled. Tough. 
Make sure you disable such features by hand. */ -void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +void __init_or_module apply_alternatives(struct alt_instr *start, + struct alt_instr *end) { struct alt_instr *a; char insnbuf[MAX_PATCH_LEN]; @@ -279,9 +285,10 @@ static LIST_HEAD(smp_alt_modules); static DEFINE_MUTEX(smp_alt); static int smp_mode = 1; /* protected by smp_alt */ -void alternatives_smp_module_add(struct module *mod, char *name, - void *locks, void *locks_end, - void *text, void *text_end) +void __init_or_module alternatives_smp_module_add(struct module *mod, + char *name, + void *locks, void *locks_end, + void *text, void *text_end) { struct smp_alt_module *smp; @@ -317,7 +324,7 @@ void alternatives_smp_module_add(struct module *mod, char *name, mutex_unlock(&smp_alt); } -void alternatives_smp_module_del(struct module *mod) +void __init_or_module alternatives_smp_module_del(struct module *mod) { struct smp_alt_module *item; @@ -386,8 +393,8 @@ void alternatives_smp_switch(int smp) #endif #ifdef CONFIG_PARAVIRT -void apply_paravirt(struct paravirt_patch_site *start, - struct paravirt_patch_site *end) +void __init_or_module apply_paravirt(struct paravirt_patch_site *start, + struct paravirt_patch_site *end) { struct paravirt_patch_site *p; char insnbuf[MAX_PATCH_LEN]; @@ -485,7 +492,8 @@ void __init alternative_instructions(void) * instructions. And on the local CPU you need to be protected again NMI or MCE * handlers seeing an inconsistent instruction while you patch. */ -void *text_poke_early(void *addr, const void *opcode, size_t len) +static void *__init_or_module text_poke_early(void *addr, const void *opcode, + size_t len) { unsigned long flags; local_irq_save(flags); diff --git a/include/linux/init.h b/include/linux/init.h index 13b633ed695e..400adbb45414 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -103,8 +103,8 @@ #define __INIT .section ".init.text","ax" #define __FINIT .previous -#define __INITDATA .section ".init.data","aw" -#define __INITRODATA .section ".init.rodata","a" +#define __INITDATA .section ".init.data","aw",%progbits +#define __INITRODATA .section ".init.rodata","a",%progbits #define __FINITDATA .previous #define __DEVINIT .section ".devinit.text", "ax" @@ -305,9 +305,17 @@ void __init parse_early_options(char *cmdline); #ifdef CONFIG_MODULES #define __init_or_module #define __initdata_or_module +#define __initconst_or_module +#define __INIT_OR_MODULE .text +#define __INITDATA_OR_MODULE .data +#define __INITRODATA_OR_MODULE .section ".rodata","a",%progbits #else #define __init_or_module __init #define __initdata_or_module __initdata +#define __initconst_or_module __initconst +#define __INIT_OR_MODULE __INIT +#define __INITDATA_OR_MODULE __INITDATA +#define __INITRODATA_OR_MODULE __INITRODATA #endif /*CONFIG_MODULES*/ /* Functions marked as __devexit may be discarded at kernel link time, depending -- cgit v1.2.3 From b560d8ad8583803978aaaeba50ef29dc8e97a610 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 21 Aug 2009 22:08:51 -0700 Subject: rcu: Expunge lingering references to CONFIG_CLASSIC_RCU, optimize on !SMP A couple of references to CONFIG_CLASSIC_RCU have survived. Although these are harmless, it is past time for them to go. The one in hardirq.h is strictly a readability problem. The two in pagemap.h appear to disable a !SMP performance optimization (which this patch re-enables). This does raise the issue as to whether pagemap.h should really be referring to the CPU implementation. 
Long term, I intend to make the RCU implementation driven by CONFIG_PREEMPT, at which point these should change from defined(CONFIG_TREE_RCU) to !defined(CONFIG_PREEMPT). In the meantime, is there something else that could be done in pagemap.h? Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <20090822050851.GA8414@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 4 ++-- include/linux/pagemap.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 8246c697863d..330cb31bb496 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -132,7 +132,7 @@ static inline void account_system_vtime(struct task_struct *tsk) } #endif -#if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) +#if defined(CONFIG_NO_HZ) extern void rcu_irq_enter(void); extern void rcu_irq_exit(void); extern void rcu_nmi_enter(void); @@ -142,7 +142,7 @@ extern void rcu_nmi_exit(void); # define rcu_irq_exit() do { } while (0) # define rcu_nmi_enter() do { } while (0) # define rcu_nmi_exit() do { } while (0) -#endif /* #if defined(CONFIG_NO_HZ) && !defined(CONFIG_CLASSIC_RCU) */ +#endif /* #if defined(CONFIG_NO_HZ) */ /* * It is safe to do non-atomic ops on ->hardirq_context, diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index aec3252afcf5..ed5d7501e181 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -132,7 +132,7 @@ static inline int page_cache_get_speculative(struct page *page) { VM_BUG_ON(in_interrupt()); -#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU) +#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU) # ifdef CONFIG_PREEMPT VM_BUG_ON(!in_atomic()); # endif @@ -170,7 +170,7 @@ static inline int page_cache_add_speculative(struct page *page, int count) { VM_BUG_ON(in_interrupt()); -#if !defined(CONFIG_SMP) && defined(CONFIG_CLASSIC_RCU) +#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU) # ifdef CONFIG_PREEMPT VM_BUG_ON(!in_atomic()); # endif -- cgit v1.2.3 From 9eba32b86d17ef87131fa0bce43c614904ab5781 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 22 Aug 2009 14:19:26 -0700 Subject: Bluetooth: Add extra device reference counting for connections The device model itself has no real usable reference counting at the moment and this causes problems if parents are deleted before their children. The device model itself handles the memory details of this correctly, but the uevent order is not consistent. This causes various problems for systems like HAL or even X. So until device_put() does a proper cleanup, the device for Bluetooth connection will be protected with an extra reference counting to ensure the correct order of uevents when connections are terminated. This is not an automatic feature. Higher Bluetooth layers like HIDP or BNEP should grab this new reference to ensure that their uevents are send before the ones from the parent device. 
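A hedged sketch of the intended calling pattern (hci_conn_hold_device() and hci_conn_put_device() are the helpers added here; the example_* functions and the child-device details are hypothetical):

    static int example_child_add(struct hci_conn *conn)
    {
        hci_conn_hold_device(conn);    /* pin conn->dev while our child device exists */
        /* ... register a child device with &conn->dev as its parent ... */
        return 0;
    }

    static void example_child_del(struct hci_conn *conn)
    {
        /* ... unregister the child device first, so its uevents go out ... */
        hci_conn_put_device(conn);    /* the last put calls hci_conn_del_sysfs() */
    }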
Based on a report by Brian Rogers Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 4 ++++ net/bluetooth/hci_conn.c | 17 ++++++++++++++++- net/bluetooth/hci_event.c | 2 ++ 3 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c4ca4228b083..25b8a0345a6a 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -187,6 +187,7 @@ struct hci_conn { struct work_struct work_del; struct device dev; + atomic_t devref; struct hci_dev *hdev; void *l2cap_data; @@ -339,6 +340,9 @@ int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn); void hci_conn_enter_sniff_mode(struct hci_conn *conn); +void hci_conn_hold_device(struct hci_conn *conn); +void hci_conn_put_device(struct hci_conn *conn); + static inline void hci_conn_hold(struct hci_conn *conn) { atomic_inc(&conn->refcnt); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index fa47d5d84f5c..a9750984f772 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -246,6 +246,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); + atomic_set(&conn->devref, 0); + hci_conn_init_sysfs(conn); tasklet_enable(&hdev->tx_task); @@ -288,7 +290,7 @@ int hci_conn_del(struct hci_conn *conn) skb_queue_purge(&conn->data_q); - hci_conn_del_sysfs(conn); + hci_conn_put_device(conn); hci_dev_put(hdev); @@ -583,6 +585,19 @@ void hci_conn_check_pending(struct hci_dev *hdev) hci_dev_unlock(hdev); } +void hci_conn_hold_device(struct hci_conn *conn) +{ + atomic_inc(&conn->devref); +} +EXPORT_SYMBOL(hci_conn_hold_device); + +void hci_conn_put_device(struct hci_conn *conn) +{ + if (atomic_dec_and_test(&conn->devref)) + hci_conn_del_sysfs(conn); +} +EXPORT_SYMBOL(hci_conn_put_device); + int hci_get_conn_list(void __user *arg) { struct hci_conn_list_req req, *cl; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 184ba0a88ec0..e99fe385fba2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -887,6 +887,7 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s } else conn->state = BT_CONNECTED; + hci_conn_hold_device(conn); hci_conn_add_sysfs(conn); if (test_bit(HCI_AUTH, &hdev->flags)) @@ -1693,6 +1694,7 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; + hci_conn_hold_device(conn); hci_conn_add_sysfs(conn); break; -- cgit v1.2.3 From a6a67efd7088702fdbbb780c5a3f8e1a74e77b63 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 26 Jul 2009 08:18:19 +0000 Subject: Bluetooth: Convert hdev->req_lock to a mutex hdev->req_lock is used as mutex so make it a mutex. 
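The call sites keep their shape; only the primitive underneath changes. A hedged sketch of the usual synchronous request path (assuming the existing __hci_request() helper; error handling omitted):

    hci_req_lock(hdev);                              /* now mutex_lock(&hdev->req_lock) */
    err = __hci_request(hdev, req, opt, timeout);    /* sleeps until the request completes */
    hci_req_unlock(hdev);                            /* mutex_unlock(&hdev->req_lock) */

The lock is only taken in sleepable process context, which is why a plain mutex fits.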
Signed-off-by: Thomas Gleixner Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 6 +++--- net/bluetooth/hci_core.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 25b8a0345a6a..7b640aeddb64 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -117,7 +117,7 @@ struct hci_dev { struct sk_buff *sent_cmd; struct sk_buff *reassembly[3]; - struct semaphore req_lock; + struct mutex req_lock; wait_queue_head_t req_wait_q; __u32 req_status; __u32 req_result; @@ -704,8 +704,8 @@ struct hci_sec_filter { #define HCI_REQ_PEND 1 #define HCI_REQ_CANCELED 2 -#define hci_req_lock(d) down(&d->req_lock) -#define hci_req_unlock(d) up(&d->req_lock) +#define hci_req_lock(d) mutex_lock(&d->req_lock) +#define hci_req_unlock(d) mutex_unlock(&d->req_lock) void hci_req_complete(struct hci_dev *hdev, int result); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 406ad07cdea1..e1da8f68759c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -911,7 +911,7 @@ int hci_register_dev(struct hci_dev *hdev) hdev->reassembly[i] = NULL; init_waitqueue_head(&hdev->req_wait_q); - init_MUTEX(&hdev->req_lock); + mutex_init(&hdev->req_lock); inquiry_cache_init(hdev); -- cgit v1.2.3 From c6b03cf986eab00e20d0dbc852b233bb83472138 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 2 May 2009 22:31:10 -0700 Subject: Bluetooth: Allow setting of L2CAP ERTM via socket option To enable Enhanced Retransmission mode it needs to be set via a socket option. A different mode can be set on a socket, but on listen() and connect() the mode is checked and ERTM is only allowed if it is enabled via the module parameter. 
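For clarity, a hedged userspace sketch of selecting ERTM through the existing L2CAP_OPTIONS socket option before connect() or listen() (assuming the BlueZ <bluetooth/bluetooth.h> and <bluetooth/l2cap.h> headers, which mirror the kernel's struct l2cap_options); the kernel still rejects the mode unless the enable_ertm module parameter is set:

    #include <sys/socket.h>
    #include <bluetooth/bluetooth.h>
    #include <bluetooth/l2cap.h>

    static int request_ertm(int sk)
    {
        struct l2cap_options opts;
        socklen_t optlen = sizeof(opts);

        if (getsockopt(sk, SOL_L2CAP, L2CAP_OPTIONS, &opts, &optlen) < 0)
            return -1;

        opts.mode = L2CAP_MODE_ERTM;    /* 0x03 */

        return setsockopt(sk, SOL_L2CAP, L2CAP_OPTIONS, &opts, sizeof(opts));
    }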
Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 8 +++++--- net/bluetooth/l2cap.c | 37 +++++++++++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index e919fca1072a..06b072fd6d54 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -190,7 +190,7 @@ struct l2cap_conf_rfc { #define L2CAP_MODE_RETRANS 0x01 #define L2CAP_MODE_FLOWCTL 0x02 #define L2CAP_MODE_ERTM 0x03 -#define L2CAP_MODE_STREAM 0x04 +#define L2CAP_MODE_STREAMING 0x04 struct l2cap_disconn_req { __le16 dcid; @@ -271,9 +271,11 @@ struct l2cap_pinfo { __u16 imtu; __u16 omtu; __u16 flush_to; - __u8 sec_level; + __u8 mode; + __u8 fcs; + __u8 sec_level; __u8 role_switch; - __u8 force_reliable; + __u8 force_reliable; __u8 conf_req[64]; __u8 conf_len; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 810a3c1a4188..8a59e57d9df1 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -717,12 +717,16 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) pi->imtu = l2cap_pi(parent)->imtu; pi->omtu = l2cap_pi(parent)->omtu; + pi->mode = l2cap_pi(parent)->mode; + pi->fcs = l2cap_pi(parent)->fcs; pi->sec_level = l2cap_pi(parent)->sec_level; pi->role_switch = l2cap_pi(parent)->role_switch; pi->force_reliable = l2cap_pi(parent)->force_reliable; } else { pi->imtu = L2CAP_DEFAULT_MTU; pi->omtu = 0; + pi->mode = L2CAP_MODE_BASIC; + pi->fcs = L2CAP_FCS_CRC16; pi->sec_level = BT_SECURITY_LOW; pi->role_switch = 0; pi->force_reliable = 0; @@ -958,6 +962,18 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al goto done; } + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + break; + case L2CAP_MODE_ERTM: + if (enable_ertm) + break; + /* fall through */ + default: + err = -ENOTSUPP; + goto done; + } + switch (sk->sk_state) { case BT_CONNECT: case BT_CONNECT2: @@ -1009,6 +1025,18 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) goto done; } + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + break; + case L2CAP_MODE_ERTM: + if (enable_ertm) + break; + /* fall through */ + default: + err = -ENOTSUPP; + goto done; + } + if (!l2cap_pi(sk)->psm) { bdaddr_t *src = &bt_sk(sk)->src; u16 psm; @@ -1259,7 +1287,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us opts.imtu = l2cap_pi(sk)->imtu; opts.omtu = l2cap_pi(sk)->omtu; opts.flush_to = l2cap_pi(sk)->flush_to; - opts.mode = L2CAP_MODE_BASIC; + opts.mode = l2cap_pi(sk)->mode; len = min_t(unsigned int, sizeof(opts), optlen); if (copy_from_user((char *) &opts, optval, len)) { @@ -1267,8 +1295,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us break; } - l2cap_pi(sk)->imtu = opts.imtu; - l2cap_pi(sk)->omtu = opts.omtu; + l2cap_pi(sk)->imtu = opts.imtu; + l2cap_pi(sk)->omtu = opts.omtu; + l2cap_pi(sk)->mode = opts.mode; break; case L2CAP_LM: @@ -1381,7 +1410,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us opts.imtu = l2cap_pi(sk)->imtu; opts.omtu = l2cap_pi(sk)->omtu; opts.flush_to = l2cap_pi(sk)->flush_to; - opts.mode = L2CAP_MODE_BASIC; + opts.mode = l2cap_pi(sk)->mode; len = min_t(unsigned int, len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) -- cgit v1.2.3 From f2fcfcd670257236ebf2088bbdf26f6a8ef459fe Mon Sep 17 00:00:00 2001 From: "Gustavo F. 
Padovan" Date: Sat, 4 Jul 2009 15:06:24 -0300 Subject: Bluetooth: Add configuration support for ERTM and Streaming mode Add support to config_req and config_rsp to configure ERTM and Streaming mode. If the remote device specifies ERTM or Streaming mode, then the same mode is proposed. Otherwise ERTM or Basic mode is used. And in case of a state 2 device, the remote device should propose the same mode. If not, then the channel gets disconnected. Signed-off-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 28 +++-- net/bluetooth/l2cap.c | 262 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 255 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 06b072fd6d54..6fc76986d70a 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -27,8 +27,9 @@ /* L2CAP defaults */ #define L2CAP_DEFAULT_MTU 672 +#define L2CAP_DEFAULT_MIN_MTU 48 #define L2CAP_DEFAULT_FLUSH_TO 0xffff -#define L2CAP_DEFAULT_RX_WINDOW 1 +#define L2CAP_DEFAULT_TX_WINDOW 1 #define L2CAP_DEFAULT_MAX_RECEIVE 1 #define L2CAP_DEFAULT_RETRANS_TO 300 /* 300 milliseconds */ #define L2CAP_DEFAULT_MONITOR_TO 1000 /* 1 second */ @@ -272,6 +273,9 @@ struct l2cap_pinfo { __u16 omtu; __u16 flush_to; __u8 mode; + __u8 num_conf_req; + __u8 num_conf_rsp; + __u8 fcs; __u8 sec_level; __u8 role_switch; @@ -280,10 +284,15 @@ struct l2cap_pinfo { __u8 conf_req[64]; __u8 conf_len; __u8 conf_state; - __u8 conf_retry; __u8 ident; + __u8 remote_tx_win; + __u8 remote_max_tx; + __u16 retrans_timeout; + __u16 monitor_timeout; + __u16 max_pdu_size; + __le16 sport; struct l2cap_conn *conn; @@ -291,12 +300,17 @@ struct l2cap_pinfo { struct sock *prev_c; }; -#define L2CAP_CONF_REQ_SENT 0x01 -#define L2CAP_CONF_INPUT_DONE 0x02 -#define L2CAP_CONF_OUTPUT_DONE 0x04 -#define L2CAP_CONF_CONNECT_PEND 0x80 +#define L2CAP_CONF_REQ_SENT 0x01 +#define L2CAP_CONF_INPUT_DONE 0x02 +#define L2CAP_CONF_OUTPUT_DONE 0x04 +#define L2CAP_CONF_MTU_DONE 0x08 +#define L2CAP_CONF_MODE_DONE 0x10 +#define L2CAP_CONF_CONNECT_PEND 0x20 +#define L2CAP_CONF_STATE2_DEVICE 0x80 + +#define L2CAP_CONF_MAX_CONF_REQ 2 +#define L2CAP_CONF_MAX_CONF_RSP 2 -#define L2CAP_CONF_MAX_RETRIES 2 void l2cap_load(void); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 7ce1a24735c8..af0fbf9ebfeb 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -966,6 +966,7 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al case L2CAP_MODE_BASIC: break; case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: if (enable_ertm) break; /* fall through */ @@ -1029,6 +1030,7 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) case L2CAP_MODE_BASIC: break; case L2CAP_MODE_ERTM: + case L2CAP_MODE_STREAMING: if (enable_ertm) break; /* fall through */ @@ -1739,15 +1741,65 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) *ptr += L2CAP_CONF_OPT_SIZE + len; } +static int l2cap_mode_supported(__u8 mode, __u32 feat_mask) +{ + u32 local_feat_mask = l2cap_feat_mask; + if (enable_ertm) + local_feat_mask |= L2CAP_FEAT_ERTM; + + switch (mode) { + case L2CAP_MODE_ERTM: + return L2CAP_FEAT_ERTM & feat_mask & local_feat_mask; + case L2CAP_MODE_STREAMING: + return L2CAP_FEAT_STREAMING & feat_mask & local_feat_mask; + default: + return 0x00; + } +} + +static inline __u8 l2cap_select_mode(__u8 mode, __u16 remote_feat_mask) +{ + switch (mode) { + case L2CAP_MODE_STREAMING: + case 
L2CAP_MODE_ERTM: + if (l2cap_mode_supported(mode, remote_feat_mask)) + return mode; + /* fall through */ + default: + return L2CAP_MODE_BASIC; + } +} + static int l2cap_build_conf_req(struct sock *sk, void *data) { struct l2cap_pinfo *pi = l2cap_pi(sk); struct l2cap_conf_req *req = data; - struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; + struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_ERTM }; void *ptr = req->data; BT_DBG("sk %p", sk); + if (pi->num_conf_req || pi->num_conf_rsp) + goto done; + + switch (pi->mode) { + case L2CAP_MODE_STREAMING: + case L2CAP_MODE_ERTM: + pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; + if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask)) { + struct l2cap_disconn_req req; + req.dcid = cpu_to_le16(pi->dcid); + req.scid = cpu_to_le16(pi->scid); + l2cap_send_cmd(pi->conn, l2cap_get_ident(pi->conn), + L2CAP_DISCONN_REQ, sizeof(req), &req); + } + break; + default: + pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask); + break; + } + +done: switch (pi->mode) { case L2CAP_MODE_BASIC: if (pi->imtu != L2CAP_DEFAULT_MTU) @@ -1756,10 +1808,22 @@ static int l2cap_build_conf_req(struct sock *sk, void *data) case L2CAP_MODE_ERTM: rfc.mode = L2CAP_MODE_ERTM; - rfc.txwin_size = L2CAP_DEFAULT_RX_WINDOW; + rfc.txwin_size = L2CAP_DEFAULT_TX_WINDOW; rfc.max_transmit = L2CAP_DEFAULT_MAX_RECEIVE; - rfc.retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO); - rfc.monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO); + rfc.retrans_timeout = 0; + rfc.monitor_timeout = 0; + rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_RX_APDU); + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + sizeof(rfc), (unsigned long) &rfc); + break; + + case L2CAP_MODE_STREAMING: + rfc.mode = L2CAP_MODE_STREAMING; + rfc.txwin_size = 0; + rfc.max_transmit = 0; + rfc.retrans_timeout = 0; + rfc.monitor_timeout = 0; rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_RX_APDU); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, @@ -1825,30 +1889,83 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) } } + if (pi->num_conf_rsp || pi->num_conf_req) + goto done; + + switch (pi->mode) { + case L2CAP_MODE_STREAMING: + case L2CAP_MODE_ERTM: + pi->conf_state |= L2CAP_CONF_STATE2_DEVICE; + if (!l2cap_mode_supported(pi->mode, pi->conn->feat_mask)) + return -ECONNREFUSED; + break; + default: + pi->mode = l2cap_select_mode(rfc.mode, pi->conn->feat_mask); + break; + } + +done: + if (pi->mode != rfc.mode) { + result = L2CAP_CONF_UNACCEPT; + rfc.mode = pi->mode; + + if (pi->num_conf_rsp == 1) + return -ECONNREFUSED; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + sizeof(rfc), (unsigned long) &rfc); + } + + if (result == L2CAP_CONF_SUCCESS) { /* Configure output options and let the other side know * which ones we don't like. 
*/ - if (rfc.mode == L2CAP_MODE_BASIC) { - if (mtu < pi->omtu) - result = L2CAP_CONF_UNACCEPT; - else { - pi->omtu = mtu; - pi->conf_state |= L2CAP_CONF_OUTPUT_DONE; - } + if (mtu < L2CAP_DEFAULT_MIN_MTU) + result = L2CAP_CONF_UNACCEPT; + else { + pi->omtu = mtu; + pi->conf_state |= L2CAP_CONF_MTU_DONE; + } + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->omtu); - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->omtu); - } else { + switch (rfc.mode) { + case L2CAP_MODE_BASIC: + pi->fcs = L2CAP_FCS_NONE; + pi->conf_state |= L2CAP_CONF_MODE_DONE; + break; + + case L2CAP_MODE_ERTM: + pi->remote_tx_win = rfc.txwin_size; + pi->remote_max_tx = rfc.max_transmit; + pi->max_pdu_size = rfc.max_pdu_size; + + rfc.retrans_timeout = L2CAP_DEFAULT_RETRANS_TO; + rfc.monitor_timeout = L2CAP_DEFAULT_MONITOR_TO; + + pi->conf_state |= L2CAP_CONF_MODE_DONE; + break; + + case L2CAP_MODE_STREAMING: + pi->remote_tx_win = rfc.txwin_size; + pi->max_pdu_size = rfc.max_pdu_size; + + pi->conf_state |= L2CAP_CONF_MODE_DONE; + break; + + default: result = L2CAP_CONF_UNACCEPT; memset(&rfc, 0, sizeof(rfc)); - rfc.mode = L2CAP_MODE_BASIC; + rfc.mode = pi->mode; + } - l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); - } - } + if (result == L2CAP_CONF_SUCCESS) + pi->conf_state |= L2CAP_CONF_OUTPUT_DONE; + } rsp->scid = cpu_to_le16(pi->dcid); rsp->result = cpu_to_le16(result); rsp->flags = cpu_to_le16(0x0000); @@ -1856,6 +1973,73 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) return ptr - data; } +static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data, u16 *result) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct l2cap_conf_req *req = data; + void *ptr = req->data; + int type, olen; + unsigned long val; + struct l2cap_conf_rfc rfc; + + BT_DBG("sk %p, rsp %p, len %d, req %p", sk, rsp, len, data); + + while (len >= L2CAP_CONF_OPT_SIZE) { + len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val); + + switch (type) { + case L2CAP_CONF_MTU: + if (val < L2CAP_DEFAULT_MIN_MTU) { + *result = L2CAP_CONF_UNACCEPT; + pi->omtu = L2CAP_DEFAULT_MIN_MTU; + } else + pi->omtu = val; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, pi->omtu); + break; + + case L2CAP_CONF_FLUSH_TO: + pi->flush_to = val; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, + 2, pi->flush_to); + break; + + case L2CAP_CONF_RFC: + if (olen == sizeof(rfc)) + memcpy(&rfc, (void *)val, olen); + + if ((pi->conf_state & L2CAP_CONF_STATE2_DEVICE) && + rfc.mode != pi->mode) + return -ECONNREFUSED; + + pi->mode = rfc.mode; + pi->fcs = 0; + + l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, + sizeof(rfc), (unsigned long) &rfc); + break; + } + } + + if (*result == L2CAP_CONF_SUCCESS) { + switch (rfc.mode) { + case L2CAP_MODE_ERTM: + pi->remote_tx_win = rfc.txwin_size; + pi->retrans_timeout = rfc.retrans_timeout; + pi->monitor_timeout = rfc.monitor_timeout; + pi->max_pdu_size = le16_to_cpu(rfc.max_pdu_size); + break; + case L2CAP_MODE_STREAMING: + pi->max_pdu_size = le16_to_cpu(rfc.max_pdu_size); + break; + } + } + + req->dcid = cpu_to_le16(pi->dcid); + req->flags = cpu_to_le16(0x0000); + + return ptr - data; +} + static int l2cap_build_conf_rsp(struct sock *sk, void *data, u16 result, u16 flags) { struct l2cap_conf_rsp *rsp = data; @@ -2042,6 +2226,7 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(sk, req), req); + l2cap_pi(sk)->num_conf_req++; break; case 
L2CAP_CR_PEND: @@ -2100,10 +2285,17 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr /* Complete config. */ len = l2cap_parse_conf_req(sk, rsp); - if (len < 0) + if (len < 0) { + struct l2cap_disconn_req req; + req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid); + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + l2cap_send_cmd(conn, l2cap_get_ident(conn), + L2CAP_DISCONN_REQ, sizeof(req), &req); goto unlock; + } l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); + l2cap_pi(sk)->num_conf_rsp++; /* Reset config buffer. */ l2cap_pi(sk)->conf_len = 0; @@ -2121,6 +2313,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr u8 buf[64]; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(sk, buf), buf); + l2cap_pi(sk)->num_conf_req++; } unlock: @@ -2150,16 +2343,29 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr break; case L2CAP_CONF_UNACCEPT: - if (++l2cap_pi(sk)->conf_retry < L2CAP_CONF_MAX_RETRIES) { - char req[128]; - /* It does not make sense to adjust L2CAP parameters - * that are currently defined in the spec. We simply - * resend config request that we sent earlier. It is - * stupid, but it helps qualification testing which - * expects at least some response from us. */ - l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(sk, req), req); - goto done; + if (l2cap_pi(sk)->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) { + int len = cmd->len - sizeof(*rsp); + char req[64]; + + /* throw out any old stored conf requests */ + result = L2CAP_CONF_SUCCESS; + len = l2cap_parse_conf_rsp(sk, rsp->data, + len, req, &result); + if (len < 0) { + struct l2cap_disconn_req req; + req.dcid = cpu_to_le16(l2cap_pi(sk)->dcid); + req.scid = cpu_to_le16(l2cap_pi(sk)->scid); + l2cap_send_cmd(conn, l2cap_get_ident(conn), + L2CAP_DISCONN_REQ, sizeof(req), &req); + goto done; + } + + l2cap_send_cmd(conn, l2cap_get_ident(conn), + L2CAP_CONF_REQ, len, req); + l2cap_pi(sk)->num_conf_req++; + if (result != L2CAP_CONF_SUCCESS) + goto done; + break; } default: -- cgit v1.2.3 From 1c2acffb76d4bc5fd27c4ea55cc27ad8ead10f9a Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Thu, 20 Aug 2009 22:25:57 -0300 Subject: Bluetooth: Add initial support for ERTM packets transfers This patch adds support for ERTM transfers, without retransmission, with txWindow up to 63 and with acknowledgement of packets received. Now the packets are queued before call l2cap_do_send(), so packets couldn't be sent at the time we call l2cap_sock_sendmsg(). They will be sent in an asynchronous way on later calls of l2cap_ertm_send(). Besides if an error occurs on calling l2cap_do_send() we disconnect the channel. Initially based on a patch from Nathan Holstein Signed-off-by: Gustavo F. 
Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 3 +- include/net/bluetooth/l2cap.h | 54 +++++- net/bluetooth/l2cap.c | 384 +++++++++++++++++++++++++++++++++----- 3 files changed, 390 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 968166a45f86..65a5cf868fd6 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -138,8 +138,9 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); struct bt_skb_cb { __u8 pkt_type; __u8 incoming; + __u8 tx_seq; }; -#define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) +#define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb)) static inline struct sk_buff *bt_skb_alloc(unsigned int len, gfp_t how) { diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 6fc76986d70a..9bbfbe74d400 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -29,7 +29,8 @@ #define L2CAP_DEFAULT_MTU 672 #define L2CAP_DEFAULT_MIN_MTU 48 #define L2CAP_DEFAULT_FLUSH_TO 0xffff -#define L2CAP_DEFAULT_TX_WINDOW 1 +#define L2CAP_DEFAULT_TX_WINDOW 63 +#define L2CAP_DEFAULT_NUM_TO_ACK (L2CAP_DEFAULT_TX_WINDOW/5) #define L2CAP_DEFAULT_MAX_RECEIVE 1 #define L2CAP_DEFAULT_RETRANS_TO 300 /* 300 milliseconds */ #define L2CAP_DEFAULT_MONITOR_TO 1000 /* 1 second */ @@ -94,6 +95,31 @@ struct l2cap_conninfo { #define L2CAP_FCS_NONE 0x00 #define L2CAP_FCS_CRC16 0x01 +/* L2CAP Control Field bit masks */ +#define L2CAP_CTRL_SAR 0xC000 +#define L2CAP_CTRL_REQSEQ 0x3F00 +#define L2CAP_CTRL_TXSEQ 0x007E +#define L2CAP_CTRL_RETRANS 0x0080 +#define L2CAP_CTRL_FINAL 0x0080 +#define L2CAP_CTRL_POLL 0x0010 +#define L2CAP_CTRL_SUPERVISE 0x000C +#define L2CAP_CTRL_FRAME_TYPE 0x0001 /* I- or S-Frame */ + +#define L2CAP_CTRL_TXSEQ_SHIFT 1 +#define L2CAP_CTRL_REQSEQ_SHIFT 8 + +/* L2CAP Supervisory Function */ +#define L2CAP_SUPER_RCV_READY 0x0000 +#define L2CAP_SUPER_REJECT 0x0004 +#define L2CAP_SUPER_RCV_NOT_READY 0x0008 +#define L2CAP_SUPER_SELECT_REJECT 0x000C + +/* L2CAP Segmentation and Reassembly */ +#define L2CAP_SDU_UNSEGMENTED 0x0000 +#define L2CAP_SDU_START 0x4000 +#define L2CAP_SDU_END 0x8000 +#define L2CAP_SDU_CONTINUE 0xC000 + /* L2CAP structures */ struct l2cap_hdr { __le16 len; @@ -262,6 +288,7 @@ struct l2cap_conn { /* ----- L2CAP channel and socket info ----- */ #define l2cap_pi(sk) ((struct l2cap_pinfo *) sk) +#define TX_QUEUE(sk) (&l2cap_pi(sk)->tx_queue) struct l2cap_pinfo { struct bt_sock bt; @@ -285,6 +312,13 @@ struct l2cap_pinfo { __u8 conf_len; __u8 conf_state; + __u8 next_tx_seq; + __u8 expected_ack_seq; + __u8 req_seq; + __u8 expected_tx_seq; + __u8 unacked_frames; + __u8 num_to_ack; + __u8 ident; __u8 remote_tx_win; @@ -295,6 +329,7 @@ struct l2cap_pinfo { __le16 sport; + struct sk_buff_head tx_queue; struct l2cap_conn *conn; struct sock *next_c; struct sock *prev_c; @@ -311,6 +346,23 @@ struct l2cap_pinfo { #define L2CAP_CONF_MAX_CONF_REQ 2 #define L2CAP_CONF_MAX_CONF_RSP 2 +static inline int l2cap_tx_window_full(struct sock *sk) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + int sub; + + sub = (pi->next_tx_seq - pi->expected_ack_seq) % 64; + + if (sub < 0) + sub += 64; + + return (sub == pi->remote_tx_win); +} + +#define __get_txseq(ctrl) ((ctrl) & L2CAP_CTRL_TXSEQ) >> 1 +#define __get_reqseq(ctrl) ((ctrl) & L2CAP_CTRL_REQSEQ) >> 8 +#define __is_iframe(ctrl) !((ctrl) & L2CAP_CTRL_FRAME_TYPE) +#define __is_sframe(ctrl) (ctrl) & L2CAP_CTRL_FRAME_TYPE void 
l2cap_load(void); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index c1b562085cb4..45b8697a7398 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -333,6 +333,30 @@ static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 return hci_send_acl(conn->hcon, skb, 0); } +static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) +{ + struct sk_buff *skb; + struct l2cap_hdr *lh; + struct l2cap_conn *conn = pi->conn; + int count; + + BT_DBG("pi %p, control 0x%2.2x", pi, control); + + count = min_t(unsigned int, conn->mtu, L2CAP_HDR_SIZE + 2); + control |= L2CAP_CTRL_FRAME_TYPE; + + skb = bt_skb_alloc(count, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh->len = cpu_to_le16(2); + lh->cid = cpu_to_le16(pi->dcid); + put_unaligned_le16(control, skb_put(skb, 2)); + + return hci_send_acl(pi->conn->hcon, skb, 0); +} + static void l2cap_do_start(struct sock *sk) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; @@ -1154,39 +1178,80 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l return 0; } -static inline int l2cap_do_send(struct sock *sk, struct msghdr *msg, int len) +static void l2cap_drop_acked_frames(struct sock *sk) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - struct sk_buff *skb, **frag; - int err, hlen, count, sent = 0; - struct l2cap_hdr *lh; + struct sk_buff *skb; - BT_DBG("sk %p len %d", sk, len); + while ((skb = skb_peek(TX_QUEUE(sk)))) { + if (bt_cb(skb)->tx_seq == l2cap_pi(sk)->expected_ack_seq) + break; - /* First fragment (with L2CAP header) */ - if (sk->sk_type == SOCK_DGRAM) - hlen = L2CAP_HDR_SIZE + 2; - else - hlen = L2CAP_HDR_SIZE; + skb = skb_dequeue(TX_QUEUE(sk)); + kfree_skb(skb); - count = min_t(unsigned int, (conn->mtu - hlen), len); + l2cap_pi(sk)->unacked_frames--; + } - skb = bt_skb_send_alloc(sk, hlen + count, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - return err; + return; +} - /* Create L2CAP header */ - lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); - lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid); - lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); +static inline int l2cap_do_send(struct sock *sk, struct sk_buff *skb) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + int err; + + BT_DBG("sk %p, skb %p len %d", sk, skb, skb->len); + + err = hci_send_acl(pi->conn->hcon, skb, 0); + if (err < 0) + kfree_skb(skb); + + return err; +} + +static int l2cap_ertm_send(struct sock *sk) +{ + struct sk_buff *skb, *tx_skb; + struct l2cap_pinfo *pi = l2cap_pi(sk); + u16 control; + int err; + + while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) { + tx_skb = skb_clone(skb, GFP_ATOMIC); - if (sk->sk_type == SOCK_DGRAM) - put_unaligned(l2cap_pi(sk)->psm, (__le16 *) skb_put(skb, 2)); + control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); + control |= (pi->req_seq << L2CAP_CTRL_REQSEQ_SHIFT) + | (pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); + put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); + + err = l2cap_do_send(sk, tx_skb); + if (err < 0) { + l2cap_send_disconn_req(pi->conn, sk); + return err; + } + + bt_cb(skb)->tx_seq = pi->next_tx_seq; + pi->next_tx_seq = (pi->next_tx_seq + 1) % 64; + + pi->unacked_frames++; + + if (skb_queue_is_last(TX_QUEUE(sk), skb)) + sk->sk_send_head = NULL; + else + sk->sk_send_head = skb_queue_next(TX_QUEUE(sk), skb); + } + + return 0; +} + +static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, int len, int count, struct sk_buff 
*skb) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct sk_buff **frag; + int err, sent = 0; if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) { - err = -EFAULT; - goto fail; + return -EFAULT; } sent += count; @@ -1199,33 +1264,112 @@ static inline int l2cap_do_send(struct sock *sk, struct msghdr *msg, int len) *frag = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err); if (!*frag) - goto fail; - - if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count)) { - err = -EFAULT; - goto fail; - } + return -EFAULT; + if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count)) + return -EFAULT; sent += count; len -= count; frag = &(*frag)->next; } - err = hci_send_acl(conn->hcon, skb, 0); - if (err < 0) - goto fail; return sent; +} -fail: - kfree_skb(skb); - return err; +static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr *msg, size_t len) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct sk_buff *skb; + int err, count, hlen = L2CAP_HDR_SIZE + 2; + struct l2cap_hdr *lh; + + BT_DBG("sk %p len %d", sk, (int)len); + + count = min_t(unsigned int, (conn->mtu - hlen), len); + skb = bt_skb_send_alloc(sk, count + hlen, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return ERR_PTR(-ENOMEM); + + /* Create L2CAP header */ + lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid); + lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); + put_unaligned_le16(l2cap_pi(sk)->psm, skb_put(skb, 2)); + + err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb); + if (unlikely(err < 0)) { + kfree_skb(skb); + return ERR_PTR(err); + } + return skb; +} + +static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *msg, size_t len) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct sk_buff *skb; + int err, count, hlen = L2CAP_HDR_SIZE; + struct l2cap_hdr *lh; + + BT_DBG("sk %p len %d", sk, (int)len); + + count = min_t(unsigned int, (conn->mtu - hlen), len); + skb = bt_skb_send_alloc(sk, count + hlen, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return ERR_PTR(-ENOMEM); + + /* Create L2CAP header */ + lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid); + lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); + + err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb); + if (unlikely(err < 0)) { + kfree_skb(skb); + return ERR_PTR(err); + } + return skb; +} + +static struct sk_buff *l2cap_create_ertm_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control) +{ + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct sk_buff *skb; + int err, count, hlen = L2CAP_HDR_SIZE + 2; + struct l2cap_hdr *lh; + + BT_DBG("sk %p len %d", sk, (int)len); + + count = min_t(unsigned int, (conn->mtu - hlen), len); + skb = bt_skb_send_alloc(sk, count + hlen, + msg->msg_flags & MSG_DONTWAIT, &err); + if (!skb) + return ERR_PTR(-ENOMEM); + + /* Create L2CAP header */ + lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); + lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid); + lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); + put_unaligned_le16(control, skb_put(skb, 2)); + + err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb); + if (unlikely(err < 0)) { + kfree_skb(skb); + return ERR_PTR(err); + } + return skb; } static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; - int err = 0; + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff 
*skb; + u16 control; + int err; BT_DBG("sock %p, sk %p", sock, sk); @@ -1237,16 +1381,67 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms return -EOPNOTSUPP; /* Check outgoing MTU */ - if (sk->sk_type != SOCK_RAW && len > l2cap_pi(sk)->omtu) + if (sk->sk_type == SOCK_SEQPACKET && pi->mode == L2CAP_MODE_BASIC + && len > pi->omtu) return -EINVAL; lock_sock(sk); - if (sk->sk_state == BT_CONNECTED) - err = l2cap_do_send(sk, msg, len); - else + if (sk->sk_state != BT_CONNECTED) { err = -ENOTCONN; + goto done; + } + + /* Connectionless channel */ + if (sk->sk_type == SOCK_DGRAM) { + skb = l2cap_create_connless_pdu(sk, msg, len); + err = l2cap_do_send(sk, skb); + goto done; + } + switch (pi->mode) { + case L2CAP_MODE_BASIC: + /* Create a basic PDU */ + skb = l2cap_create_basic_pdu(sk, msg, len); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto done; + } + + err = l2cap_do_send(sk, skb); + if (!err) + err = len; + break; + + case L2CAP_MODE_ERTM: + /* Entire SDU fits into one PDU */ + if (len <= pi->omtu) { + control = L2CAP_SDU_UNSEGMENTED; + skb = l2cap_create_ertm_pdu(sk, msg, len, control); + if (IS_ERR(skb)) { + err = PTR_ERR(skb); + goto done; + } + } else { + /* FIXME: Segmentation will be added later */ + err = -EINVAL; + goto done; + } + __skb_queue_tail(TX_QUEUE(sk), skb); + if (sk->sk_send_head == NULL) + sk->sk_send_head = skb; + + err = l2cap_ertm_send(sk); + if (!err) + err = len; + break; + + default: + BT_DBG("bad state %1.1x", pi->mode); + err = -EINVAL; + } + +done: release_sock(sk); return err; } @@ -2301,6 +2496,10 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr if (l2cap_pi(sk)->conf_state & L2CAP_CONF_INPUT_DONE) { sk->sk_state = BT_CONNECTED; + l2cap_pi(sk)->next_tx_seq = 0; + l2cap_pi(sk)->expected_ack_seq = 0; + l2cap_pi(sk)->unacked_frames = 0; + __skb_queue_head_init(TX_QUEUE(sk)); l2cap_chan_ready(sk); goto unlock; } @@ -2375,6 +2574,9 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr if (l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE) { sk->sk_state = BT_CONNECTED; + l2cap_pi(sk)->expected_tx_seq = 0; + l2cap_pi(sk)->num_to_ack = 0; + __skb_queue_head_init(TX_QUEUE(sk)); l2cap_chan_ready(sk); } @@ -2405,6 +2607,8 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd sk->sk_shutdown = SHUTDOWN_MASK; + skb_queue_purge(TX_QUEUE(sk)); + l2cap_chan_del(sk, ECONNRESET); bh_unlock_sock(sk); @@ -2427,6 +2631,8 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd if (!sk) return 0; + skb_queue_purge(TX_QUEUE(sk)); + l2cap_chan_del(sk, 0); bh_unlock_sock(sk); @@ -2602,9 +2808,60 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk kfree_skb(skb); } +static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + u8 tx_seq = __get_txseq(rx_control); + u16 tx_control = 0; + int err = 0; + + BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); + + if (tx_seq != pi->expected_tx_seq) + return -EINVAL; + + pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; + err = sock_queue_rcv_skb(sk, skb); + if (err) + return err; + + pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK; + if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1) { + tx_control |= L2CAP_CTRL_FRAME_TYPE; + tx_control |= L2CAP_SUPER_RCV_READY; + tx_control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + err = 
l2cap_send_sframe(pi, tx_control); + } + return err; +} + +static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + + BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); + + switch (rx_control & L2CAP_CTRL_SUPERVISE) { + case L2CAP_SUPER_RCV_READY: + pi->expected_ack_seq = __get_reqseq(rx_control); + l2cap_drop_acked_frames(sk); + l2cap_ertm_send(sk); + break; + + case L2CAP_SUPER_RCV_NOT_READY: + case L2CAP_SUPER_REJECT: + case L2CAP_SUPER_SELECT_REJECT: + break; + } + + return 0; +} + static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb) { struct sock *sk; + u16 control; + int err; sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); if (!sk) { @@ -2617,16 +2874,40 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk if (sk->sk_state != BT_CONNECTED) goto drop; - if (l2cap_pi(sk)->imtu < skb->len) - goto drop; + switch (l2cap_pi(sk)->mode) { + case L2CAP_MODE_BASIC: + /* If socket recv buffers overflows we drop data here + * which is *bad* because L2CAP has to be reliable. + * But we don't have any other choice. L2CAP doesn't + * provide flow control mechanism. */ - /* If socket recv buffers overflows we drop data here - * which is *bad* because L2CAP has to be reliable. - * But we don't have any other choice. L2CAP doesn't - * provide flow control mechanism. */ + if (l2cap_pi(sk)->imtu < skb->len) + goto drop; - if (!sock_queue_rcv_skb(sk, skb)) - goto done; + if (!sock_queue_rcv_skb(sk, skb)) + goto done; + break; + + case L2CAP_MODE_ERTM: + control = get_unaligned_le16(skb->data); + skb_pull(skb, 2); + + if (l2cap_pi(sk)->imtu < skb->len) + goto drop; + + if (__is_iframe(control)) + err = l2cap_data_channel_iframe(sk, control, skb); + else + err = l2cap_data_channel_sframe(sk, control, skb); + + if (!err) + goto done; + break; + + default: + BT_DBG("sk %p: bad mode 0x%2.2x", sk, l2cap_pi(sk)->mode); + break; + } drop: kfree_skb(skb); @@ -2676,6 +2957,11 @@ static void l2cap_recv_frame(struct l2cap_conn *conn, struct sk_buff *skb) cid = __le16_to_cpu(lh->cid); len = __le16_to_cpu(lh->len); + if (len != skb->len) { + kfree_skb(skb); + return; + } + BT_DBG("len %d, cid 0x%4.4x", len, cid); switch (cid) { -- cgit v1.2.3 From c74e560cd0101455f1889515e1527e4c2e266113 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Thu, 20 Aug 2009 22:25:58 -0300 Subject: Bluetooth: Add support for Segmentation and Reassembly of SDUs ERTM should use Segmentation and Reassembly to break down a SDU in many PDUs on sending data to the other side. On sending packets we queue all 'segments' until end of segmentation and just the add them to the queue for sending. On receiving we create a new SKB with the SDU reassembled. Initially based on a patch from Nathan Holstein Signed-off-by: Gustavo F. 
Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 9 ++- net/bluetooth/l2cap.c | 170 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 162 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 9bbfbe74d400..0afde8d22b56 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -34,7 +34,7 @@ #define L2CAP_DEFAULT_MAX_RECEIVE 1 #define L2CAP_DEFAULT_RETRANS_TO 300 /* 300 milliseconds */ #define L2CAP_DEFAULT_MONITOR_TO 1000 /* 1 second */ -#define L2CAP_DEFAULT_MAX_RX_APDU 0xfff7 +#define L2CAP_DEFAULT_MAX_PDU_SIZE 672 #define L2CAP_CONN_TIMEOUT (40000) /* 40 seconds */ #define L2CAP_INFO_TIMEOUT (4000) /* 4 seconds */ @@ -311,6 +311,7 @@ struct l2cap_pinfo { __u8 conf_req[64]; __u8 conf_len; __u8 conf_state; + __u8 conn_state; __u8 next_tx_seq; __u8 expected_ack_seq; @@ -318,6 +319,9 @@ struct l2cap_pinfo { __u8 expected_tx_seq; __u8 unacked_frames; __u8 num_to_ack; + __u16 sdu_len; + __u16 partial_sdu_len; + struct sk_buff *sdu; __u8 ident; @@ -346,6 +350,8 @@ struct l2cap_pinfo { #define L2CAP_CONF_MAX_CONF_REQ 2 #define L2CAP_CONF_MAX_CONF_RSP 2 +#define L2CAP_CONN_SAR_SDU 0x01 + static inline int l2cap_tx_window_full(struct sock *sk) { struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -363,6 +369,7 @@ static inline int l2cap_tx_window_full(struct sock *sk) #define __get_reqseq(ctrl) ((ctrl) & L2CAP_CTRL_REQSEQ) >> 8 #define __is_iframe(ctrl) !((ctrl) & L2CAP_CTRL_FRAME_TYPE) #define __is_sframe(ctrl) (ctrl) & L2CAP_CTRL_FRAME_TYPE +#define __is_sar_start(ctrl) ((ctrl) & L2CAP_CTRL_SAR) == L2CAP_SDU_START void l2cap_load(void); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 45b8697a7398..167e02532697 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1334,7 +1334,7 @@ static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *ms return skb; } -static struct sk_buff *l2cap_create_ertm_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control) +static struct sk_buff *l2cap_create_ertm_pdu(struct sock *sk, struct msghdr *msg, size_t len, u16 control, u16 sdulen) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; struct sk_buff *skb; @@ -1343,6 +1343,9 @@ static struct sk_buff *l2cap_create_ertm_pdu(struct sock *sk, struct msghdr *msg BT_DBG("sk %p len %d", sk, (int)len); + if (sdulen) + hlen += 2; + count = min_t(unsigned int, (conn->mtu - hlen), len); skb = bt_skb_send_alloc(sk, count + hlen, msg->msg_flags & MSG_DONTWAIT, &err); @@ -1354,6 +1357,8 @@ static struct sk_buff *l2cap_create_ertm_pdu(struct sock *sk, struct msghdr *msg lh->cid = cpu_to_le16(l2cap_pi(sk)->dcid); lh->len = cpu_to_le16(len + (hlen - L2CAP_HDR_SIZE)); put_unaligned_le16(control, skb_put(skb, 2)); + if (sdulen) + put_unaligned_le16(sdulen, skb_put(skb, 2)); err = l2cap_skbuff_fromiovec(sk, msg, len, count, skb); if (unlikely(err < 0)) { @@ -1363,6 +1368,54 @@ static struct sk_buff *l2cap_create_ertm_pdu(struct sock *sk, struct msghdr *msg return skb; } +static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, size_t len) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *skb; + struct sk_buff_head sar_queue; + u16 control; + size_t size = 0; + + __skb_queue_head_init(&sar_queue); + control = L2CAP_SDU_START; + skb = l2cap_create_ertm_pdu(sk, msg, pi->max_pdu_size, control, len); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + __skb_queue_tail(&sar_queue, skb); + len -= pi->max_pdu_size; + size 
+=pi->max_pdu_size; + control = 0; + + while (len > 0) { + size_t buflen; + + if (len > pi->max_pdu_size) { + control |= L2CAP_SDU_CONTINUE; + buflen = pi->max_pdu_size; + } else { + control |= L2CAP_SDU_END; + buflen = len; + } + + skb = l2cap_create_ertm_pdu(sk, msg, buflen, control, 0); + if (IS_ERR(skb)) { + skb_queue_purge(&sar_queue); + return PTR_ERR(skb); + } + + __skb_queue_tail(&sar_queue, skb); + len -= buflen; + size += buflen; + control = 0; + } + skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk)); + if (sk->sk_send_head == NULL) + sk->sk_send_head = sar_queue.next; + + return size; +} + static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; @@ -1415,21 +1468,22 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms case L2CAP_MODE_ERTM: /* Entire SDU fits into one PDU */ - if (len <= pi->omtu) { + if (len <= pi->max_pdu_size) { control = L2CAP_SDU_UNSEGMENTED; - skb = l2cap_create_ertm_pdu(sk, msg, len, control); + skb = l2cap_create_ertm_pdu(sk, msg, len, control, 0); if (IS_ERR(skb)) { err = PTR_ERR(skb); goto done; } + __skb_queue_tail(TX_QUEUE(sk), skb); + if (sk->sk_send_head == NULL) + sk->sk_send_head = skb; } else { - /* FIXME: Segmentation will be added later */ - err = -EINVAL; - goto done; + /* Segment SDU into multiples PDUs */ + err = l2cap_sar_segment_sdu(sk, msg, len); + if (err < 0) + goto done; } - __skb_queue_tail(TX_QUEUE(sk), skb); - if (sk->sk_send_head == NULL) - sk->sk_send_head = skb; err = l2cap_ertm_send(sk); if (!err) @@ -2007,7 +2061,7 @@ done: rfc.max_transmit = L2CAP_DEFAULT_MAX_RECEIVE; rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; - rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_RX_APDU); + rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); @@ -2019,7 +2073,7 @@ done: rfc.max_transmit = 0; rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; - rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_RX_APDU); + rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); @@ -2808,6 +2862,86 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk kfree_skb(skb); } +static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *_skb; + int err = -EINVAL; + + switch (control & L2CAP_CTRL_SAR) { + case L2CAP_SDU_UNSEGMENTED: + if (pi->conn_state & L2CAP_CONN_SAR_SDU) { + kfree_skb(pi->sdu); + break; + } + + err = sock_queue_rcv_skb(sk, skb); + if (!err) + return 0; + + break; + + case L2CAP_SDU_START: + if (pi->conn_state & L2CAP_CONN_SAR_SDU) { + kfree_skb(pi->sdu); + break; + } + + pi->sdu_len = get_unaligned_le16(skb->data); + skb_pull(skb, 2); + + pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC); + if (!pi->sdu) { + err = -ENOMEM; + break; + } + + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + + pi->conn_state |= L2CAP_CONN_SAR_SDU; + pi->partial_sdu_len = skb->len; + err = 0; + break; + + case L2CAP_SDU_CONTINUE: + if (!(pi->conn_state & L2CAP_CONN_SAR_SDU)) + break; + + memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len); + + pi->partial_sdu_len += skb->len; + if (pi->partial_sdu_len > pi->sdu_len) + kfree_skb(pi->sdu); + else + err = 0; + + break; + + case L2CAP_SDU_END: + if (!(pi->conn_state & L2CAP_CONN_SAR_SDU)) + break; + + memcpy(skb_put(pi->sdu, 
skb->len), skb->data, skb->len); + + pi->conn_state &= ~L2CAP_CONN_SAR_SDU; + pi->partial_sdu_len += skb->len; + + if (pi->partial_sdu_len == pi->sdu_len) { + _skb = skb_clone(pi->sdu, GFP_ATOMIC); + err = sock_queue_rcv_skb(sk, _skb); + if (err < 0) + kfree_skb(_skb); + } + kfree_skb(pi->sdu); + err = 0; + + break; + } + + kfree_skb(skb); + return err; +} + static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) { struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -2820,11 +2954,11 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str if (tx_seq != pi->expected_tx_seq) return -EINVAL; - pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; - err = sock_queue_rcv_skb(sk, skb); - if (err) + err = l2cap_sar_reassembly_sdu(sk, skb, rx_control); + if (err < 0) return err; + pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK; if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1) { tx_control |= L2CAP_CTRL_FRAME_TYPE; @@ -2860,7 +2994,7 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk_buff *skb) { struct sock *sk; - u16 control; + u16 control, len; int err; sk = l2cap_get_chan_by_scid(&conn->chan_list, cid); @@ -2891,8 +3025,12 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk case L2CAP_MODE_ERTM: control = get_unaligned_le16(skb->data); skb_pull(skb, 2); + len = skb->len; - if (l2cap_pi(sk)->imtu < skb->len) + if (__is_sar_start(control)) + len -= 2; + + if (len > L2CAP_DEFAULT_MAX_PDU_SIZE) goto drop; if (__is_iframe(control)) -- cgit v1.2.3 From 30afb5b2aa83adf4f69e5090d48e1bb04b64c58a Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Thu, 20 Aug 2009 22:25:59 -0300 Subject: Bluetooth: Initial support for retransmission of packets with REJ frames When receiving an I-frame with unexpected txSeq, receiver side start the recovery procedure by sending a REJ S-frame to the transmitter side. So the transmitter can re-send the lost I-frame. This patch just adds a basic support for retransmission, it doesn't mean that ERTM now has full support for packet retransmission. Signed-off-by: Gustavo F. 
Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap.c | 57 ++++++++++++++++++++++++++++++++----------- 2 files changed, 44 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 0afde8d22b56..a1d8ec468ef3 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -351,6 +351,7 @@ struct l2cap_pinfo { #define L2CAP_CONF_MAX_CONF_RSP 2 #define L2CAP_CONN_SAR_SDU 0x01 +#define L2CAP_CONN_UNDER_REJ 0x02 static inline int l2cap_tx_window_full(struct sock *sk) { diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 167e02532697..35e9f5b80545 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -2951,22 +2951,36 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); - if (tx_seq != pi->expected_tx_seq) - return -EINVAL; + if (tx_seq == pi->expected_tx_seq) { + if (pi->conn_state & L2CAP_CONN_UNDER_REJ) + pi->conn_state &= ~L2CAP_CONN_UNDER_REJ; - err = l2cap_sar_reassembly_sdu(sk, skb, rx_control); - if (err < 0) - return err; + err = l2cap_sar_reassembly_sdu(sk, skb, rx_control); + if (err < 0) + return err; + + pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; + pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK; + if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1) { + tx_control |= L2CAP_SUPER_RCV_READY; + tx_control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + goto send; + } + } else { + /* Unexpected txSeq. Send a REJ S-frame */ + kfree_skb(skb); + if (!(pi->conn_state & L2CAP_CONN_UNDER_REJ)) { + tx_control |= L2CAP_SUPER_REJECT; + tx_control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + pi->conn_state |= L2CAP_CONN_UNDER_REJ; - pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; - pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK; - if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1) { - tx_control |= L2CAP_CTRL_FRAME_TYPE; - tx_control |= L2CAP_SUPER_RCV_READY; - tx_control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; - err = l2cap_send_sframe(pi, tx_control); + goto send; + } } - return err; + return 0; + +send: + return l2cap_send_sframe(pi, tx_control); } static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) @@ -2982,8 +2996,18 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str l2cap_ertm_send(sk); break; - case L2CAP_SUPER_RCV_NOT_READY: case L2CAP_SUPER_REJECT: + pi->expected_ack_seq = __get_reqseq(rx_control); + l2cap_drop_acked_frames(sk); + + sk->sk_send_head = TX_QUEUE(sk)->next; + pi->next_tx_seq = pi->expected_ack_seq; + + l2cap_ertm_send(sk); + + break; + + case L2CAP_SUPER_RCV_NOT_READY: case L2CAP_SUPER_SELECT_REJECT: break; } @@ -3030,6 +3054,11 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk if (__is_sar_start(control)) len -= 2; + /* + * We can just drop the corrupted I-frame here. + * Receiver will miss it and start proper recovery + * procedures and ask retransmission. + */ if (len > L2CAP_DEFAULT_MAX_PDU_SIZE) goto drop; -- cgit v1.2.3 From e90bac061b17cd81bd0df30606c64f4543bf5ca0 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Thu, 20 Aug 2009 22:26:00 -0300 Subject: Bluetooth: Add support for Retransmission and Monitor Timers L2CAP uses retransmission and monitor timers to inquiry the other side about unacked I-frames. 
After sending each I-frame we (re)start the retransmission timer. If it expires, we start a monitor timer that send a S-frame with P bit set and wait for S-frame with F bit set. If monitor timer expires, try again, at a maximum of L2CAP_DEFAULT_MAX_TX. Signed-off-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 1 + include/net/bluetooth/l2cap.h | 15 +++++-- net/bluetooth/l2cap.c | 86 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 95 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 65a5cf868fd6..b8b9a8479525 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -139,6 +139,7 @@ struct bt_skb_cb { __u8 pkt_type; __u8 incoming; __u8 tx_seq; + __u8 retries; }; #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb)) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index a1d8ec468ef3..2cf7003cb1b6 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -31,9 +31,9 @@ #define L2CAP_DEFAULT_FLUSH_TO 0xffff #define L2CAP_DEFAULT_TX_WINDOW 63 #define L2CAP_DEFAULT_NUM_TO_ACK (L2CAP_DEFAULT_TX_WINDOW/5) -#define L2CAP_DEFAULT_MAX_RECEIVE 1 -#define L2CAP_DEFAULT_RETRANS_TO 300 /* 300 milliseconds */ -#define L2CAP_DEFAULT_MONITOR_TO 1000 /* 1 second */ +#define L2CAP_DEFAULT_MAX_TX 3 +#define L2CAP_DEFAULT_RETRANS_TO 1000 /* 1 second */ +#define L2CAP_DEFAULT_MONITOR_TO 12000 /* 12 seconds */ #define L2CAP_DEFAULT_MAX_PDU_SIZE 672 #define L2CAP_CONN_TIMEOUT (40000) /* 40 seconds */ @@ -318,6 +318,7 @@ struct l2cap_pinfo { __u8 req_seq; __u8 expected_tx_seq; __u8 unacked_frames; + __u8 retry_count; __u8 num_to_ack; __u16 sdu_len; __u16 partial_sdu_len; @@ -333,6 +334,8 @@ struct l2cap_pinfo { __le16 sport; + struct timer_list retrans_timer; + struct timer_list monitor_timer; struct sk_buff_head tx_queue; struct l2cap_conn *conn; struct sock *next_c; @@ -352,6 +355,12 @@ struct l2cap_pinfo { #define L2CAP_CONN_SAR_SDU 0x01 #define L2CAP_CONN_UNDER_REJ 0x02 +#define L2CAP_CONN_WAIT_F 0x04 + +#define __mod_retrans_timer() mod_timer(&l2cap_pi(sk)->retrans_timer, \ + jiffies + msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO)); +#define __mod_monitor_timer() mod_timer(&l2cap_pi(sk)->monitor_timer, \ + jiffies + msecs_to_jiffies(L2CAP_DEFAULT_MONITOR_TO)); static inline int l2cap_tx_window_full(struct sock *sk) { diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 35e9f5b80545..97172f7c0a6a 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1178,6 +1178,39 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l return 0; } +static void l2cap_monitor_timeout(unsigned long arg) +{ + struct sock *sk = (void *) arg; + u16 control; + + if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) { + l2cap_send_disconn_req(l2cap_pi(sk)->conn, sk); + return; + } + + l2cap_pi(sk)->retry_count++; + __mod_monitor_timer(); + + control = L2CAP_CTRL_POLL; + control |= L2CAP_SUPER_RCV_READY; + l2cap_send_sframe(l2cap_pi(sk), control); +} + +static void l2cap_retrans_timeout(unsigned long arg) +{ + struct sock *sk = (void *) arg; + u16 control; + + l2cap_pi(sk)->retry_count = 1; + __mod_monitor_timer(); + + l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F; + + control = L2CAP_CTRL_POLL; + control |= L2CAP_SUPER_RCV_READY; + l2cap_send_sframe(l2cap_pi(sk), control); +} + static void l2cap_drop_acked_frames(struct sock *sk) { struct sk_buff *skb; 
@@ -1192,6 +1225,9 @@ static void l2cap_drop_acked_frames(struct sock *sk) l2cap_pi(sk)->unacked_frames--; } + if (!l2cap_pi(sk)->unacked_frames) + del_timer(&l2cap_pi(sk)->retrans_timer); + return; } @@ -1216,19 +1252,32 @@ static int l2cap_ertm_send(struct sock *sk) u16 control; int err; + if (pi->conn_state & L2CAP_CONN_WAIT_F) + return 0; + while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) { tx_skb = skb_clone(skb, GFP_ATOMIC); + if (pi->remote_max_tx && + bt_cb(skb)->retries == pi->remote_max_tx) { + l2cap_send_disconn_req(pi->conn, sk); + break; + } + + bt_cb(skb)->retries++; + control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); control |= (pi->req_seq << L2CAP_CTRL_REQSEQ_SHIFT) | (pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); + err = l2cap_do_send(sk, tx_skb); if (err < 0) { l2cap_send_disconn_req(pi->conn, sk); return err; } + __mod_retrans_timer(); bt_cb(skb)->tx_seq = pi->next_tx_seq; pi->next_tx_seq = (pi->next_tx_seq + 1) % 64; @@ -1365,6 +1414,8 @@ static struct sk_buff *l2cap_create_ertm_pdu(struct sock *sk, struct msghdr *msg kfree_skb(skb); return ERR_PTR(err); } + + bt_cb(skb)->retries = 0; return skb; } @@ -2058,7 +2109,7 @@ done: case L2CAP_MODE_ERTM: rfc.mode = L2CAP_MODE_ERTM; rfc.txwin_size = L2CAP_DEFAULT_TX_WINDOW; - rfc.max_transmit = L2CAP_DEFAULT_MAX_RECEIVE; + rfc.max_transmit = L2CAP_DEFAULT_MAX_TX; rfc.retrans_timeout = 0; rfc.monitor_timeout = 0; rfc.max_pdu_size = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE); @@ -2553,6 +2604,12 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr l2cap_pi(sk)->next_tx_seq = 0; l2cap_pi(sk)->expected_ack_seq = 0; l2cap_pi(sk)->unacked_frames = 0; + + setup_timer(&l2cap_pi(sk)->retrans_timer, + l2cap_retrans_timeout, (unsigned long) sk); + setup_timer(&l2cap_pi(sk)->monitor_timer, + l2cap_monitor_timeout, (unsigned long) sk); + __skb_queue_head_init(TX_QUEUE(sk)); l2cap_chan_ready(sk); goto unlock; @@ -2662,6 +2719,8 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd sk->sk_shutdown = SHUTDOWN_MASK; skb_queue_purge(TX_QUEUE(sk)); + del_timer(&l2cap_pi(sk)->retrans_timer); + del_timer(&l2cap_pi(sk)->monitor_timer); l2cap_chan_del(sk, ECONNRESET); bh_unlock_sock(sk); @@ -2686,6 +2745,8 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd return 0; skb_queue_purge(TX_QUEUE(sk)); + del_timer(&l2cap_pi(sk)->retrans_timer); + del_timer(&l2cap_pi(sk)->monitor_timer); l2cap_chan_del(sk, 0); bh_unlock_sock(sk); @@ -2991,9 +3052,26 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str switch (rx_control & L2CAP_CTRL_SUPERVISE) { case L2CAP_SUPER_RCV_READY: - pi->expected_ack_seq = __get_reqseq(rx_control); - l2cap_drop_acked_frames(sk); - l2cap_ertm_send(sk); + if (rx_control & L2CAP_CTRL_POLL) { + u16 control = L2CAP_CTRL_FINAL; + control |= L2CAP_SUPER_RCV_READY; + l2cap_send_sframe(l2cap_pi(sk), control); + } else if (rx_control & L2CAP_CTRL_FINAL) { + if (!(pi->conn_state & L2CAP_CONN_WAIT_F)) + break; + + pi->conn_state &= ~L2CAP_CONN_WAIT_F; + del_timer(&pi->monitor_timer); + + if (pi->unacked_frames > 0) + __mod_retrans_timer(); + } else { + pi->expected_ack_seq = __get_reqseq(rx_control); + l2cap_drop_acked_frames(sk); + if (pi->unacked_frames > 0) + __mod_retrans_timer(); + l2cap_ertm_send(sk); + } break; case L2CAP_SUPER_REJECT: -- cgit v1.2.3 From fcc203c30d72dde82692f6b761a80e5ca5fdd8fa Mon Sep 17 00:00:00 2001 From: "Gustavo 
F. Padovan" Date: Thu, 20 Aug 2009 22:26:02 -0300 Subject: Bluetooth: Add support for FCS option to L2CAP Implement CRC16 check for L2CAP packets. FCS is used by Streaming Mode and Enhanced Retransmission Mode and is a extra check for the packet content. Using CRC16 is the default, L2CAP won't use FCS only when both side send a "No FCS" request. Initially based on a patch from Nathan Holstein Signed-off-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 2 + net/bluetooth/l2cap.c | 101 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 97 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 2cf7003cb1b6..59b26bf10f30 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -54,6 +54,7 @@ struct l2cap_options { __u16 imtu; __u16 flush_to; __u8 mode; + __u8 fcs; }; #define L2CAP_CONNINFO 0x02 @@ -348,6 +349,7 @@ struct l2cap_pinfo { #define L2CAP_CONF_MTU_DONE 0x08 #define L2CAP_CONF_MODE_DONE 0x10 #define L2CAP_CONF_CONNECT_PEND 0x20 +#define L2CAP_CONF_NO_FCS_RECV 0x40 #define L2CAP_CONF_STATE2_DEVICE 0x80 #define L2CAP_CONF_MAX_CONF_REQ 2 diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 7f835e761822..4c319003c290 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -338,11 +339,14 @@ static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) struct sk_buff *skb; struct l2cap_hdr *lh; struct l2cap_conn *conn = pi->conn; - int count; + int count, hlen = L2CAP_HDR_SIZE + 2; + + if (pi->fcs == L2CAP_FCS_CRC16) + hlen += 2; BT_DBG("pi %p, control 0x%2.2x", pi, control); - count = min_t(unsigned int, conn->mtu, L2CAP_HDR_SIZE + 2); + count = min_t(unsigned int, conn->mtu, hlen); control |= L2CAP_CTRL_FRAME_TYPE; skb = bt_skb_alloc(count, GFP_ATOMIC); @@ -350,10 +354,15 @@ static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control) return -ENOMEM; lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); - lh->len = cpu_to_le16(2); + lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE); lh->cid = cpu_to_le16(pi->dcid); put_unaligned_le16(control, skb_put(skb, 2)); + if (pi->fcs == L2CAP_FCS_CRC16) { + u16 fcs = crc16(0, (u8 *)lh, count - 2); + put_unaligned_le16(fcs, skb_put(skb, 2)); + } + return hci_send_acl(pi->conn->hcon, skb, 0); } @@ -1249,7 +1258,7 @@ static int l2cap_streaming_send(struct sock *sk) { struct sk_buff *skb, *tx_skb; struct l2cap_pinfo *pi = l2cap_pi(sk); - u16 control; + u16 control, fcs; int err; while ((skb = sk->sk_send_head)) { @@ -1259,6 +1268,11 @@ static int l2cap_streaming_send(struct sock *sk) control |= pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT; put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); + if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) { + fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2); + put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2); + } + err = l2cap_do_send(sk, tx_skb); if (err < 0) { l2cap_send_disconn_req(pi->conn, sk); @@ -1282,7 +1296,7 @@ static int l2cap_ertm_send(struct sock *sk) { struct sk_buff *skb, *tx_skb; struct l2cap_pinfo *pi = l2cap_pi(sk); - u16 control; + u16 control, fcs; int err; if (pi->conn_state & L2CAP_CONN_WAIT_F) @@ -1305,6 +1319,11 @@ static int l2cap_ertm_send(struct sock *sk) put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); + if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) { + fcs = crc16(0, (u8 *)skb->data, tx_skb->len - 2); + 
put_unaligned_le16(fcs, skb->data + tx_skb->len - 2); + } + err = l2cap_do_send(sk, tx_skb); if (err < 0) { l2cap_send_disconn_req(pi->conn, sk); @@ -1428,6 +1447,9 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *m if (sdulen) hlen += 2; + if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) + hlen += 2; + count = min_t(unsigned int, (conn->mtu - hlen), len); skb = bt_skb_send_alloc(sk, count + hlen, msg->msg_flags & MSG_DONTWAIT, &err); @@ -1448,6 +1470,9 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *m return ERR_PTR(err); } + if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) + put_unaligned_le16(0, skb_put(skb, 2)); + bt_cb(skb)->retries = 0; return skb; } @@ -1633,6 +1658,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us opts.omtu = l2cap_pi(sk)->omtu; opts.flush_to = l2cap_pi(sk)->flush_to; opts.mode = l2cap_pi(sk)->mode; + opts.fcs = l2cap_pi(sk)->fcs; len = min_t(unsigned int, sizeof(opts), optlen); if (copy_from_user((char *) &opts, optval, len)) { @@ -1643,6 +1669,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us l2cap_pi(sk)->imtu = opts.imtu; l2cap_pi(sk)->omtu = opts.omtu; l2cap_pi(sk)->mode = opts.mode; + l2cap_pi(sk)->fcs = opts.fcs; break; case L2CAP_LM: @@ -1756,6 +1783,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us opts.omtu = l2cap_pi(sk)->omtu; opts.flush_to = l2cap_pi(sk)->flush_to; opts.mode = l2cap_pi(sk)->mode; + opts.fcs = l2cap_pi(sk)->fcs; len = min_t(unsigned int, len, sizeof(opts)); if (copy_to_user(optval, (char *) &opts, len)) @@ -2154,6 +2182,15 @@ done: l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); + + if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS)) + break; + + if (pi->fcs == L2CAP_FCS_NONE || + pi->conf_state & L2CAP_CONF_NO_FCS_RECV) { + pi->fcs = L2CAP_FCS_NONE; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, pi->fcs); + } break; case L2CAP_MODE_STREAMING: @@ -2166,6 +2203,15 @@ done: l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), (unsigned long) &rfc); + + if (!(pi->conn->feat_mask & L2CAP_FEAT_FCS)) + break; + + if (pi->fcs == L2CAP_FCS_NONE || + pi->conf_state & L2CAP_CONF_NO_FCS_RECV) { + pi->fcs = L2CAP_FCS_NONE; + l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, pi->fcs); + } break; } @@ -2217,6 +2263,12 @@ static int l2cap_parse_conf_req(struct sock *sk, void *data) memcpy(&rfc, (void *) val, olen); break; + case L2CAP_CONF_FCS: + if (val == L2CAP_FCS_NONE) + pi->conf_state |= L2CAP_CONF_NO_FCS_RECV; + + break; + default: if (hint) break; @@ -2638,6 +2690,10 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr goto unlock; if (l2cap_pi(sk)->conf_state & L2CAP_CONF_INPUT_DONE) { + if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_NO_FCS_RECV) + || l2cap_pi(sk)->fcs != L2CAP_FCS_NONE) + l2cap_pi(sk)->fcs = L2CAP_FCS_CRC16; + sk->sk_state = BT_CONNECTED; l2cap_pi(sk)->next_tx_seq = 0; l2cap_pi(sk)->expected_ack_seq = 0; @@ -2722,6 +2778,10 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr l2cap_pi(sk)->conf_state |= L2CAP_CONF_INPUT_DONE; if (l2cap_pi(sk)->conf_state & L2CAP_CONF_OUTPUT_DONE) { + if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_NO_FCS_RECV) + || l2cap_pi(sk)->fcs != L2CAP_FCS_NONE) + l2cap_pi(sk)->fcs = L2CAP_FCS_CRC16; + sk->sk_state = BT_CONNECTED; l2cap_pi(sk)->expected_tx_seq = 0; l2cap_pi(sk)->num_to_ack = 0; @@ -2809,7 +2869,8 @@ static inline int l2cap_information_req(struct l2cap_conn 
*conn, struct l2cap_cm rsp->type = cpu_to_le16(L2CAP_IT_FEAT_MASK); rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); if (enable_ertm) - feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING; + feat_mask |= L2CAP_FEAT_ERTM | L2CAP_FEAT_STREAMING + | L2CAP_FEAT_FCS; put_unaligned(cpu_to_le32(feat_mask), (__le32 *) rsp->data); l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(buf), buf); @@ -2961,6 +3022,22 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn, struct sk_buff *sk kfree_skb(skb); } +static int l2cap_check_fcs(struct l2cap_pinfo *pi, struct sk_buff *skb) +{ + u16 our_fcs, rcv_fcs; + int hdr_size = L2CAP_HDR_SIZE + 2; + + if (pi->fcs == L2CAP_FCS_CRC16) { + skb_trim(skb, skb->len - 2); + rcv_fcs = get_unaligned_le16(skb->data + skb->len); + our_fcs = crc16(0, skb->data - hdr_size, skb->len + hdr_size); + + if (our_fcs != rcv_fcs) + return -EINVAL; + } + return 0; +} + static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control) { struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -3174,6 +3251,9 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk if (__is_sar_start(control)) len -= 2; + if (pi->fcs == L2CAP_FCS_CRC16) + len -= 2; + /* * We can just drop the corrupted I-frame here. * Receiver will miss it and start proper recovery @@ -3182,6 +3262,9 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk if (len > L2CAP_DEFAULT_MAX_PDU_SIZE) goto drop; + if (l2cap_check_fcs(pi, skb)) + goto drop; + if (__is_iframe(control)) err = l2cap_data_channel_iframe(sk, control, skb); else @@ -3199,9 +3282,15 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk if (__is_sar_start(control)) len -= 2; + if (pi->fcs == L2CAP_FCS_CRC16) + len -= 2; + if (len > L2CAP_DEFAULT_MAX_PDU_SIZE || __is_sframe(control)) goto drop; + if (l2cap_check_fcs(pi, skb)) + goto drop; + tx_seq = __get_txseq(control); if (pi->expected_tx_seq == tx_seq) -- cgit v1.2.3 From 8f17154f1f70fcc6faa31ac82164fcf7f0599f38 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Thu, 20 Aug 2009 22:26:03 -0300 Subject: Bluetooth: Add support for L2CAP SREJ exception When L2CAP loses an I-frame we send a SREJ frame to the transmitter side requesting the lost packet. This patch implement all Recv I-frame events on SREJ_SENT state table except the ones that deal with SendRej (the REJ exception at receiver side is yet not implemented). Signed-off-by: Gustavo F. 
Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/bluetooth.h | 1 + include/net/bluetooth/l2cap.h | 14 ++- net/bluetooth/l2cap.c | 220 +++++++++++++++++++++++++++++++++----- 3 files changed, 210 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index b8b9a8479525..718394e2c01e 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -140,6 +140,7 @@ struct bt_skb_cb { __u8 incoming; __u8 tx_seq; __u8 retries; + __u8 sar; }; #define bt_cb(skb) ((struct bt_skb_cb *)((skb)->cb)) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 59b26bf10f30..9f2126a4f6f5 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -108,6 +108,7 @@ struct l2cap_conninfo { #define L2CAP_CTRL_TXSEQ_SHIFT 1 #define L2CAP_CTRL_REQSEQ_SHIFT 8 +#define L2CAP_CTRL_SAR_SHIFT 14 /* L2CAP Supervisory Function */ #define L2CAP_SUPER_RCV_READY 0x0000 @@ -290,6 +291,13 @@ struct l2cap_conn { /* ----- L2CAP channel and socket info ----- */ #define l2cap_pi(sk) ((struct l2cap_pinfo *) sk) #define TX_QUEUE(sk) (&l2cap_pi(sk)->tx_queue) +#define SREJ_QUEUE(sk) (&l2cap_pi(sk)->srej_queue) +#define SREJ_LIST(sk) (&l2cap_pi(sk)->srej_l.list) + +struct srej_list { + __u8 tx_seq; + struct list_head list; +}; struct l2cap_pinfo { struct bt_sock bt; @@ -318,6 +326,8 @@ struct l2cap_pinfo { __u8 expected_ack_seq; __u8 req_seq; __u8 expected_tx_seq; + __u8 buffer_seq; + __u8 buffer_seq_srej; __u8 unacked_frames; __u8 retry_count; __u8 num_to_ack; @@ -338,6 +348,8 @@ struct l2cap_pinfo { struct timer_list retrans_timer; struct timer_list monitor_timer; struct sk_buff_head tx_queue; + struct sk_buff_head srej_queue; + struct srej_list srej_l; struct l2cap_conn *conn; struct sock *next_c; struct sock *prev_c; @@ -356,7 +368,7 @@ struct l2cap_pinfo { #define L2CAP_CONF_MAX_CONF_RSP 2 #define L2CAP_CONN_SAR_SDU 0x01 -#define L2CAP_CONN_UNDER_REJ 0x02 +#define L2CAP_CONN_SREJ_SENT 0x02 #define L2CAP_CONN_WAIT_F 0x04 #define __mod_retrans_timer() mod_timer(&l2cap_pi(sk)->retrans_timer, \ diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 4c319003c290..70aff921db8c 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1292,6 +1292,50 @@ static int l2cap_streaming_send(struct sock *sk) return 0; } +static int l2cap_retransmit_frame(struct sock *sk, u8 tx_seq) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct sk_buff *skb, *tx_skb; + u16 control, fcs; + int err; + + skb = skb_peek(TX_QUEUE(sk)); + do { + if (bt_cb(skb)->tx_seq != tx_seq) { + if (skb_queue_is_last(TX_QUEUE(sk), skb)) + break; + skb = skb_queue_next(TX_QUEUE(sk), skb); + continue; + } + + if (pi->remote_max_tx && + bt_cb(skb)->retries == pi->remote_max_tx) { + l2cap_send_disconn_req(pi->conn, sk); + break; + } + + tx_skb = skb_clone(skb, GFP_ATOMIC); + bt_cb(skb)->retries++; + control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE); + control |= (pi->req_seq << L2CAP_CTRL_REQSEQ_SHIFT) + | (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT); + put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE); + + if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) { + fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2); + put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2); + } + + err = l2cap_do_send(sk, tx_skb); + if (err < 0) { + l2cap_send_disconn_req(pi->conn, sk); + return err; + } + break; + } while(1); + return 0; +} + static int l2cap_ertm_send(struct sock *sk) { struct sk_buff *skb, 
*tx_skb; @@ -2705,6 +2749,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr l2cap_monitor_timeout, (unsigned long) sk); __skb_queue_head_init(TX_QUEUE(sk)); + __skb_queue_head_init(SREJ_QUEUE(sk)); l2cap_chan_ready(sk); goto unlock; } @@ -2784,8 +2829,10 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr sk->sk_state = BT_CONNECTED; l2cap_pi(sk)->expected_tx_seq = 0; + l2cap_pi(sk)->buffer_seq = 0; l2cap_pi(sk)->num_to_ack = 0; __skb_queue_head_init(TX_QUEUE(sk)); + __skb_queue_head_init(SREJ_QUEUE(sk)); l2cap_chan_ready(sk); } @@ -2817,6 +2864,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd sk->sk_shutdown = SHUTDOWN_MASK; skb_queue_purge(TX_QUEUE(sk)); + skb_queue_purge(SREJ_QUEUE(sk)); del_timer(&l2cap_pi(sk)->retrans_timer); del_timer(&l2cap_pi(sk)->monitor_timer); @@ -2843,6 +2891,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd return 0; skb_queue_purge(TX_QUEUE(sk)); + skb_queue_purge(SREJ_QUEUE(sk)); del_timer(&l2cap_pi(sk)->retrans_timer); del_timer(&l2cap_pi(sk)->monitor_timer); @@ -3038,6 +3087,33 @@ static int l2cap_check_fcs(struct l2cap_pinfo *pi, struct sk_buff *skb) return 0; } +static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar) +{ + struct sk_buff *next_skb; + + bt_cb(skb)->tx_seq = tx_seq; + bt_cb(skb)->sar = sar; + + next_skb = skb_peek(SREJ_QUEUE(sk)); + if (!next_skb) { + __skb_queue_tail(SREJ_QUEUE(sk), skb); + return; + } + + do { + if (bt_cb(next_skb)->tx_seq > tx_seq) { + __skb_queue_before(SREJ_QUEUE(sk), next_skb, skb); + return; + } + + if (skb_queue_is_last(SREJ_QUEUE(sk), next_skb)) + break; + + } while((next_skb = skb_queue_next(SREJ_QUEUE(sk), next_skb))); + + __skb_queue_tail(SREJ_QUEUE(sk), skb); +} + static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control) { struct l2cap_pinfo *pi = l2cap_pi(sk); @@ -3118,50 +3194,143 @@ static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 co return err; } +static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq) +{ + struct sk_buff *skb; + u16 control = 0; + + while((skb = skb_peek(SREJ_QUEUE(sk)))) { + if (bt_cb(skb)->tx_seq != tx_seq) + break; + + skb = skb_dequeue(SREJ_QUEUE(sk)); + control |= bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT; + l2cap_sar_reassembly_sdu(sk, skb, control); + l2cap_pi(sk)->buffer_seq_srej = + (l2cap_pi(sk)->buffer_seq_srej + 1) % 64; + tx_seq++; + } +} + +static void l2cap_resend_srejframe(struct sock *sk, u8 tx_seq) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct srej_list *l, *tmp; + u16 control; + + list_for_each_entry_safe(l,tmp, SREJ_LIST(sk), list) { + if (l->tx_seq == tx_seq) { + list_del(&l->list); + kfree(l); + return; + } + control = L2CAP_SUPER_SELECT_REJECT; + control |= l->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + l2cap_send_sframe(pi, control); + list_del(&l->list); + list_add_tail(&l->list, SREJ_LIST(sk)); + } +} + +static void l2cap_send_srejframe(struct sock *sk, u8 tx_seq) +{ + struct l2cap_pinfo *pi = l2cap_pi(sk); + struct srej_list *new; + u16 control; + + while (tx_seq != pi->expected_tx_seq) { + control = L2CAP_SUPER_SELECT_REJECT; + control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + l2cap_send_sframe(pi, control); + + new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC); + new->tx_seq = pi->expected_tx_seq++; + list_add_tail(&new->list, SREJ_LIST(sk)); + } + pi->expected_tx_seq++; +} + static inline int 
l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) { struct l2cap_pinfo *pi = l2cap_pi(sk); u8 tx_seq = __get_txseq(rx_control); u16 tx_control = 0; + u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT; int err = 0; BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); - if (tx_seq == pi->expected_tx_seq) { - if (pi->conn_state & L2CAP_CONN_UNDER_REJ) - pi->conn_state &= ~L2CAP_CONN_UNDER_REJ; + if (tx_seq == pi->expected_tx_seq) + goto expected; - err = l2cap_sar_reassembly_sdu(sk, skb, rx_control); - if (err < 0) - return err; + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { + struct srej_list *first; - pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; - pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK; - if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1) { - tx_control |= L2CAP_SUPER_RCV_READY; - tx_control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; - goto send; + first = list_first_entry(SREJ_LIST(sk), + struct srej_list, list); + if (tx_seq == first->tx_seq) { + l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); + l2cap_check_srej_gap(sk, tx_seq); + + list_del(&first->list); + kfree(first); + + if (list_empty(SREJ_LIST(sk))) { + pi->buffer_seq = pi->buffer_seq_srej; + pi->conn_state &= ~L2CAP_CONN_SREJ_SENT; + } + } else { + struct srej_list *l; + l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); + + list_for_each_entry(l, SREJ_LIST(sk), list) { + if (l->tx_seq == tx_seq) { + l2cap_resend_srejframe(sk, tx_seq); + return 0; + } + } + l2cap_send_srejframe(sk, tx_seq); } } else { - /* Unexpected txSeq. Send a REJ S-frame */ - kfree_skb(skb); - if (!(pi->conn_state & L2CAP_CONN_UNDER_REJ)) { - tx_control |= L2CAP_SUPER_REJECT; - tx_control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; - pi->conn_state |= L2CAP_CONN_UNDER_REJ; + pi->conn_state |= L2CAP_CONN_SREJ_SENT; - goto send; - } + INIT_LIST_HEAD(SREJ_LIST(sk)); + pi->buffer_seq_srej = pi->buffer_seq; + + __skb_queue_head_init(SREJ_QUEUE(sk)); + l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); + + l2cap_send_srejframe(sk, tx_seq); } return 0; -send: - return l2cap_send_sframe(pi, tx_control); +expected: + pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64; + + if (pi->conn_state & L2CAP_CONN_SREJ_SENT) { + l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); + return 0; + } + + pi->buffer_seq = (pi->buffer_seq + 1) % 64; + + err = l2cap_sar_reassembly_sdu(sk, skb, rx_control); + if (err < 0) + return err; + + pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK; + if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1) { + tx_control |= L2CAP_SUPER_RCV_READY; + tx_control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT; + l2cap_send_sframe(pi, tx_control); + } + return 0; } static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb) { struct l2cap_pinfo *pi = l2cap_pi(sk); + u8 tx_seq = __get_reqseq(rx_control); BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len); @@ -3181,7 +3350,7 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str if (pi->unacked_frames > 0) __mod_retrans_timer(); } else { - pi->expected_ack_seq = __get_reqseq(rx_control); + pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); if (pi->unacked_frames > 0) __mod_retrans_timer(); @@ -3200,8 +3369,11 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str break; - case L2CAP_SUPER_RCV_NOT_READY: case L2CAP_SUPER_SELECT_REJECT: + l2cap_retransmit_frame(sk, tx_seq); + break; + + case 
L2CAP_SUPER_RCV_NOT_READY: break; } -- cgit v1.2.3 From ef54fd937fbd5ebaeb023818524565bd526a5f36 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Thu, 20 Aug 2009 22:26:04 -0300 Subject: Bluetooth: Full support for receiving L2CAP SREJ frames Support for receiving of SREJ frames as specified by the state table. Signed-off-by: Gustavo F. Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 3 +++ net/bluetooth/l2cap.c | 30 +++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 9f2126a4f6f5..7ca614ac5d43 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -328,6 +328,7 @@ struct l2cap_pinfo { __u8 expected_tx_seq; __u8 buffer_seq; __u8 buffer_seq_srej; + __u8 srej_save_reqseq; __u8 unacked_frames; __u8 retry_count; __u8 num_to_ack; @@ -370,6 +371,8 @@ struct l2cap_pinfo { #define L2CAP_CONN_SAR_SDU 0x01 #define L2CAP_CONN_SREJ_SENT 0x02 #define L2CAP_CONN_WAIT_F 0x04 +#define L2CAP_CONN_SREJ_ACT 0x08 +#define L2CAP_CONN_SEND_PBIT 0x10 #define __mod_retrans_timer() mod_timer(&l2cap_pi(sk)->retrans_timer, \ jiffies + msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO)); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 70aff921db8c..c04526f3df2e 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -3241,6 +3241,10 @@ static void l2cap_send_srejframe(struct sock *sk, u8 tx_seq) while (tx_seq != pi->expected_tx_seq) { control = L2CAP_SUPER_SELECT_REJECT; control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT; + if (pi->conn_state & L2CAP_CONN_SEND_PBIT) { + control |= L2CAP_CTRL_POLL; + pi->conn_state &= ~L2CAP_CONN_SEND_PBIT; + } l2cap_send_sframe(pi, control); new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC); @@ -3300,6 +3304,8 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str __skb_queue_head_init(SREJ_QUEUE(sk)); l2cap_add_to_srej_queue(sk, skb, tx_seq, sar); + pi->conn_state |= L2CAP_CONN_SEND_PBIT; + l2cap_send_srejframe(sk, tx_seq); } return 0; @@ -3370,7 +3376,29 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str break; case L2CAP_SUPER_SELECT_REJECT: - l2cap_retransmit_frame(sk, tx_seq); + if (rx_control & L2CAP_CTRL_POLL) { + l2cap_retransmit_frame(sk, tx_seq); + pi->expected_ack_seq = tx_seq; + l2cap_drop_acked_frames(sk); + l2cap_ertm_send(sk); + if (pi->conn_state & L2CAP_CONN_WAIT_F) { + pi->srej_save_reqseq = tx_seq; + pi->conn_state |= L2CAP_CONN_SREJ_ACT; + } + } else if (rx_control & L2CAP_CTRL_FINAL) { + if ((pi->conn_state & L2CAP_CONN_SREJ_ACT) && + pi->srej_save_reqseq == tx_seq) + pi->srej_save_reqseq &= ~L2CAP_CONN_SREJ_ACT; + else + l2cap_retransmit_frame(sk, tx_seq); + } + else { + l2cap_retransmit_frame(sk, tx_seq); + if (pi->conn_state & L2CAP_CONN_WAIT_F) { + pi->srej_save_reqseq = tx_seq; + pi->conn_state |= L2CAP_CONN_SREJ_ACT; + } + } break; case L2CAP_SUPER_RCV_NOT_READY: -- cgit v1.2.3 From 5e928f77a09a07f9dd595bb8a489965d69a83458 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 18 Aug 2009 23:38:32 +0200 Subject: PM: Introduce core framework for run-time PM of I/O devices (rev. 17) Introduce a core framework for run-time power management of I/O devices. Add device run-time PM fields to 'struct dev_pm_info' and device run-time PM callbacks to 'struct dev_pm_ops'. Introduce a run-time PM workqueue and define some device run-time PM helper functions at the core level. 
Document all these things. Special thanks to Alan Stern for his help with the design and multiple detailed reviews of the pereceding versions of this patch and to Magnus Damm for testing feedback. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- Documentation/power/runtime_pm.txt | 378 ++++++++++++++ drivers/base/dd.c | 11 + drivers/base/power/Makefile | 1 + drivers/base/power/main.c | 22 +- drivers/base/power/power.h | 31 +- drivers/base/power/runtime.c | 1011 ++++++++++++++++++++++++++++++++++++ include/linux/pm.h | 101 +++- include/linux/pm_runtime.h | 114 ++++ kernel/power/Kconfig | 14 + kernel/power/main.c | 17 + 10 files changed, 1689 insertions(+), 11 deletions(-) create mode 100644 Documentation/power/runtime_pm.txt create mode 100644 drivers/base/power/runtime.c create mode 100644 include/linux/pm_runtime.h (limited to 'include') diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt new file mode 100644 index 000000000000..f49a33b704d2 --- /dev/null +++ b/Documentation/power/runtime_pm.txt @@ -0,0 +1,378 @@ +Run-time Power Management Framework for I/O Devices + +(C) 2009 Rafael J. Wysocki , Novell Inc. + +1. Introduction + +Support for run-time power management (run-time PM) of I/O devices is provided +at the power management core (PM core) level by means of: + +* The power management workqueue pm_wq in which bus types and device drivers can + put their PM-related work items. It is strongly recommended that pm_wq be + used for queuing all work items related to run-time PM, because this allows + them to be synchronized with system-wide power transitions (suspend to RAM, + hibernation and resume from system sleep states). pm_wq is declared in + include/linux/pm_runtime.h and defined in kernel/power/main.c. + +* A number of run-time PM fields in the 'power' member of 'struct device' (which + is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can + be used for synchronizing run-time PM operations with one another. + +* Three device run-time PM callbacks in 'struct dev_pm_ops' (defined in + include/linux/pm.h). + +* A set of helper functions defined in drivers/base/power/runtime.c that can be + used for carrying out run-time PM operations in such a way that the + synchronization between them is taken care of by the PM core. Bus types and + device drivers are encouraged to use these functions. + +The run-time PM callbacks present in 'struct dev_pm_ops', the device run-time PM +fields of 'struct dev_pm_info' and the core helper functions provided for +run-time PM are described below. + +2. Device Run-time PM Callbacks + +There are three device run-time PM callbacks defined in 'struct dev_pm_ops': + +struct dev_pm_ops { + ... + int (*runtime_suspend)(struct device *dev); + int (*runtime_resume)(struct device *dev); + void (*runtime_idle)(struct device *dev); + ... +}; + +The ->runtime_suspend() callback is executed by the PM core for the bus type of +the device being suspended. The bus type's callback is then _entirely_ +_responsible_ for handling the device as appropriate, which may, but need not +include executing the device driver's own ->runtime_suspend() callback (from the +PM core's point of view it is not necessary to implement a ->runtime_suspend() +callback in a device driver as long as the bus type's ->runtime_suspend() knows +what to do to handle the device). 
+ + * Once the bus type's ->runtime_suspend() callback has completed successfully + for given device, the PM core regards the device as suspended, which need + not mean that the device has been put into a low power state. It is + supposed to mean, however, that the device will not process data and will + not communicate with the CPU(s) and RAM until its bus type's + ->runtime_resume() callback is executed for it. The run-time PM status of + a device after successful execution of its bus type's ->runtime_suspend() + callback is 'suspended'. + + * If the bus type's ->runtime_suspend() callback returns -EBUSY or -EAGAIN, + the device's run-time PM status is supposed to be 'active', which means that + the device _must_ be fully operational afterwards. + + * If the bus type's ->runtime_suspend() callback returns an error code + different from -EBUSY or -EAGAIN, the PM core regards this as a fatal + error and will refuse to run the helper functions described in Section 4 + for the device, until the status of it is directly set either to 'active' + or to 'suspended' (the PM core provides special helper functions for this + purpose). + +In particular, if the driver requires remote wakeup capability for proper +functioning and device_may_wakeup() returns 'false' for the device, then +->runtime_suspend() should return -EBUSY. On the other hand, if +device_may_wakeup() returns 'true' for the device and the device is put +into a low power state during the execution of its bus type's +->runtime_suspend(), it is expected that remote wake-up (i.e. hardware mechanism +allowing the device to request a change of its power state, such as PCI PME) +will be enabled for the device. Generally, remote wake-up should be enabled +for all input devices put into a low power state at run time. + +The ->runtime_resume() callback is executed by the PM core for the bus type of +the device being woken up. The bus type's callback is then _entirely_ +_responsible_ for handling the device as appropriate, which may, but need not +include executing the device driver's own ->runtime_resume() callback (from the +PM core's point of view it is not necessary to implement a ->runtime_resume() +callback in a device driver as long as the bus type's ->runtime_resume() knows +what to do to handle the device). + + * Once the bus type's ->runtime_resume() callback has completed successfully, + the PM core regards the device as fully operational, which means that the + device _must_ be able to complete I/O operations as needed. The run-time + PM status of the device is then 'active'. + + * If the bus type's ->runtime_resume() callback returns an error code, the PM + core regards this as a fatal error and will refuse to run the helper + functions described in Section 4 for the device, until its status is + directly set either to 'active' or to 'suspended' (the PM core provides + special helper functions for this purpose). + +The ->runtime_idle() callback is executed by the PM core for the bus type of +given device whenever the device appears to be idle, which is indicated to the +PM core by two counters, the device's usage counter and the counter of 'active' +children of the device. + + * If any of these counters is decreased using a helper function provided by + the PM core and it turns out to be equal to zero, the other counter is + checked. If that counter also is equal to zero, the PM core executes the + device bus type's ->runtime_idle() callback (with the device as an + argument). 
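A bus type will typically react to such an idle notification by scheduling a suspend, as the next paragraph recommends. A minimal, hypothetical sketch (not part of the patch; the 100 ms grace period is arbitrary) using the pm_schedule_suspend() helper described in Section 4:

static int example_bus_runtime_idle(struct device *dev)
{
	/*
	 * The device appears idle; queue up a delayed suspend request.
	 * The PM core ignores the return value of this callback.
	 */
	pm_schedule_suspend(dev, 100);
	return 0;
}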
+ +The action performed by a bus type's ->runtime_idle() callback is totally +dependent on the bus type in question, but the expected and recommended action +is to check if the device can be suspended (i.e. if all of the conditions +necessary for suspending the device are satisfied) and to queue up a suspend +request for the device in that case. + +The helper functions provided by the PM core, described in Section 4, guarantee +that the following constraints are met with respect to the bus type's run-time +PM callbacks: + +(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute + ->runtime_suspend() in parallel with ->runtime_resume() or with another + instance of ->runtime_suspend() for the same device) with the exception that + ->runtime_suspend() or ->runtime_resume() can be executed in parallel with + ->runtime_idle() (although ->runtime_idle() will not be started while any + of the other callbacks is being executed for the same device). + +(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active' + devices (i.e. the PM core will only execute ->runtime_idle() or + ->runtime_suspend() for the devices the run-time PM status of which is + 'active'). + +(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device + the usage counter of which is equal to zero _and_ either the counter of + 'active' children of which is equal to zero, or the 'power.ignore_children' + flag of which is set. + +(4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the + PM core will only execute ->runtime_resume() for the devices the run-time + PM status of which is 'suspended'). + +Additionally, the helper functions provided by the PM core obey the following +rules: + + * If ->runtime_suspend() is about to be executed or there's a pending request + to execute it, ->runtime_idle() will not be executed for the same device. + + * A request to execute or to schedule the execution of ->runtime_suspend() + will cancel any pending requests to execute ->runtime_idle() for the same + device. + + * If ->runtime_resume() is about to be executed or there's a pending request + to execute it, the other callbacks will not be executed for the same device. + + * A request to execute ->runtime_resume() will cancel any pending or + scheduled requests to execute the other callbacks for the same device. + +3. Run-time PM Device Fields + +The following device run-time PM fields are present in 'struct dev_pm_info', as +defined in include/linux/pm.h: + + struct timer_list suspend_timer; + - timer used for scheduling (delayed) suspend request + + unsigned long timer_expires; + - timer expiration time, in jiffies (if this is different from zero, the + timer is running and will expire at that time, otherwise the timer is not + running) + + struct work_struct work; + - work structure used for queuing up requests (i.e. work items in pm_wq) + + wait_queue_head_t wait_queue; + - wait queue used if any of the helper functions needs to wait for another + one to complete + + spinlock_t lock; + - lock used for synchronisation + + atomic_t usage_count; + - the usage counter of the device + + atomic_t child_count; + - the count of 'active' children of the device + + unsigned int ignore_children; + - if set, the value of child_count is ignored (but still updated) + + unsigned int disable_depth; + - used for disabling the helper funcions (they work normally if this is + equal to zero); the initial value of it is 1 (i.e. 
run-time PM is + initially disabled for all devices) + + unsigned int runtime_error; + - if set, there was a fatal error (one of the callbacks returned error code + as described in Section 2), so the helper funtions will not work until + this flag is cleared; this is the error code returned by the failing + callback + + unsigned int idle_notification; + - if set, ->runtime_idle() is being executed + + unsigned int request_pending; + - if set, there's a pending request (i.e. a work item queued up into pm_wq) + + enum rpm_request request; + - type of request that's pending (valid if request_pending is set) + + unsigned int deferred_resume; + - set if ->runtime_resume() is about to be run while ->runtime_suspend() is + being executed for that device and it is not practical to wait for the + suspend to complete; means "start a resume as soon as you've suspended" + + enum rpm_status runtime_status; + - the run-time PM status of the device; this field's initial value is + RPM_SUSPENDED, which means that each device is initially regarded by the + PM core as 'suspended', regardless of its real hardware status + +All of the above fields are members of the 'power' member of 'struct device'. + +4. Run-time PM Device Helper Functions + +The following run-time PM helper functions are defined in +drivers/base/power/runtime.c and include/linux/pm_runtime.h: + + void pm_runtime_init(struct device *dev); + - initialize the device run-time PM fields in 'struct dev_pm_info' + + void pm_runtime_remove(struct device *dev); + - make sure that the run-time PM of the device will be disabled after + removing the device from device hierarchy + + int pm_runtime_idle(struct device *dev); + - execute ->runtime_idle() for the device's bus type; returns 0 on success + or error code on failure, where -EINPROGRESS means that ->runtime_idle() + is already being executed + + int pm_runtime_suspend(struct device *dev); + - execute ->runtime_suspend() for the device's bus type; returns 0 on + success, 1 if the device's run-time PM status was already 'suspended', or + error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt + to suspend the device again in future + + int pm_runtime_resume(struct device *dev); + - execute ->runtime_resume() for the device's bus type; returns 0 on + success, 1 if the device's run-time PM status was already 'active' or + error code on failure, where -EAGAIN means it may be safe to attempt to + resume the device again in future, but 'power.runtime_error' should be + checked additionally + + int pm_request_idle(struct device *dev); + - submit a request to execute ->runtime_idle() for the device's bus type + (the request is represented by a work item in pm_wq); returns 0 on success + or error code if the request has not been queued up + + int pm_schedule_suspend(struct device *dev, unsigned int delay); + - schedule the execution of ->runtime_suspend() for the device's bus type + in future, where 'delay' is the time to wait before queuing up a suspend + work item in pm_wq, in milliseconds (if 'delay' is zero, the work item is + queued up immediately); returns 0 on success, 1 if the device's PM + run-time status was already 'suspended', or error code if the request + hasn't been scheduled (or queued up if 'delay' is 0); if the execution of + ->runtime_suspend() is already scheduled and not yet expired, the new + value of 'delay' will be used as the time to wait + + int pm_request_resume(struct device *dev); + - submit a request to execute ->runtime_resume() for the device's bus type 
+ (the request is represented by a work item in pm_wq); returns 0 on + success, 1 if the device's run-time PM status was already 'active', or + error code if the request hasn't been queued up + + void pm_runtime_get_noresume(struct device *dev); + - increment the device's usage counter + + int pm_runtime_get(struct device *dev); + - increment the device's usage counter, run pm_request_resume(dev) and + return its result + + int pm_runtime_get_sync(struct device *dev); + - increment the device's usage counter, run pm_runtime_resume(dev) and + return its result + + void pm_runtime_put_noidle(struct device *dev); + - decrement the device's usage counter + + int pm_runtime_put(struct device *dev); + - decrement the device's usage counter, run pm_request_idle(dev) and return + its result + + int pm_runtime_put_sync(struct device *dev); + - decrement the device's usage counter, run pm_runtime_idle(dev) and return + its result + + void pm_runtime_enable(struct device *dev); + - enable the run-time PM helper functions to run the device bus type's + run-time PM callbacks described in Section 2 + + int pm_runtime_disable(struct device *dev); + - prevent the run-time PM helper functions from running the device bus + type's run-time PM callbacks, make sure that all of the pending run-time + PM operations on the device are either completed or canceled; returns + 1 if there was a resume request pending and it was necessary to execute + ->runtime_resume() for the device's bus type to satisfy that request, + otherwise 0 is returned + + void pm_suspend_ignore_children(struct device *dev, bool enable); + - set/unset the power.ignore_children flag of the device + + int pm_runtime_set_active(struct device *dev); + - clear the device's 'power.runtime_error' flag, set the device's run-time + PM status to 'active' and update its parent's counter of 'active' + children as appropriate (it is only valid to use this function if + 'power.runtime_error' is set or 'power.disable_depth' is greater than + zero); it will fail and return error code if the device has a parent + which is not active and the 'power.ignore_children' flag of which is unset + + void pm_runtime_set_suspended(struct device *dev); + - clear the device's 'power.runtime_error' flag, set the device's run-time + PM status to 'suspended' and update its parent's counter of 'active' + children as appropriate (it is only valid to use this function if + 'power.runtime_error' is set or 'power.disable_depth' is greater than + zero) + +It is safe to execute the following helper functions from interrupt context: + +pm_request_idle() +pm_schedule_suspend() +pm_request_resume() +pm_runtime_get_noresume() +pm_runtime_get() +pm_runtime_put_noidle() +pm_runtime_put() +pm_suspend_ignore_children() +pm_runtime_set_active() +pm_runtime_set_suspended() +pm_runtime_enable() + +5. Run-time PM Initialization, Device Probing and Removal + +Initially, the run-time PM is disabled for all devices, which means that the +majority of the run-time PM helper funtions described in Section 4 will return +-EAGAIN until pm_runtime_enable() is called for the device. + +In addition to that, the initial run-time PM status of all devices is +'suspended', but it need not reflect the actual physical state of the device. +Thus, if the device is initially active (i.e. it is able to process I/O), its +run-time PM status must be changed to 'active', with the help of +pm_runtime_set_active(), before pm_runtime_enable() is called for the device. 
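For example, a driver whose hardware comes up fully powered might do something along the following lines; the foo_* names are invented for illustration and this is only a sketch of the intended call order, not code from this patch:

static int foo_probe(struct device *dev)
{
	/* The hardware is already active, so fix up the PM core's view... */
	pm_runtime_set_active(dev);
	/* ...and only then let the helper functions operate on the device. */
	pm_runtime_enable(dev);
	return 0;
}

static int foo_remove(struct device *dev)
{
	/* Balance foo_probe() before the device goes away. */
	pm_runtime_disable(dev);
	return 0;
}

static int foo_do_io(struct device *dev)
{
	int error;

	/* Take a usage reference and resume the device synchronously. */
	error = pm_runtime_get_sync(dev);
	if (error < 0) {
		/* Drop the reference taken above without an idle notification. */
		pm_runtime_put_noidle(dev);
		return error;
	}

	/* ... carry out the I/O here ... */

	/* Drop the reference; an idle notification follows once it reaches zero. */
	pm_runtime_put(dev);
	return 0;
}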
+ +However, if the device has a parent and the parent's run-time PM is enabled, +calling pm_runtime_set_active() for the device will affect the parent, unless +the parent's 'power.ignore_children' flag is set. Namely, in that case the +parent won't be able to suspend at run time, using the PM core's helper +functions, as long as the child's status is 'active', even if the child's +run-time PM is still disabled (i.e. pm_runtime_enable() hasn't been called for +the child yet or pm_runtime_disable() has been called for it). For this reason, +once pm_runtime_set_active() has been called for the device, pm_runtime_enable() +should be called for it too as soon as reasonably possible or its run-time PM +status should be changed back to 'suspended' with the help of +pm_runtime_set_suspended(). + +If the default initial run-time PM status of the device (i.e. 'suspended') +reflects the actual state of the device, its bus type's or its driver's +->probe() callback will likely need to wake it up using one of the PM core's +helper functions described in Section 4. In that case, pm_runtime_resume() +should be used. Of course, for this purpose the device's run-time PM has to be +enabled earlier by calling pm_runtime_enable(). + +If the device bus type's or driver's ->probe() or ->remove() callback runs +pm_runtime_suspend() or pm_runtime_idle() or their asynchronous counterparts, +they will fail returning -EAGAIN, because the device's usage counter is +incremented by the core before executing ->probe() and ->remove(). Still, it +may be desirable to suspend the device as soon as ->probe() or ->remove() has +finished, so the PM core uses pm_runtime_idle_sync() to invoke the device bus +type's ->runtime_idle() callback at that time. diff --git a/drivers/base/dd.c b/drivers/base/dd.c index f0106875f01d..7b34b3a48f67 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "base.h" #include "power/power.h" @@ -202,7 +203,10 @@ int driver_probe_device(struct device_driver *drv, struct device *dev) pr_debug("bus: '%s': %s: matched device %s with driver %s\n", drv->bus->name, __func__, dev_name(dev), drv->name); + pm_runtime_get_noresume(dev); + pm_runtime_barrier(dev); ret = really_probe(dev, drv); + pm_runtime_put_sync(dev); return ret; } @@ -245,7 +249,9 @@ int device_attach(struct device *dev) ret = 0; } } else { + pm_runtime_get_noresume(dev); ret = bus_for_each_drv(dev->bus, NULL, dev, __device_attach); + pm_runtime_put_sync(dev); } up(&dev->sem); return ret; @@ -306,6 +312,9 @@ static void __device_release_driver(struct device *dev) drv = dev->driver; if (drv) { + pm_runtime_get_noresume(dev); + pm_runtime_barrier(dev); + driver_sysfs_remove(dev); if (dev->bus) @@ -324,6 +333,8 @@ static void __device_release_driver(struct device *dev) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_UNBOUND_DRIVER, dev); + + pm_runtime_put_sync(dev); } } diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 911208b89259..3ce3519e8f30 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_PM) += sysfs.o obj-$(CONFIG_PM_SLEEP) += main.o +obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o ccflags-$(CONFIG_DEBUG_DRIVER) := -DDEBUG diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 1b1a786b7dec..86990011277b 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -21,6 +21,7 @@ #include #include #include 
+#include #include #include #include @@ -48,6 +49,16 @@ static DEFINE_MUTEX(dpm_list_mtx); */ static bool transition_started; +/** + * device_pm_init - Initialize the PM-related part of a device object + * @dev: Device object being initialized. + */ +void device_pm_init(struct device *dev) +{ + dev->power.status = DPM_ON; + pm_runtime_init(dev); +} + /** * device_pm_lock - lock the list of active devices used by the PM core */ @@ -105,6 +116,7 @@ void device_pm_remove(struct device *dev) mutex_lock(&dpm_list_mtx); list_del_init(&dev->power.entry); mutex_unlock(&dpm_list_mtx); + pm_runtime_remove(dev); } /** @@ -512,6 +524,7 @@ static void dpm_complete(pm_message_t state) mutex_unlock(&dpm_list_mtx); device_complete(dev, state); + pm_runtime_put_noidle(dev); mutex_lock(&dpm_list_mtx); } @@ -757,7 +770,14 @@ static int dpm_prepare(pm_message_t state) dev->power.status = DPM_PREPARING; mutex_unlock(&dpm_list_mtx); - error = device_prepare(dev, state); + pm_runtime_get_noresume(dev); + if (pm_runtime_barrier(dev) && device_may_wakeup(dev)) { + /* Wake-up requested during system sleep transition. */ + pm_runtime_put_noidle(dev); + error = -EBUSY; + } else { + error = device_prepare(dev, state); + } mutex_lock(&dpm_list_mtx); if (error) { diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index c7cb4fc3735c..b8fa1aa5225a 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -1,7 +1,14 @@ -static inline void device_pm_init(struct device *dev) -{ - dev->power.status = DPM_ON; -} +#ifdef CONFIG_PM_RUNTIME + +extern void pm_runtime_init(struct device *dev); +extern void pm_runtime_remove(struct device *dev); + +#else /* !CONFIG_PM_RUNTIME */ + +static inline void pm_runtime_init(struct device *dev) {} +static inline void pm_runtime_remove(struct device *dev) {} + +#endif /* !CONFIG_PM_RUNTIME */ #ifdef CONFIG_PM_SLEEP @@ -16,23 +23,33 @@ static inline struct device *to_device(struct list_head *entry) return container_of(entry, struct device, power.entry); } +extern void device_pm_init(struct device *dev); extern void device_pm_add(struct device *); extern void device_pm_remove(struct device *); extern void device_pm_move_before(struct device *, struct device *); extern void device_pm_move_after(struct device *, struct device *); extern void device_pm_move_last(struct device *); -#else /* CONFIG_PM_SLEEP */ +#else /* !CONFIG_PM_SLEEP */ + +static inline void device_pm_init(struct device *dev) +{ + pm_runtime_init(dev); +} + +static inline void device_pm_remove(struct device *dev) +{ + pm_runtime_remove(dev); +} static inline void device_pm_add(struct device *dev) {} -static inline void device_pm_remove(struct device *dev) {} static inline void device_pm_move_before(struct device *deva, struct device *devb) {} static inline void device_pm_move_after(struct device *deva, struct device *devb) {} static inline void device_pm_move_last(struct device *dev) {} -#endif +#endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_PM diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c new file mode 100644 index 000000000000..38556f6cc22d --- /dev/null +++ b/drivers/base/power/runtime.c @@ -0,0 +1,1011 @@ +/* + * drivers/base/power/runtime.c - Helper functions for device run-time PM + * + * Copyright (c) 2009 Rafael J. Wysocki , Novell Inc. + * + * This file is released under the GPLv2. 
+ */ + +#include +#include +#include + +static int __pm_runtime_resume(struct device *dev, bool from_wq); +static int __pm_request_idle(struct device *dev); +static int __pm_request_resume(struct device *dev); + +/** + * pm_runtime_deactivate_timer - Deactivate given device's suspend timer. + * @dev: Device to handle. + */ +static void pm_runtime_deactivate_timer(struct device *dev) +{ + if (dev->power.timer_expires > 0) { + del_timer(&dev->power.suspend_timer); + dev->power.timer_expires = 0; + } +} + +/** + * pm_runtime_cancel_pending - Deactivate suspend timer and cancel requests. + * @dev: Device to handle. + */ +static void pm_runtime_cancel_pending(struct device *dev) +{ + pm_runtime_deactivate_timer(dev); + /* + * In case there's a request pending, make sure its work function will + * return without doing anything. + */ + dev->power.request = RPM_REQ_NONE; +} + +/** + * __pm_runtime_idle - Notify device bus type if the device can be suspended. + * @dev: Device to notify the bus type about. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_runtime_idle(struct device *dev) + __releases(&dev->power.lock) __acquires(&dev->power.lock) +{ + int retval = 0; + + dev_dbg(dev, "__pm_runtime_idle()!\n"); + + if (dev->power.runtime_error) + retval = -EINVAL; + else if (dev->power.idle_notification) + retval = -EINPROGRESS; + else if (atomic_read(&dev->power.usage_count) > 0 + || dev->power.disable_depth > 0 + || dev->power.runtime_status != RPM_ACTIVE) + retval = -EAGAIN; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval) + goto out; + + if (dev->power.request_pending) { + /* + * If an idle notification request is pending, cancel it. Any + * other pending request takes precedence over us. + */ + if (dev->power.request == RPM_REQ_IDLE) { + dev->power.request = RPM_REQ_NONE; + } else if (dev->power.request != RPM_REQ_NONE) { + retval = -EAGAIN; + goto out; + } + } + + dev->power.idle_notification = true; + + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_idle) { + spin_unlock_irq(&dev->power.lock); + + dev->bus->pm->runtime_idle(dev); + + spin_lock_irq(&dev->power.lock); + } + + dev->power.idle_notification = false; + wake_up_all(&dev->power.wait_queue); + + out: + dev_dbg(dev, "__pm_runtime_idle() returns %d!\n", retval); + + return retval; +} + +/** + * pm_runtime_idle - Notify device bus type if the device can be suspended. + * @dev: Device to notify the bus type about. + */ +int pm_runtime_idle(struct device *dev) +{ + int retval; + + spin_lock_irq(&dev->power.lock); + retval = __pm_runtime_idle(dev); + spin_unlock_irq(&dev->power.lock); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_idle); + +/** + * __pm_runtime_suspend - Carry out run-time suspend of given device. + * @dev: Device to suspend. + * @from_wq: If set, the function has been called via pm_wq. + * + * Check if the device can be suspended and run the ->runtime_suspend() callback + * provided by its bus type. If another suspend has been started earlier, wait + * for it to finish. If an idle notification or suspend request is pending or + * scheduled, cancel it. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +int __pm_runtime_suspend(struct device *dev, bool from_wq) + __releases(&dev->power.lock) __acquires(&dev->power.lock) +{ + struct device *parent = NULL; + bool notify = false; + int retval = 0; + + dev_dbg(dev, "__pm_runtime_suspend()%s!\n", + from_wq ? 
" from workqueue" : ""); + + repeat: + if (dev->power.runtime_error) { + retval = -EINVAL; + goto out; + } + + /* Pending resume requests take precedence over us. */ + if (dev->power.request_pending + && dev->power.request == RPM_REQ_RESUME) { + retval = -EAGAIN; + goto out; + } + + /* Other scheduled or pending requests need to be canceled. */ + pm_runtime_cancel_pending(dev); + + if (dev->power.runtime_status == RPM_SUSPENDED) + retval = 1; + else if (dev->power.runtime_status == RPM_RESUMING + || dev->power.disable_depth > 0 + || atomic_read(&dev->power.usage_count) > 0) + retval = -EAGAIN; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval) + goto out; + + if (dev->power.runtime_status == RPM_SUSPENDING) { + DEFINE_WAIT(wait); + + if (from_wq) { + retval = -EINPROGRESS; + goto out; + } + + /* Wait for the other suspend running in parallel with us. */ + for (;;) { + prepare_to_wait(&dev->power.wait_queue, &wait, + TASK_UNINTERRUPTIBLE); + if (dev->power.runtime_status != RPM_SUSPENDING) + break; + + spin_unlock_irq(&dev->power.lock); + + schedule(); + + spin_lock_irq(&dev->power.lock); + } + finish_wait(&dev->power.wait_queue, &wait); + goto repeat; + } + + dev->power.runtime_status = RPM_SUSPENDING; + + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) { + spin_unlock_irq(&dev->power.lock); + + retval = dev->bus->pm->runtime_suspend(dev); + + spin_lock_irq(&dev->power.lock); + dev->power.runtime_error = retval; + } else { + retval = -ENOSYS; + } + + if (retval) { + dev->power.runtime_status = RPM_ACTIVE; + pm_runtime_cancel_pending(dev); + dev->power.deferred_resume = false; + + if (retval == -EAGAIN || retval == -EBUSY) { + notify = true; + dev->power.runtime_error = 0; + } + } else { + dev->power.runtime_status = RPM_SUSPENDED; + + if (dev->parent) { + parent = dev->parent; + atomic_add_unless(&parent->power.child_count, -1, 0); + } + } + wake_up_all(&dev->power.wait_queue); + + if (dev->power.deferred_resume) { + dev->power.deferred_resume = false; + __pm_runtime_resume(dev, false); + retval = -EAGAIN; + goto out; + } + + if (notify) + __pm_runtime_idle(dev); + + if (parent && !parent->power.ignore_children) { + spin_unlock_irq(&dev->power.lock); + + pm_request_idle(parent); + + spin_lock_irq(&dev->power.lock); + } + + out: + dev_dbg(dev, "__pm_runtime_suspend() returns %d!\n", retval); + + return retval; +} + +/** + * pm_runtime_suspend - Carry out run-time suspend of given device. + * @dev: Device to suspend. + */ +int pm_runtime_suspend(struct device *dev) +{ + int retval; + + spin_lock_irq(&dev->power.lock); + retval = __pm_runtime_suspend(dev, false); + spin_unlock_irq(&dev->power.lock); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_suspend); + +/** + * __pm_runtime_resume - Carry out run-time resume of given device. + * @dev: Device to resume. + * @from_wq: If set, the function has been called via pm_wq. + * + * Check if the device can be woken up and run the ->runtime_resume() callback + * provided by its bus type. If another resume has been started earlier, wait + * for it to finish. If there's a suspend running in parallel with this + * function, wait for it to finish and resume the device. Cancel any scheduled + * or pending requests. + * + * This function must be called under dev->power.lock with interrupts disabled. 
+ */ +int __pm_runtime_resume(struct device *dev, bool from_wq) + __releases(&dev->power.lock) __acquires(&dev->power.lock) +{ + struct device *parent = NULL; + int retval = 0; + + dev_dbg(dev, "__pm_runtime_resume()%s!\n", + from_wq ? " from workqueue" : ""); + + repeat: + if (dev->power.runtime_error) { + retval = -EINVAL; + goto out; + } + + pm_runtime_cancel_pending(dev); + + if (dev->power.runtime_status == RPM_ACTIVE) + retval = 1; + else if (dev->power.disable_depth > 0) + retval = -EAGAIN; + if (retval) + goto out; + + if (dev->power.runtime_status == RPM_RESUMING + || dev->power.runtime_status == RPM_SUSPENDING) { + DEFINE_WAIT(wait); + + if (from_wq) { + if (dev->power.runtime_status == RPM_SUSPENDING) + dev->power.deferred_resume = true; + retval = -EINPROGRESS; + goto out; + } + + /* Wait for the operation carried out in parallel with us. */ + for (;;) { + prepare_to_wait(&dev->power.wait_queue, &wait, + TASK_UNINTERRUPTIBLE); + if (dev->power.runtime_status != RPM_RESUMING + && dev->power.runtime_status != RPM_SUSPENDING) + break; + + spin_unlock_irq(&dev->power.lock); + + schedule(); + + spin_lock_irq(&dev->power.lock); + } + finish_wait(&dev->power.wait_queue, &wait); + goto repeat; + } + + if (!parent && dev->parent) { + /* + * Increment the parent's resume counter and resume it if + * necessary. + */ + parent = dev->parent; + spin_unlock_irq(&dev->power.lock); + + pm_runtime_get_noresume(parent); + + spin_lock_irq(&parent->power.lock); + /* + * We can resume if the parent's run-time PM is disabled or it + * is set to ignore children. + */ + if (!parent->power.disable_depth + && !parent->power.ignore_children) { + __pm_runtime_resume(parent, false); + if (parent->power.runtime_status != RPM_ACTIVE) + retval = -EBUSY; + } + spin_unlock_irq(&parent->power.lock); + + spin_lock_irq(&dev->power.lock); + if (retval) + goto out; + goto repeat; + } + + dev->power.runtime_status = RPM_RESUMING; + + if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) { + spin_unlock_irq(&dev->power.lock); + + retval = dev->bus->pm->runtime_resume(dev); + + spin_lock_irq(&dev->power.lock); + dev->power.runtime_error = retval; + } else { + retval = -ENOSYS; + } + + if (retval) { + dev->power.runtime_status = RPM_SUSPENDED; + pm_runtime_cancel_pending(dev); + } else { + dev->power.runtime_status = RPM_ACTIVE; + if (parent) + atomic_inc(&parent->power.child_count); + } + wake_up_all(&dev->power.wait_queue); + + if (!retval) + __pm_request_idle(dev); + + out: + if (parent) { + spin_unlock_irq(&dev->power.lock); + + pm_runtime_put(parent); + + spin_lock_irq(&dev->power.lock); + } + + dev_dbg(dev, "__pm_runtime_resume() returns %d!\n", retval); + + return retval; +} + +/** + * pm_runtime_resume - Carry out run-time resume of given device. + * @dev: Device to suspend. + */ +int pm_runtime_resume(struct device *dev) +{ + int retval; + + spin_lock_irq(&dev->power.lock); + retval = __pm_runtime_resume(dev, false); + spin_unlock_irq(&dev->power.lock); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_resume); + +/** + * pm_runtime_work - Universal run-time PM work function. + * @work: Work structure used for scheduling the execution of this function. + * + * Use @work to get the device object the work is to be done for, determine what + * is to be done and execute the appropriate run-time PM function. 
+ */ +static void pm_runtime_work(struct work_struct *work) +{ + struct device *dev = container_of(work, struct device, power.work); + enum rpm_request req; + + spin_lock_irq(&dev->power.lock); + + if (!dev->power.request_pending) + goto out; + + req = dev->power.request; + dev->power.request = RPM_REQ_NONE; + dev->power.request_pending = false; + + switch (req) { + case RPM_REQ_NONE: + break; + case RPM_REQ_IDLE: + __pm_runtime_idle(dev); + break; + case RPM_REQ_SUSPEND: + __pm_runtime_suspend(dev, true); + break; + case RPM_REQ_RESUME: + __pm_runtime_resume(dev, true); + break; + } + + out: + spin_unlock_irq(&dev->power.lock); +} + +/** + * __pm_request_idle - Submit an idle notification request for given device. + * @dev: Device to handle. + * + * Check if the device's run-time PM status is correct for suspending the device + * and queue up a request to run __pm_runtime_idle() for it. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_request_idle(struct device *dev) +{ + int retval = 0; + + if (dev->power.runtime_error) + retval = -EINVAL; + else if (atomic_read(&dev->power.usage_count) > 0 + || dev->power.disable_depth > 0 + || dev->power.runtime_status == RPM_SUSPENDED + || dev->power.runtime_status == RPM_SUSPENDING) + retval = -EAGAIN; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval) + return retval; + + if (dev->power.request_pending) { + /* Any requests other then RPM_REQ_IDLE take precedence. */ + if (dev->power.request == RPM_REQ_NONE) + dev->power.request = RPM_REQ_IDLE; + else if (dev->power.request != RPM_REQ_IDLE) + retval = -EAGAIN; + return retval; + } + + dev->power.request = RPM_REQ_IDLE; + dev->power.request_pending = true; + queue_work(pm_wq, &dev->power.work); + + return retval; +} + +/** + * pm_request_idle - Submit an idle notification request for given device. + * @dev: Device to handle. + */ +int pm_request_idle(struct device *dev) +{ + unsigned long flags; + int retval; + + spin_lock_irqsave(&dev->power.lock, flags); + retval = __pm_request_idle(dev); + spin_unlock_irqrestore(&dev->power.lock, flags); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_request_idle); + +/** + * __pm_request_suspend - Submit a suspend request for given device. + * @dev: Device to suspend. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_request_suspend(struct device *dev) +{ + int retval = 0; + + if (dev->power.runtime_error) + return -EINVAL; + + if (dev->power.runtime_status == RPM_SUSPENDED) + retval = 1; + else if (atomic_read(&dev->power.usage_count) > 0 + || dev->power.disable_depth > 0) + retval = -EAGAIN; + else if (dev->power.runtime_status == RPM_SUSPENDING) + retval = -EINPROGRESS; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval < 0) + return retval; + + pm_runtime_deactivate_timer(dev); + + if (dev->power.request_pending) { + /* + * Pending resume requests take precedence over us, but we can + * overtake any other pending request. + */ + if (dev->power.request == RPM_REQ_RESUME) + retval = -EAGAIN; + else if (dev->power.request != RPM_REQ_SUSPEND) + dev->power.request = retval ? + RPM_REQ_NONE : RPM_REQ_SUSPEND; + return retval; + } else if (retval) { + return retval; + } + + dev->power.request = RPM_REQ_SUSPEND; + dev->power.request_pending = true; + queue_work(pm_wq, &dev->power.work); + + return 0; +} + +/** + * pm_suspend_timer_fn - Timer function for pm_schedule_suspend(). 
+ * @data: Device pointer passed by pm_schedule_suspend(). + * + * Check if the time is right and execute __pm_request_suspend() in that case. + */ +static void pm_suspend_timer_fn(unsigned long data) +{ + struct device *dev = (struct device *)data; + unsigned long flags; + unsigned long expires; + + spin_lock_irqsave(&dev->power.lock, flags); + + expires = dev->power.timer_expires; + /* If 'expire' is after 'jiffies' we've been called too early. */ + if (expires > 0 && !time_after(expires, jiffies)) { + dev->power.timer_expires = 0; + __pm_request_suspend(dev); + } + + spin_unlock_irqrestore(&dev->power.lock, flags); +} + +/** + * pm_schedule_suspend - Set up a timer to submit a suspend request in future. + * @dev: Device to suspend. + * @delay: Time to wait before submitting a suspend request, in milliseconds. + */ +int pm_schedule_suspend(struct device *dev, unsigned int delay) +{ + unsigned long flags; + int retval = 0; + + spin_lock_irqsave(&dev->power.lock, flags); + + if (dev->power.runtime_error) { + retval = -EINVAL; + goto out; + } + + if (!delay) { + retval = __pm_request_suspend(dev); + goto out; + } + + pm_runtime_deactivate_timer(dev); + + if (dev->power.request_pending) { + /* + * Pending resume requests take precedence over us, but any + * other pending requests have to be canceled. + */ + if (dev->power.request == RPM_REQ_RESUME) { + retval = -EAGAIN; + goto out; + } + dev->power.request = RPM_REQ_NONE; + } + + if (dev->power.runtime_status == RPM_SUSPENDED) + retval = 1; + else if (dev->power.runtime_status == RPM_SUSPENDING) + retval = -EINPROGRESS; + else if (atomic_read(&dev->power.usage_count) > 0 + || dev->power.disable_depth > 0) + retval = -EAGAIN; + else if (!pm_children_suspended(dev)) + retval = -EBUSY; + if (retval) + goto out; + + dev->power.timer_expires = jiffies + msecs_to_jiffies(delay); + mod_timer(&dev->power.suspend_timer, dev->power.timer_expires); + + out: + spin_unlock_irqrestore(&dev->power.lock, flags); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_schedule_suspend); + +/** + * pm_request_resume - Submit a resume request for given device. + * @dev: Device to resume. + * + * This function must be called under dev->power.lock with interrupts disabled. + */ +static int __pm_request_resume(struct device *dev) +{ + int retval = 0; + + if (dev->power.runtime_error) + return -EINVAL; + + if (dev->power.runtime_status == RPM_ACTIVE) + retval = 1; + else if (dev->power.runtime_status == RPM_RESUMING) + retval = -EINPROGRESS; + else if (dev->power.disable_depth > 0) + retval = -EAGAIN; + if (retval < 0) + return retval; + + pm_runtime_deactivate_timer(dev); + + if (dev->power.request_pending) { + /* If non-resume request is pending, we can overtake it. */ + dev->power.request = retval ? RPM_REQ_NONE : RPM_REQ_RESUME; + return retval; + } else if (retval) { + return retval; + } + + dev->power.request = RPM_REQ_RESUME; + dev->power.request_pending = true; + queue_work(pm_wq, &dev->power.work); + + return retval; +} + +/** + * pm_request_resume - Submit a resume request for given device. + * @dev: Device to resume. + */ +int pm_request_resume(struct device *dev) +{ + unsigned long flags; + int retval; + + spin_lock_irqsave(&dev->power.lock, flags); + retval = __pm_request_resume(dev); + spin_unlock_irqrestore(&dev->power.lock, flags); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_request_resume); + +/** + * __pm_runtime_get - Reference count a device and wake it up, if necessary. + * @dev: Device to handle. 
+ * @sync: If set and the device is suspended, resume it synchronously. + * + * Increment the usage count of the device and if it was zero previously, + * resume it or submit a resume request for it, depending on the value of @sync. + */ +int __pm_runtime_get(struct device *dev, bool sync) +{ + int retval = 1; + + if (atomic_add_return(1, &dev->power.usage_count) == 1) + retval = sync ? pm_runtime_resume(dev) : pm_request_resume(dev); + + return retval; +} +EXPORT_SYMBOL_GPL(__pm_runtime_get); + +/** + * __pm_runtime_put - Decrement the device's usage counter and notify its bus. + * @dev: Device to handle. + * @sync: If the device's bus type is to be notified, do that synchronously. + * + * Decrement the usage count of the device and if it reaches zero, carry out a + * synchronous idle notification or submit an idle notification request for it, + * depending on the value of @sync. + */ +int __pm_runtime_put(struct device *dev, bool sync) +{ + int retval = 0; + + if (atomic_dec_and_test(&dev->power.usage_count)) + retval = sync ? pm_runtime_idle(dev) : pm_request_idle(dev); + + return retval; +} +EXPORT_SYMBOL_GPL(__pm_runtime_put); + +/** + * __pm_runtime_set_status - Set run-time PM status of a device. + * @dev: Device to handle. + * @status: New run-time PM status of the device. + * + * If run-time PM of the device is disabled or its power.runtime_error field is + * different from zero, the status may be changed either to RPM_ACTIVE, or to + * RPM_SUSPENDED, as long as that reflects the actual state of the device. + * However, if the device has a parent and the parent is not active, and the + * parent's power.ignore_children flag is unset, the device's status cannot be + * set to RPM_ACTIVE, so -EBUSY is returned in that case. + * + * If successful, __pm_runtime_set_status() clears the power.runtime_error field + * and the device parent's counter of unsuspended children is modified to + * reflect the new status. If the new status is RPM_SUSPENDED, an idle + * notification request for the parent is submitted. + */ +int __pm_runtime_set_status(struct device *dev, unsigned int status) +{ + struct device *parent = dev->parent; + unsigned long flags; + bool notify_parent = false; + int error = 0; + + if (status != RPM_ACTIVE && status != RPM_SUSPENDED) + return -EINVAL; + + spin_lock_irqsave(&dev->power.lock, flags); + + if (!dev->power.runtime_error && !dev->power.disable_depth) { + error = -EAGAIN; + goto out; + } + + if (dev->power.runtime_status == status) + goto out_set; + + if (status == RPM_SUSPENDED) { + /* It always is possible to set the status to 'suspended'. */ + if (parent) { + atomic_add_unless(&parent->power.child_count, -1, 0); + notify_parent = !parent->power.ignore_children; + } + goto out_set; + } + + if (parent) { + spin_lock_irq(&parent->power.lock); + + /* + * It is invalid to put an active child under a parent that is + * not active, has run-time PM enabled and the + * 'power.ignore_children' flag unset. 
+ */ + if (!parent->power.disable_depth + && !parent->power.ignore_children + && parent->power.runtime_status != RPM_ACTIVE) { + error = -EBUSY; + } else { + if (dev->power.runtime_status == RPM_SUSPENDED) + atomic_inc(&parent->power.child_count); + } + + spin_unlock_irq(&parent->power.lock); + + if (error) + goto out; + } + + out_set: + dev->power.runtime_status = status; + dev->power.runtime_error = 0; + out: + spin_unlock_irqrestore(&dev->power.lock, flags); + + if (notify_parent) + pm_request_idle(parent); + + return error; +} +EXPORT_SYMBOL_GPL(__pm_runtime_set_status); + +/** + * __pm_runtime_barrier - Cancel pending requests and wait for completions. + * @dev: Device to handle. + * + * Flush all pending requests for the device from pm_wq and wait for all + * run-time PM operations involving the device in progress to complete. + * + * Should be called under dev->power.lock with interrupts disabled. + */ +static void __pm_runtime_barrier(struct device *dev) +{ + pm_runtime_deactivate_timer(dev); + + if (dev->power.request_pending) { + dev->power.request = RPM_REQ_NONE; + spin_unlock_irq(&dev->power.lock); + + cancel_work_sync(&dev->power.work); + + spin_lock_irq(&dev->power.lock); + dev->power.request_pending = false; + } + + if (dev->power.runtime_status == RPM_SUSPENDING + || dev->power.runtime_status == RPM_RESUMING + || dev->power.idle_notification) { + DEFINE_WAIT(wait); + + /* Suspend, wake-up or idle notification in progress. */ + for (;;) { + prepare_to_wait(&dev->power.wait_queue, &wait, + TASK_UNINTERRUPTIBLE); + if (dev->power.runtime_status != RPM_SUSPENDING + && dev->power.runtime_status != RPM_RESUMING + && !dev->power.idle_notification) + break; + spin_unlock_irq(&dev->power.lock); + + schedule(); + + spin_lock_irq(&dev->power.lock); + } + finish_wait(&dev->power.wait_queue, &wait); + } +} + +/** + * pm_runtime_barrier - Flush pending requests and wait for completions. + * @dev: Device to handle. + * + * Prevent the device from being suspended by incrementing its usage counter and + * if there's a pending resume request for the device, wake the device up. + * Next, make sure that all pending requests for the device have been flushed + * from pm_wq and wait for all run-time PM operations involving the device in + * progress to complete. + * + * Return value: + * 1, if there was a resume request pending and the device had to be woken up, + * 0, otherwise + */ +int pm_runtime_barrier(struct device *dev) +{ + int retval = 0; + + pm_runtime_get_noresume(dev); + spin_lock_irq(&dev->power.lock); + + if (dev->power.request_pending + && dev->power.request == RPM_REQ_RESUME) { + __pm_runtime_resume(dev, false); + retval = 1; + } + + __pm_runtime_barrier(dev); + + spin_unlock_irq(&dev->power.lock); + pm_runtime_put_noidle(dev); + + return retval; +} +EXPORT_SYMBOL_GPL(pm_runtime_barrier); + +/** + * __pm_runtime_disable - Disable run-time PM of a device. + * @dev: Device to handle. + * @check_resume: If set, check if there's a resume request for the device. + * + * Increment power.disable_depth for the device and if was zero previously, + * cancel all pending run-time PM requests for the device and wait for all + * operations in progress to complete. The device can be either active or + * suspended after its run-time PM has been disabled. + * + * If @check_resume is set and there's a resume request pending when + * __pm_runtime_disable() is called and power.disable_depth is zero, the + * function will wake up the device before disabling its run-time PM. 
+ */ +void __pm_runtime_disable(struct device *dev, bool check_resume) +{ + spin_lock_irq(&dev->power.lock); + + if (dev->power.disable_depth > 0) { + dev->power.disable_depth++; + goto out; + } + + /* + * Wake up the device if there's a resume request pending, because that + * means there probably is some I/O to process and disabling run-time PM + * shouldn't prevent the device from processing the I/O. + */ + if (check_resume && dev->power.request_pending + && dev->power.request == RPM_REQ_RESUME) { + /* + * Prevent suspends and idle notifications from being carried + * out after we have woken up the device. + */ + pm_runtime_get_noresume(dev); + + __pm_runtime_resume(dev, false); + + pm_runtime_put_noidle(dev); + } + + if (!dev->power.disable_depth++) + __pm_runtime_barrier(dev); + + out: + spin_unlock_irq(&dev->power.lock); +} +EXPORT_SYMBOL_GPL(__pm_runtime_disable); + +/** + * pm_runtime_enable - Enable run-time PM of a device. + * @dev: Device to handle. + */ +void pm_runtime_enable(struct device *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&dev->power.lock, flags); + + if (dev->power.disable_depth > 0) + dev->power.disable_depth--; + else + dev_warn(dev, "Unbalanced %s!\n", __func__); + + spin_unlock_irqrestore(&dev->power.lock, flags); +} +EXPORT_SYMBOL_GPL(pm_runtime_enable); + +/** + * pm_runtime_init - Initialize run-time PM fields in given device object. + * @dev: Device object to initialize. + */ +void pm_runtime_init(struct device *dev) +{ + spin_lock_init(&dev->power.lock); + + dev->power.runtime_status = RPM_SUSPENDED; + dev->power.idle_notification = false; + + dev->power.disable_depth = 1; + atomic_set(&dev->power.usage_count, 0); + + dev->power.runtime_error = 0; + + atomic_set(&dev->power.child_count, 0); + pm_suspend_ignore_children(dev, false); + + dev->power.request_pending = false; + dev->power.request = RPM_REQ_NONE; + dev->power.deferred_resume = false; + INIT_WORK(&dev->power.work, pm_runtime_work); + + dev->power.timer_expires = 0; + setup_timer(&dev->power.suspend_timer, pm_suspend_timer_fn, + (unsigned long)dev); + + init_waitqueue_head(&dev->power.wait_queue); +} + +/** + * pm_runtime_remove - Prepare for removing a device from device hierarchy. + * @dev: Device object being removed from device hierarchy. + */ +void pm_runtime_remove(struct device *dev) +{ + __pm_runtime_disable(dev, false); + + /* Change the status back to 'suspended' to match the initial status. */ + if (dev->power.runtime_status == RPM_ACTIVE) + pm_runtime_set_suspended(dev); +} diff --git a/include/linux/pm.h b/include/linux/pm.h index b3f74764a586..2b6e20df0e52 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -22,6 +22,10 @@ #define _LINUX_PM_H #include +#include +#include +#include +#include /* * Callbacks for platform drivers to implement. @@ -165,6 +169,28 @@ typedef struct pm_message { * It is allowed to unregister devices while the above callbacks are being * executed. However, it is not allowed to unregister a device from within any * of its own callbacks. + * + * There also are the following callbacks related to run-time power management + * of devices: + * + * @runtime_suspend: Prepare the device for a condition in which it won't be + * able to communicate with the CPU(s) and RAM due to power management. + * This need not mean that the device should be put into a low power state. + * For example, if the device is behind a link which is about to be turned + * off, the device may remain at full power. 
If the device does go to low + * power and if device_may_wakeup(dev) is true, remote wake-up (i.e., a + * hardware mechanism allowing the device to request a change of its power + * state, such as PCI PME) should be enabled for it. + * + * @runtime_resume: Put the device into the fully active state in response to a + * wake-up event generated by hardware or at the request of software. If + * necessary, put the device into the full power state and restore its + * registers, so that it is fully operational. + * + * @runtime_idle: Device appears to be inactive and it might be put into a low + * power state if all of the necessary conditions are satisfied. Check + * these conditions and handle the device as appropriate, possibly queueing + * a suspend request for it. The return value is ignored by the PM core. */ struct dev_pm_ops { @@ -182,6 +208,9 @@ struct dev_pm_ops { int (*thaw_noirq)(struct device *dev); int (*poweroff_noirq)(struct device *dev); int (*restore_noirq)(struct device *dev); + int (*runtime_suspend)(struct device *dev); + int (*runtime_resume)(struct device *dev); + int (*runtime_idle)(struct device *dev); }; /** @@ -315,14 +344,80 @@ enum dpm_state { DPM_OFF_IRQ, }; +/** + * Device run-time power management status. + * + * These status labels are used internally by the PM core to indicate the + * current status of a device with respect to the PM core operations. They do + * not reflect the actual power state of the device or its status as seen by the + * driver. + * + * RPM_ACTIVE Device is fully operational. Indicates that the device + * bus type's ->runtime_resume() callback has completed + * successfully. + * + * RPM_SUSPENDED Device bus type's ->runtime_suspend() callback has + * completed successfully. The device is regarded as + * suspended. + * + * RPM_RESUMING Device bus type's ->runtime_resume() callback is being + * executed. + * + * RPM_SUSPENDING Device bus type's ->runtime_suspend() callback is being + * executed. + */ + +enum rpm_status { + RPM_ACTIVE = 0, + RPM_RESUMING, + RPM_SUSPENDED, + RPM_SUSPENDING, +}; + +/** + * Device run-time power management request types. + * + * RPM_REQ_NONE Do nothing. + * + * RPM_REQ_IDLE Run the device bus type's ->runtime_idle() callback + * + * RPM_REQ_SUSPEND Run the device bus type's ->runtime_suspend() callback + * + * RPM_REQ_RESUME Run the device bus type's ->runtime_resume() callback + */ + +enum rpm_request { + RPM_REQ_NONE = 0, + RPM_REQ_IDLE, + RPM_REQ_SUSPEND, + RPM_REQ_RESUME, +}; + struct dev_pm_info { pm_message_t power_state; - unsigned can_wakeup:1; - unsigned should_wakeup:1; + unsigned int can_wakeup:1; + unsigned int should_wakeup:1; enum dpm_state status; /* Owned by the PM core */ -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_PM_SLEEP struct list_head entry; #endif +#ifdef CONFIG_PM_RUNTIME + struct timer_list suspend_timer; + unsigned long timer_expires; + struct work_struct work; + wait_queue_head_t wait_queue; + spinlock_t lock; + atomic_t usage_count; + atomic_t child_count; + unsigned int disable_depth:3; + unsigned int ignore_children:1; + unsigned int idle_notification:1; + unsigned int request_pending:1; + unsigned int deferred_resume:1; + enum rpm_request request; + enum rpm_status runtime_status; + int runtime_error; +#endif }; /* diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h new file mode 100644 index 000000000000..44087044910f --- /dev/null +++ b/include/linux/pm_runtime.h @@ -0,0 +1,114 @@ +/* + * pm_runtime.h - Device run-time power management helper functions. 
+ * + * Copyright (C) 2009 Rafael J. Wysocki + * + * This file is released under the GPLv2. + */ + +#ifndef _LINUX_PM_RUNTIME_H +#define _LINUX_PM_RUNTIME_H + +#include +#include + +#ifdef CONFIG_PM_RUNTIME + +extern struct workqueue_struct *pm_wq; + +extern int pm_runtime_idle(struct device *dev); +extern int pm_runtime_suspend(struct device *dev); +extern int pm_runtime_resume(struct device *dev); +extern int pm_request_idle(struct device *dev); +extern int pm_schedule_suspend(struct device *dev, unsigned int delay); +extern int pm_request_resume(struct device *dev); +extern int __pm_runtime_get(struct device *dev, bool sync); +extern int __pm_runtime_put(struct device *dev, bool sync); +extern int __pm_runtime_set_status(struct device *dev, unsigned int status); +extern int pm_runtime_barrier(struct device *dev); +extern void pm_runtime_enable(struct device *dev); +extern void __pm_runtime_disable(struct device *dev, bool check_resume); + +static inline bool pm_children_suspended(struct device *dev) +{ + return dev->power.ignore_children + || !atomic_read(&dev->power.child_count); +} + +static inline void pm_suspend_ignore_children(struct device *dev, bool enable) +{ + dev->power.ignore_children = enable; +} + +static inline void pm_runtime_get_noresume(struct device *dev) +{ + atomic_inc(&dev->power.usage_count); +} + +static inline void pm_runtime_put_noidle(struct device *dev) +{ + atomic_add_unless(&dev->power.usage_count, -1, 0); +} + +#else /* !CONFIG_PM_RUNTIME */ + +static inline int pm_runtime_idle(struct device *dev) { return -ENOSYS; } +static inline int pm_runtime_suspend(struct device *dev) { return -ENOSYS; } +static inline int pm_runtime_resume(struct device *dev) { return 0; } +static inline int pm_request_idle(struct device *dev) { return -ENOSYS; } +static inline int pm_schedule_suspend(struct device *dev, unsigned int delay) +{ + return -ENOSYS; +} +static inline int pm_request_resume(struct device *dev) { return 0; } +static inline int __pm_runtime_get(struct device *dev, bool sync) { return 1; } +static inline int __pm_runtime_put(struct device *dev, bool sync) { return 0; } +static inline int __pm_runtime_set_status(struct device *dev, + unsigned int status) { return 0; } +static inline int pm_runtime_barrier(struct device *dev) { return 0; } +static inline void pm_runtime_enable(struct device *dev) {} +static inline void __pm_runtime_disable(struct device *dev, bool c) {} + +static inline bool pm_children_suspended(struct device *dev) { return false; } +static inline void pm_suspend_ignore_children(struct device *dev, bool en) {} +static inline void pm_runtime_get_noresume(struct device *dev) {} +static inline void pm_runtime_put_noidle(struct device *dev) {} + +#endif /* !CONFIG_PM_RUNTIME */ + +static inline int pm_runtime_get(struct device *dev) +{ + return __pm_runtime_get(dev, false); +} + +static inline int pm_runtime_get_sync(struct device *dev) +{ + return __pm_runtime_get(dev, true); +} + +static inline int pm_runtime_put(struct device *dev) +{ + return __pm_runtime_put(dev, false); +} + +static inline int pm_runtime_put_sync(struct device *dev) +{ + return __pm_runtime_put(dev, true); +} + +static inline int pm_runtime_set_active(struct device *dev) +{ + return __pm_runtime_set_status(dev, RPM_ACTIVE); +} + +static inline void pm_runtime_set_suspended(struct device *dev) +{ + __pm_runtime_set_status(dev, RPM_SUSPENDED); +} + +static inline void pm_runtime_disable(struct device *dev) +{ + __pm_runtime_disable(dev, true); +} + +#endif diff --git 
a/kernel/power/Kconfig b/kernel/power/Kconfig index 72067cbdb37f..91e09d3b2eb2 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -208,3 +208,17 @@ config APM_EMULATION random kernel OOPSes or reboots that don't seem to be related to anything, try disabling/enabling this option (or disabling/enabling APM in your BIOS). + +config PM_RUNTIME + bool "Run-time PM core functionality" + depends on PM + ---help--- + Enable functionality allowing I/O devices to be put into energy-saving + (low power) states at run time (or autosuspended) after a specified + period of inactivity and woken up in response to a hardware-generated + wake-up event or a driver's request. + + Hardware support is generally required for this functionality to work + and the bus type drivers of the buses the devices are on are + responsible for the actual handling of the autosuspend requests and + wake-up events. diff --git a/kernel/power/main.c b/kernel/power/main.c index f710e36930cc..347d2cc88cd0 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "power.h" @@ -217,8 +218,24 @@ static struct attribute_group attr_group = { .attrs = g, }; +#ifdef CONFIG_PM_RUNTIME +struct workqueue_struct *pm_wq; + +static int __init pm_start_workqueue(void) +{ + pm_wq = create_freezeable_workqueue("pm"); + + return pm_wq ? 0 : -ENOMEM; +} +#else +static inline int pm_start_workqueue(void) { return 0; } +#endif + static int __init pm_init(void) { + int error = pm_start_workqueue(); + if (error) + return error; power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; -- cgit v1.2.3 From 9e726b17422bade75fba94e625cd35fd1353e682 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Wed, 15 Jul 2009 13:50:58 -0300 Subject: Bluetooth: Fix rejected connection not disconnecting ACL link When using DEFER_SETUP on a RFCOMM socket, a SABM frame triggers authorization which when rejected send a DM response. This is fine according to the RFCOMM spec: the responding implementation may replace the "proper" response on the Multiplexer Control channel with a DM frame, sent on the referenced DLCI to indicate that the DLCI is not open, and that the responder would not grant a request to open it later either. But some stacks doesn't seems to cope with this leaving DLCI 0 open after receiving DM frame. To fix it properly a timer was introduced to rfcomm_session which is used to set a timeout when the last active DLC of a session is unlinked, this will give the remote stack some time to reply with a proper DISC frame on DLCI 0 avoiding both sides sending DISC to each other on stacks that follow the specification and taking care of those who don't by taking down DLCI 0. 
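Stripped of the RFCOMM specifics, the idle timer this patch introduces follows a common reference-counting pattern: hold a reference on the session while the timer is armed, and drop it either when the timer is cancelled or from the timer function itself. A rough, self-contained sketch of that pattern (illustrative only, using kref and invented idle_session_* names rather than the rfcomm_session hold/put helpers shown in the diff below):

#include <linux/timer.h>
#include <linux/jiffies.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct idle_session {
	struct kref refcnt;
	struct timer_list timer;
};

static void idle_session_release(struct kref *kref)
{
	kfree(container_of(kref, struct idle_session, refcnt));
}

static void idle_session_timeout(unsigned long arg)
{
	struct idle_session *s = (struct idle_session *)arg;

	/* The owner would flag the timeout here; drop the timer's reference. */
	kref_put(&s->refcnt, idle_session_release);
}

static void idle_session_init(struct idle_session *s)
{
	kref_init(&s->refcnt);
	setup_timer(&s->timer, idle_session_timeout, (unsigned long)s);
}

static void idle_session_set_timer(struct idle_session *s, unsigned long timeout)
{
	/*
	 * mod_timer() returns 0 when the timer was not already pending, so a
	 * reference is taken only when the idle timer is actually armed.
	 */
	if (!mod_timer(&s->timer, jiffies + timeout))
		kref_get(&s->refcnt);
}

static void idle_session_clear_timer(struct idle_session *s)
{
	/*
	 * Drop the timer's reference only if a pending timer was cancelled;
	 * otherwise the timer function performs the put itself.
	 */
	if (timer_pending(&s->timer) && del_timer(&s->timer))
		kref_put(&s->refcnt, idle_session_release);
}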
Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Marcel Holtmann --- include/net/bluetooth/rfcomm.h | 2 ++ net/bluetooth/rfcomm/core.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index c274993234e3..921d7b3c7f8d 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -29,6 +29,7 @@ #define RFCOMM_CONN_TIMEOUT (HZ * 30) #define RFCOMM_DISC_TIMEOUT (HZ * 20) #define RFCOMM_AUTH_TIMEOUT (HZ * 25) +#define RFCOMM_IDLE_TIMEOUT (HZ * 2) #define RFCOMM_DEFAULT_MTU 127 #define RFCOMM_DEFAULT_CREDITS 7 @@ -154,6 +155,7 @@ struct rfcomm_msc { struct rfcomm_session { struct list_head list; struct socket *sock; + struct timer_list timer; unsigned long state; unsigned long flags; atomic_t refcnt; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 26af4854af64..25692bc0a342 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -244,6 +244,33 @@ static inline int rfcomm_check_security(struct rfcomm_dlc *d) auth_type); } +static void rfcomm_session_timeout(unsigned long arg) +{ + struct rfcomm_session *s = (void *) arg; + + BT_DBG("session %p state %ld", s, s->state); + + set_bit(RFCOMM_TIMED_OUT, &s->flags); + rfcomm_session_put(s); + rfcomm_schedule(RFCOMM_SCHED_TIMEO); +} + +static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout) +{ + BT_DBG("session %p state %ld timeout %ld", s, s->state, timeout); + + if (!mod_timer(&s->timer, jiffies + timeout)) + rfcomm_session_hold(s); +} + +static void rfcomm_session_clear_timer(struct rfcomm_session *s) +{ + BT_DBG("session %p state %ld", s, s->state); + + if (timer_pending(&s->timer) && del_timer(&s->timer)) + rfcomm_session_put(s); +} + /* ---- RFCOMM DLCs ---- */ static void rfcomm_dlc_timeout(unsigned long arg) { @@ -320,6 +347,7 @@ static void rfcomm_dlc_link(struct rfcomm_session *s, struct rfcomm_dlc *d) rfcomm_session_hold(s); + rfcomm_session_clear_timer(s); rfcomm_dlc_hold(d); list_add(&d->list, &s->dlcs); d->session = s; @@ -335,6 +363,9 @@ static void rfcomm_dlc_unlink(struct rfcomm_dlc *d) d->session = NULL; rfcomm_dlc_put(d); + if (list_empty(&s->dlcs)) + rfcomm_session_set_timer(s, RFCOMM_IDLE_TIMEOUT); + rfcomm_session_put(s); } @@ -567,6 +598,8 @@ static struct rfcomm_session *rfcomm_session_add(struct socket *sock, int state) BT_DBG("session %p sock %p", s, sock); + setup_timer(&s->timer, rfcomm_session_timeout, (unsigned long) s); + INIT_LIST_HEAD(&s->dlcs); s->state = state; s->sock = sock; @@ -598,6 +631,7 @@ static void rfcomm_session_del(struct rfcomm_session *s) if (state == BT_CONNECTED) rfcomm_send_disc(s, 0); + rfcomm_session_clear_timer(s); sock_release(s->sock); kfree(s); @@ -639,6 +673,7 @@ static void rfcomm_session_close(struct rfcomm_session *s, int err) __rfcomm_dlc_close(d, err); } + rfcomm_session_clear_timer(s); rfcomm_session_put(s); } @@ -1879,6 +1914,12 @@ static inline void rfcomm_process_sessions(void) struct rfcomm_session *s; s = list_entry(p, struct rfcomm_session, list); + if (test_and_clear_bit(RFCOMM_TIMED_OUT, &s->flags)) { + s->state = BT_DISCONN; + rfcomm_send_disc(s, 0); + continue; + } + if (s->state == BT_LISTEN) { rfcomm_accept_connection(s); continue; -- cgit v1.2.3 From 6c10db72c94818573552fd71c89540da325efdfb Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Fri, 26 Jun 2009 19:30:06 -0700 Subject: [SCSI] scsi_dh: Reference count scsi_dh_attach Problem reported: 
http://marc.info/?l=dm-devel&m=124585978305866&w=2 scsi_dh does not do a refernce count for attach/detach, and this affects the way it is supposed to work with multipath when a device is not in the dev_list of the hardware handler. This patch adds a reference count that counts each attach. Signed-off-by: Chandra Seetharaman Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- drivers/scsi/device_handler/scsi_dh.c | 23 ++++++++++++++++------- include/scsi/scsi_device.h | 2 ++ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/scsi/device_handler/scsi_dh.c b/drivers/scsi/device_handler/scsi_dh.c index a518f2eff19a..53a7385e1b4d 100644 --- a/drivers/scsi/device_handler/scsi_dh.c +++ b/drivers/scsi/device_handler/scsi_dh.c @@ -153,12 +153,24 @@ static int scsi_dh_handler_attach(struct scsi_device *sdev, if (sdev->scsi_dh_data) { if (sdev->scsi_dh_data->scsi_dh != scsi_dh) err = -EBUSY; - } else if (scsi_dh->attach) + else + kref_get(&sdev->scsi_dh_data->kref); + } else if (scsi_dh->attach) { err = scsi_dh->attach(sdev); - + if (!err) { + kref_init(&sdev->scsi_dh_data->kref); + sdev->scsi_dh_data->sdev = sdev; + } + } return err; } +static void __detach_handler (struct kref *kref) +{ + struct scsi_dh_data *scsi_dh_data = container_of(kref, struct scsi_dh_data, kref); + scsi_dh_data->scsi_dh->detach(scsi_dh_data->sdev); +} + /* * scsi_dh_handler_detach - Detach a device handler from a device * @sdev - SCSI device the device handler should be detached from @@ -180,7 +192,7 @@ static void scsi_dh_handler_detach(struct scsi_device *sdev, scsi_dh = sdev->scsi_dh_data->scsi_dh; if (scsi_dh && scsi_dh->detach) - scsi_dh->detach(sdev); + kref_put(&sdev->scsi_dh_data->kref, __detach_handler); } /* @@ -474,7 +486,6 @@ int scsi_dh_attach(struct request_queue *q, const char *name) if (!err) { err = scsi_dh_handler_attach(sdev, scsi_dh); - put_device(&sdev->sdev_gendev); } return err; @@ -505,10 +516,8 @@ void scsi_dh_detach(struct request_queue *q) return; if (sdev->scsi_dh_data) { - /* if sdev is not on internal list, detach */ scsi_dh = sdev->scsi_dh_data->scsi_dh; - if (!device_handler_match(scsi_dh, sdev)) - scsi_dh_handler_detach(sdev, scsi_dh); + scsi_dh_handler_detach(sdev, scsi_dh); } put_device(&sdev->sdev_gendev); } diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 3f566af3f101..1f3a4c8044c0 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -191,6 +191,8 @@ struct scsi_device_handler { struct scsi_dh_data { struct scsi_device_handler *scsi_dh; + struct scsi_device *sdev; + struct kref kref; char buf[0]; }; -- cgit v1.2.3 From beb29a6d421f6dbd41d68d0621c1b28ad1d4a9f4 Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Wed, 29 Jul 2009 17:04:06 -0700 Subject: [SCSI] libfc: remove extra semicolons from debug macros This is unlikely to cause any problems, but the libfc debug macros introduce extra undesirable semicolons. Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- include/scsi/libfc.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index b92584a8843a..ef04a2c52b8c 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -51,22 +51,22 @@ do { \ do { \ CMD; \ } while (0); \ -} while (0); +} while (0) #define FC_LIBFC_DBG(fmt, args...) 
\ FC_CHECK_LOGGING(FC_LIBFC_LOGGING, \ - printk(KERN_INFO "libfc: " fmt, ##args);) + printk(KERN_INFO "libfc: " fmt, ##args)) #define FC_LPORT_DBG(lport, fmt, args...) \ FC_CHECK_LOGGING(FC_LPORT_LOGGING, \ printk(KERN_INFO "lport: %6x: " fmt, \ - fc_host_port_id(lport->host), ##args);) + fc_host_port_id(lport->host), ##args)) #define FC_DISC_DBG(disc, fmt, args...) \ FC_CHECK_LOGGING(FC_DISC_LOGGING, \ printk(KERN_INFO "disc: %6x: " fmt, \ fc_host_port_id(disc->lport->host), \ - ##args);) + ##args)) #define FC_RPORT_DBG(rport, fmt, args...) \ do { \ @@ -75,31 +75,31 @@ do { \ FC_CHECK_LOGGING(FC_RPORT_LOGGING, \ printk(KERN_INFO "rport: %6x: %6x: " fmt, \ fc_host_port_id(lport->host), \ - rport->port_id, ##args);) \ -} while (0); + rport->port_id, ##args)); \ +} while (0) #define FC_FCP_DBG(pkt, fmt, args...) \ FC_CHECK_LOGGING(FC_FCP_LOGGING, \ printk(KERN_INFO "fcp: %6x: %6x: " fmt, \ fc_host_port_id(pkt->lp->host), \ - pkt->rport->port_id, ##args);) + pkt->rport->port_id, ##args)) #define FC_EM_DBG(em, fmt, args...) \ FC_CHECK_LOGGING(FC_EM_LOGGING, \ printk(KERN_INFO "em: %6x: " fmt, \ fc_host_port_id(em->lp->host), \ - ##args);) + ##args)) #define FC_EXCH_DBG(exch, fmt, args...) \ FC_CHECK_LOGGING(FC_EXCH_LOGGING, \ printk(KERN_INFO "exch: %6x: %4x: " fmt, \ fc_host_port_id(exch->lp->host), \ - exch->xid, ##args);) + exch->xid, ##args)) #define FC_SCSI_DBG(lport, fmt, args...) \ FC_CHECK_LOGGING(FC_SCSI_LOGGING, \ printk(KERN_INFO "scsi: %6x: " fmt, \ - fc_host_port_id(lport->host), ##args);) + fc_host_port_id(lport->host), ##args)) /* * libfc error codes -- cgit v1.2.3 From 7f74549ff630ad444b0b6bbcabf426f781910906 Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Wed, 29 Jul 2009 17:04:12 -0700 Subject: [SCSI] libfc: change debug messages to give host number. libfc debug messages currently show 'lport: :' wher is the hex assigned port-id. When the lport is logged off, that will be zero, so its hard to distinguish which instance is involved. The FC-ID can change if the port is re-patched or changes VSANs. Two lports may even have the same FC-ID if connected to isolated SANs. Change the debug messages to print the SCSI host number "hostN:", which will not change for the life of the lport. Still show the FC_ID on lport messages. Also, add a macro to FC_RPORT_ID_DBG for rport debugging where there's no rdata structure involved. It takes the lport and port_id as parameters. Use this in fc_rport_recv_plogi_req() and fc_rport_recv_logo_req(). Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- include/scsi/libfc.h | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index ef04a2c52b8c..efdb6ba310e5 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -59,47 +59,51 @@ do { \ #define FC_LPORT_DBG(lport, fmt, args...) \ FC_CHECK_LOGGING(FC_LPORT_LOGGING, \ - printk(KERN_INFO "lport: %6x: " fmt, \ - fc_host_port_id(lport->host), ##args)) + printk(KERN_INFO "host%u: lport %6x: " fmt, \ + (lport)->host->host_no, \ + fc_host_port_id((lport)->host), ##args)) #define FC_DISC_DBG(disc, fmt, args...) \ FC_CHECK_LOGGING(FC_DISC_LOGGING, \ - printk(KERN_INFO "disc: %6x: " fmt, \ - fc_host_port_id(disc->lport->host), \ + printk(KERN_INFO "host%u: disc: " fmt, \ + (disc)->lport->host->host_no, \ ##args)) +#define FC_RPORT_ID_DBG(lport, port_id, fmt, args...) 
\ + FC_CHECK_LOGGING(FC_RPORT_LOGGING, \ + printk(KERN_INFO "host%u: rport %6x: " fmt, \ + (lport)->host->host_no, \ + (port_id), ##args)) + #define FC_RPORT_DBG(rport, fmt, args...) \ do { \ struct fc_rport_libfc_priv *rdata = rport->dd_data; \ struct fc_lport *lport = rdata->local_port; \ - FC_CHECK_LOGGING(FC_RPORT_LOGGING, \ - printk(KERN_INFO "rport: %6x: %6x: " fmt, \ - fc_host_port_id(lport->host), \ - rport->port_id, ##args)); \ + FC_RPORT_ID_DBG(lport, rport->port_id, fmt, ##args); \ } while (0) #define FC_FCP_DBG(pkt, fmt, args...) \ FC_CHECK_LOGGING(FC_FCP_LOGGING, \ - printk(KERN_INFO "fcp: %6x: %6x: " fmt, \ - fc_host_port_id(pkt->lp->host), \ + printk(KERN_INFO "host%u: fcp: %6x: " fmt, \ + (pkt)->lp->host->host_no, \ pkt->rport->port_id, ##args)) #define FC_EM_DBG(em, fmt, args...) \ FC_CHECK_LOGGING(FC_EM_LOGGING, \ - printk(KERN_INFO "em: %6x: " fmt, \ - fc_host_port_id(em->lp->host), \ + printk(KERN_INFO "host%u: em: " fmt, \ + (em)->lp->host->host_no, \ ##args)) #define FC_EXCH_DBG(exch, fmt, args...) \ FC_CHECK_LOGGING(FC_EXCH_LOGGING, \ - printk(KERN_INFO "exch: %6x: %4x: " fmt, \ - fc_host_port_id(exch->lp->host), \ + printk(KERN_INFO "host%u: xid %4x: " fmt, \ + (exch)->lp->host->host_no, \ exch->xid, ##args)) #define FC_SCSI_DBG(lport, fmt, args...) \ FC_CHECK_LOGGING(FC_SCSI_LOGGING, \ - printk(KERN_INFO "scsi: %6x: " fmt, \ - fc_host_port_id(lport->host), ##args)) + printk(KERN_INFO "host%u: scsi: " fmt, \ + (lport)->host->host_no, ##args)) /* * libfc error codes -- cgit v1.2.3 From b1d9fd5574763abe5c763e32e3547a4adee9bd88 Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Wed, 29 Jul 2009 17:04:22 -0700 Subject: [SCSI] libfc: rename lport NONE state to DISABLED The state NONE was meant to be invalid, but has been used as the initial state. Rename it to be DISABLED, as more descriptive. Further patches will make it the like the RESET state, except it won't transition to FLOGI until fc_lport_fabric_login() is called. Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_exch.c | 2 +- drivers/scsi/libfc/fc_lport.c | 12 ++++++------ include/scsi/libfc.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 145ab9ba55ea..e6d82d780acd 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -1875,7 +1875,7 @@ void fc_exch_recv(struct fc_lport *lp, struct fc_exch_mgr *mp, u32 f_ctl; /* lport lock ? 
*/ - if (!lp || !mp || (lp->state == LPORT_ST_NONE)) { + if (!lp || !mp || lp->state == LPORT_ST_DISABLED) { FC_LPORT_DBG(lp, "Receiving frames for an lport that " "has not been initialized correctly\n"); fc_frame_free(fp); diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index 745fa5555d6a..3b28190ca2eb 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -113,7 +113,7 @@ static void fc_lport_enter_ready(struct fc_lport *); static void fc_lport_enter_logo(struct fc_lport *); static const char *fc_lport_state_names[] = { - [LPORT_ST_NONE] = "none", + [LPORT_ST_DISABLED] = "disabled", [LPORT_ST_FLOGI] = "FLOGI", [LPORT_ST_DNS] = "dNS", [LPORT_ST_RPN_ID] = "RPN_ID", @@ -550,7 +550,7 @@ int fc_fabric_login(struct fc_lport *lport) int rc = -1; mutex_lock(&lport->lp_mutex); - if (lport->state == LPORT_ST_NONE) { + if (lport->state == LPORT_ST_DISABLED) { fc_lport_enter_reset(lport); rc = 0; } @@ -637,7 +637,7 @@ EXPORT_SYMBOL(fc_fabric_logoff); int fc_lport_destroy(struct fc_lport *lport) { mutex_lock(&lport->lp_mutex); - lport->state = LPORT_ST_NONE; + lport->state = LPORT_ST_DISABLED; lport->link_up = 0; lport->tt.frame_send = fc_frame_drop; mutex_unlock(&lport->lp_mutex); @@ -992,7 +992,7 @@ static void fc_lport_error(struct fc_lport *lport, struct fc_frame *fp) schedule_delayed_work(&lport->retry_work, delay); } else { switch (lport->state) { - case LPORT_ST_NONE: + case LPORT_ST_DISABLED: case LPORT_ST_READY: case LPORT_ST_RESET: case LPORT_ST_RPN_ID: @@ -1316,7 +1316,7 @@ static void fc_lport_timeout(struct work_struct *work) mutex_lock(&lport->lp_mutex); switch (lport->state) { - case LPORT_ST_NONE: + case LPORT_ST_DISABLED: case LPORT_ST_READY: case LPORT_ST_RESET: WARN_ON(1); @@ -1550,7 +1550,7 @@ int fc_lport_config(struct fc_lport *lport) INIT_DELAYED_WORK(&lport->retry_work, fc_lport_timeout); mutex_init(&lport->lp_mutex); - fc_lport_state_enter(lport, LPORT_ST_NONE); + fc_lport_state_enter(lport, LPORT_ST_DISABLED); fc_lport_add_fc4_type(lport, FC_TYPE_FCP); fc_lport_add_fc4_type(lport, FC_TYPE_CT); diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index efdb6ba310e5..b5c9b285b462 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -129,7 +129,7 @@ do { \ * FC HBA status */ enum fc_lport_state { - LPORT_ST_NONE = 0, + LPORT_ST_DISABLED = 0, LPORT_ST_FLOGI, LPORT_ST_DNS, LPORT_ST_RPN_ID, -- cgit v1.2.3 From 141940548c6919c22bf0573c68fd59d961e22475 Mon Sep 17 00:00:00 2001 From: Joe Eykholt Date: Wed, 29 Jul 2009 17:04:43 -0700 Subject: [SCSI] libfc: rename rport state "NONE" to "DELETE". State RPORT_ST_NONE was intented to be an invalid state (0), never used. This was a misguided attempt to be sure it was always initialized. Having an extra state meaning nothing requires switch statements to have a case covering that state. State NONE has been used instead to mean the remote port is being deleted. Changing the name to RPORT_ST_DELETE. 
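The point about switch coverage can be made with a small illustration (hypothetical names, not libfc code): a placeholder zero state forces every state switch to carry a dead arm, whereas dropping it lets the first real state take the value 0 and frees the slot at the end of the enum for a state with actual meaning, as RPORT_ST_DELETE now has.

enum widget_state {
    WIDGET_ST_NONE = 0,     /* "invalid", never expected to occur */
    WIDGET_ST_INIT,
    WIDGET_ST_READY,
};

static const char *widget_state_name(enum widget_state state)
{
    switch (state) {
    case WIDGET_ST_INIT:
        return "init";
    case WIDGET_ST_READY:
        return "ready";
    case WIDGET_ST_NONE:    /* dead arm kept only to cover the enum */
        return "none";
    }
    return "unknown";
}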
Signed-off-by: Joe Eykholt Signed-off-by: Robert Love Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_rport.c | 28 ++++++++++++++-------------- include/scsi/libfc.h | 2 +- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index 7162385f52eb..bf7364fc16cb 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -77,13 +77,13 @@ static void fc_rport_error_retry(struct fc_rport *, struct fc_frame *); static void fc_rport_work(struct work_struct *); static const char *fc_rport_state_names[] = { - [RPORT_ST_NONE] = "None", [RPORT_ST_INIT] = "Init", [RPORT_ST_PLOGI] = "PLOGI", [RPORT_ST_PRLI] = "PRLI", [RPORT_ST_RTV] = "RTV", [RPORT_ST_READY] = "Ready", [RPORT_ST_LOGO] = "LOGO", + [RPORT_ST_DELETE] = "Delete", }; static void fc_rport_rogue_destroy(struct device *dev) @@ -326,8 +326,8 @@ int fc_rport_logoff(struct fc_rport *rport) FC_RPORT_DBG(rport, "Remove port\n"); - if (rdata->rp_state == RPORT_ST_NONE) { - FC_RPORT_DBG(rport, "Port in NONE state, not removing\n"); + if (rdata->rp_state == RPORT_ST_DELETE) { + FC_RPORT_DBG(rport, "Port in Delete state, not removing\n"); mutex_unlock(&rdata->rp_mutex); goto out; } @@ -335,10 +335,10 @@ int fc_rport_logoff(struct fc_rport *rport) fc_rport_enter_logo(rport); /* - * Change the state to NONE so that we discard + * Change the state to Delete so that we discard * the response. */ - fc_rport_state_enter(rport, RPORT_ST_NONE); + fc_rport_state_enter(rport, RPORT_ST_DELETE); mutex_unlock(&rdata->rp_mutex); @@ -405,7 +405,7 @@ static void fc_rport_timeout(struct work_struct *work) break; case RPORT_ST_READY: case RPORT_ST_INIT: - case RPORT_ST_NONE: + case RPORT_ST_DELETE: break; } @@ -433,14 +433,14 @@ static void fc_rport_error(struct fc_rport *rport, struct fc_frame *fp) case RPORT_ST_PRLI: case RPORT_ST_LOGO: rdata->event = RPORT_EV_FAILED; - fc_rport_state_enter(rport, RPORT_ST_NONE); + fc_rport_state_enter(rport, RPORT_ST_DELETE); queue_work(rport_event_queue, &rdata->event_work); break; case RPORT_ST_RTV: fc_rport_enter_ready(rport); break; - case RPORT_ST_NONE: + case RPORT_ST_DELETE: case RPORT_ST_READY: case RPORT_ST_INIT: break; @@ -652,7 +652,7 @@ static void fc_rport_prli_resp(struct fc_seq *sp, struct fc_frame *fp, } else { FC_RPORT_DBG(rport, "Bad ELS response for PRLI command\n"); rdata->event = RPORT_EV_FAILED; - fc_rport_state_enter(rport, RPORT_ST_NONE); + fc_rport_state_enter(rport, RPORT_ST_DELETE); queue_work(rport_event_queue, &rdata->event_work); } @@ -703,7 +703,7 @@ static void fc_rport_logo_resp(struct fc_seq *sp, struct fc_frame *fp, } else { FC_RPORT_DBG(rport, "Bad ELS response for LOGO command\n"); rdata->event = RPORT_EV_LOGO; - fc_rport_state_enter(rport, RPORT_ST_NONE); + fc_rport_state_enter(rport, RPORT_ST_DELETE); queue_work(rport_event_queue, &rdata->event_work); } @@ -1012,7 +1012,7 @@ static void fc_rport_recv_plogi_req(struct fc_rport *rport, "- ignored for now\n", rdata->rp_state); /* XXX TBD - should reset */ break; - case RPORT_ST_NONE: + case RPORT_ST_DELETE: default: FC_RPORT_DBG(rport, "Received PLOGI in unexpected " "state %d\n", rdata->rp_state); @@ -1238,7 +1238,7 @@ static void fc_rport_recv_prlo_req(struct fc_rport *rport, struct fc_seq *sp, FC_RPORT_DBG(rport, "Received PRLO request while in state %s\n", fc_rport_state(rport)); - if (rdata->rp_state == RPORT_ST_NONE) { + if (rdata->rp_state == RPORT_ST_DELETE) { fc_frame_free(fp); return; } 
@@ -1271,13 +1271,13 @@ static void fc_rport_recv_logo_req(struct fc_rport *rport, struct fc_seq *sp, FC_RPORT_DBG(rport, "Received LOGO request while in state %s\n", fc_rport_state(rport)); - if (rdata->rp_state == RPORT_ST_NONE) { + if (rdata->rp_state == RPORT_ST_DELETE) { fc_frame_free(fp); return; } rdata->event = RPORT_EV_LOGO; - fc_rport_state_enter(rport, RPORT_ST_NONE); + fc_rport_state_enter(rport, RPORT_ST_DELETE); queue_work(rport_event_queue, &rdata->event_work); lport->tt.seq_els_rsp_send(sp, ELS_LS_ACC, NULL); diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index b5c9b285b462..04db7a9e631b 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -147,13 +147,13 @@ enum fc_disc_event { }; enum fc_rport_state { - RPORT_ST_NONE = 0, RPORT_ST_INIT, /* initialized */ RPORT_ST_PLOGI, /* waiting for PLOGI completion */ RPORT_ST_PRLI, /* waiting for PRLI completion */ RPORT_ST_RTV, /* waiting for RTV completion */ RPORT_ST_READY, /* ready for use */ RPORT_ST_LOGO, /* port logout sent */ + RPORT_ST_DELETE, /* port being deleted */ }; enum fc_rport_trans_state { -- cgit v1.2.3 From 96316099ac3cb259eac2d6891f3c75b38b29d26e Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Wed, 29 Jul 2009 17:05:00 -0700 Subject: [SCSI] fcoe, libfc: adds exchange manager(EM) anchor list per lport and related APIs Adds an EM list based on an anchor struct, fc_exch_mgr_anchor. An anchor allows the same EM instance to be shared by more than one lport on an Ethernet device; the implementation follows the design discussed at http://www.open-fcoe.org/pipermail/devel/2009-June/002566.html. A shared EM is required when multiple lports sit on one Ethernet device, as with multiple VLANs or NPIV. Adds the fc_exch_mgr_add API to add an EM to an lport and the fc_exch_mgr_del API to delete a previously added EM. Also adds fc_exch_mgr_destroy() to destroy an allocated EM. A kref is added to the EM to track its usage count; the EM is destroyed when the kref drops to zero, i.e. when it is no longer in use. The caller can pass a match function to fc_exch_mgr_add, which is used to decide whether an exchange should be allocated from that EM. The call to fcoe_em_config is moved below the fcoe_libfc_config call, so that the list head lp->ema_list is initialized before the EM is configured.
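To make the anchor API concrete, here is a hedged sketch of how a low-level driver might share one EM between two lports once the whole series is applied (a follow-on patch below folds the match argument into fc_exch_mgr_alloc(), which then anchors the EM itself); the match policy, function names and xid bounds are assumptions for illustration, not code from the tree.

#include <scsi/fc_frame.h>
#include <scsi/libfc.h>

/* Illustrative match function: let this EM serve only FCP frames. */
static bool my_lld_match(struct fc_frame *fp)
{
    return fc_frame_header_get(fp)->fh_type == FC_TYPE_FCP;
}

/* Hypothetical setup sharing one EM between two lports on one netdev. */
static int my_lld_setup_ems(struct fc_lport *primary, struct fc_lport *vport)
{
    struct fc_exch_mgr *mp;

    /* Allocates the EM and, with the later patch in this series,
     * also anchors it to 'primary' using the match function. */
    mp = fc_exch_mgr_alloc(primary, FC_CLASS_3, 0x0001, 0x0fff, my_lld_match);
    if (!mp)
        return -ENOMEM;

    /* A second lport on the same Ethernet device shares the EM; the
     * EM's kref keeps it alive until its last anchor is deleted via
     * fc_exch_mgr_del() or fc_exch_mgr_free(). */
    if (!fc_exch_mgr_add(vport, mp, my_lld_match))
        return -ENOMEM;

    return 0;
}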
Signed-off-by: Vasu Dev Signed-off-by: Robert Love Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- drivers/scsi/fcoe/fcoe.c | 14 ++++++------- drivers/scsi/libfc/fc_exch.c | 48 +++++++++++++++++++++++++++++++++++++++++++ drivers/scsi/libfc/fc_lport.c | 1 + include/scsi/libfc.h | 24 ++++++++++++++++++++++ 4 files changed, 80 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 14a4017a1535..719a99d4a438 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -603,18 +603,18 @@ static int fcoe_if_create(struct net_device *netdev) goto out_netdev_cleanup; } - /* lport exch manager allocation */ - rc = fcoe_em_config(lp); + /* Initialize the library */ + rc = fcoe_libfc_config(lp, &fcoe_libfc_fcn_templ); if (rc) { - FCOE_NETDEV_DBG(netdev, "Could not configure the EM for the " + FCOE_NETDEV_DBG(netdev, "Could not configure libfc for the " "interface\n"); - goto out_netdev_cleanup; + goto out_lp_destroy; } - /* Initialize the library */ - rc = fcoe_libfc_config(lp, &fcoe_libfc_fcn_templ); + /* lport exch manager allocation */ + rc = fcoe_em_config(lp); if (rc) { - FCOE_NETDEV_DBG(netdev, "Could not configure libfc for the " + FCOE_NETDEV_DBG(netdev, "Could not configure the EM for the " "interface\n"); goto out_lp_destroy; } diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index cab54996375c..f1fa2b196e98 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -55,6 +55,7 @@ static struct kmem_cache *fc_em_cachep; /* cache for exchanges */ */ struct fc_exch_mgr { enum fc_class class; /* default class for sequences */ + struct kref kref; /* exchange mgr reference count */ spinlock_t em_lock; /* exchange manager lock, must be taken before ex_lock */ u16 last_xid; /* last allocated exchange ID */ @@ -84,6 +85,12 @@ struct fc_exch_mgr { }; #define fc_seq_exch(sp) container_of(sp, struct fc_exch, seq) +struct fc_exch_mgr_anchor { + struct list_head ema_list; + struct fc_exch_mgr *mp; + bool (*match)(struct fc_frame *); +}; + static void fc_exch_rrq(struct fc_exch *); static void fc_seq_ls_acc(struct fc_seq *); static void fc_seq_ls_rjt(struct fc_seq *, enum fc_els_rjt_reason, @@ -1729,6 +1736,47 @@ reject: fc_frame_free(fp); } +struct fc_exch_mgr_anchor *fc_exch_mgr_add(struct fc_lport *lport, + struct fc_exch_mgr *mp, + bool (*match)(struct fc_frame *)) +{ + struct fc_exch_mgr_anchor *ema; + + ema = kmalloc(sizeof(*ema), GFP_ATOMIC); + if (!ema) + return ema; + + ema->mp = mp; + ema->match = match; + /* add EM anchor to EM anchors list */ + list_add_tail(&ema->ema_list, &lport->ema_list); + kref_get(&mp->kref); + return ema; +} +EXPORT_SYMBOL(fc_exch_mgr_add); + +static void fc_exch_mgr_destroy(struct kref *kref) +{ + struct fc_exch_mgr *mp = container_of(kref, struct fc_exch_mgr, kref); + + /* + * The total exch count must be zero + * before freeing exchange manager. 
+ */ + WARN_ON(mp->total_exches != 0); + mempool_destroy(mp->ep_pool); + kfree(mp); +} + +void fc_exch_mgr_del(struct fc_exch_mgr_anchor *ema) +{ + /* remove EM anchor from EM anchors list */ + list_del(&ema->ema_list); + kref_put(&ema->mp->kref, fc_exch_mgr_destroy); + kfree(ema); +} +EXPORT_SYMBOL(fc_exch_mgr_del); + struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp, enum fc_class class, u16 min_xid, u16 max_xid) diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c index a430335ebf59..ca8ea264b684 100644 --- a/drivers/scsi/libfc/fc_lport.c +++ b/drivers/scsi/libfc/fc_lport.c @@ -1618,6 +1618,7 @@ int fc_lport_init(struct fc_lport *lport) if (lport->link_supported_speeds & FC_PORTSPEED_10GBIT) fc_host_supported_speeds(lport->host) |= FC_PORTSPEED_10GBIT; + INIT_LIST_HEAD(&lport->ema_list); return 0; } EXPORT_SYMBOL(fc_lport_init); diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 04db7a9e631b..b381b1ca9aec 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -348,6 +348,7 @@ static inline bool fc_fcp_is_read(const struct fc_fcp_pkt *fsp) */ struct fc_exch_mgr; +struct fc_exch_mgr_anchor; /* * Sequence. @@ -709,6 +710,7 @@ struct fc_lport { /* Associations */ struct Scsi_Host *host; struct fc_exch_mgr *emp; + struct list_head ema_list; struct fc_rport *dns_rp; struct fc_rport *ptp_rp; void *scsi_priv; @@ -963,6 +965,28 @@ int fc_elsct_init(struct fc_lport *lp); */ int fc_exch_init(struct fc_lport *lp); +/* + * Adds Exchange Manager (EM) mp to lport. + * + * Adds specified mp to lport using struct fc_exch_mgr_anchor, + * the struct fc_exch_mgr_anchor allows same EM sharing by + * more than one lport with their specified match function, + * the match function is used in allocating exchange from + * added mp. + */ +struct fc_exch_mgr_anchor *fc_exch_mgr_add(struct fc_lport *lport, + struct fc_exch_mgr *mp, + bool (*match)(struct fc_frame *)); + +/* + * Deletes Exchange Manager (EM) from lport by removing + * its anchor ema from lport. + * + * If removed anchor ema was the last user of its associated EM + * then also destroys associated EM. + */ +void fc_exch_mgr_del(struct fc_exch_mgr_anchor *ema); + /* * Allocates an Exchange Manager (EM). * -- cgit v1.2.3 From d459b7ea1b4c7aa3dacfeee174d02b2f7a95850d Mon Sep 17 00:00:00 2001 From: Robert Love Date: Wed, 29 Jul 2009 17:05:05 -0700 Subject: [SCSI] libfc: Remove the FC_EM_DBG macro Currently there is a 1:1 relationship between the lport and exchange manager. This macro takes an EM as an argument and determines the lport from it. However, later patches will use an EM list per lport, so we will no longer have this 1:1 relationship- this macro must change. The FC_EM_DBG macro is rarely used. There are four callers, two can use FC_LPORT_DBG instead and two can be removed since they're not necessary. This patch makes those changes and removes the macro. 
Signed-off-by: Robert Love Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_exch.c | 13 ++++++------- include/scsi/libfc.h | 6 ------ 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index f1fa2b196e98..3ad7f88e7ae3 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -1129,7 +1129,7 @@ static void fc_exch_recv_req(struct fc_lport *lp, struct fc_exch_mgr *mp, lp->tt.lport_recv(lp, sp, fp); fc_exch_release(ep); /* release from lookup */ } else { - FC_EM_DBG(mp, "exch/seq lookup failed: reject %x\n", reject); + FC_LPORT_DBG(lp, "exch/seq lookup failed: reject %x\n", reject); fc_frame_free(fp); } } @@ -1235,13 +1235,12 @@ static void fc_exch_recv_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) struct fc_seq *sp; sp = fc_seq_lookup_orig(mp, fp); /* doesn't hold sequence */ - if (!sp) { + + if (!sp) atomic_inc(&mp->stats.xid_not_found); - FC_EM_DBG(mp, "seq lookup failed\n"); - } else { + else atomic_inc(&mp->stats.non_bls_resp); - FC_EM_DBG(mp, "non-BLS response to sequence"); - } + fc_frame_free(fp); } @@ -1950,7 +1949,7 @@ void fc_exch_recv(struct fc_lport *lp, struct fc_exch_mgr *mp, fc_exch_recv_req(lp, mp, fp); break; default: - FC_EM_DBG(mp, "dropping invalid frame (eof %x)", fr_eof(fp)); + FC_LPORT_DBG(lp, "dropping invalid frame (eof %x)", fr_eof(fp)); fc_frame_free(fp); break; } diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index b381b1ca9aec..f1bde91f98a2 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -88,12 +88,6 @@ do { \ (pkt)->lp->host->host_no, \ pkt->rport->port_id, ##args)) -#define FC_EM_DBG(em, fmt, args...) \ - FC_CHECK_LOGGING(FC_EM_LOGGING, \ - printk(KERN_INFO "host%u: em: " fmt, \ - (em)->lp->host->host_no, \ - ##args)) - #define FC_EXCH_DBG(exch, fmt, args...) \ FC_CHECK_LOGGING(FC_EXCH_LOGGING, \ printk(KERN_INFO "host%u: xid %4x: " fmt, \ -- cgit v1.2.3 From 52ff878c912215210f53c0a080552dd6ba3055a2 Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Wed, 29 Jul 2009 17:05:10 -0700 Subject: [SCSI] fcoe, fnic, libfc: modifies current code paths to use EM anchor list Modifies current code to use EM anchor list in EM allocation, EM free, EM reset, exch allocation and exch lookup code paths. 1. Modifies fc_exch_mgr_alloc to accept EM match function and then have allocated EM added to the lport using fc_exch_mgr_add API while also updating EM kref for newly added EM. 2. Updates fc_exch_mgr_free API to accept only lport pointer instead EM and then have this API free all EMs of the lport from EM anchor list. 3. Removes single lport pointer link from the EM, which was used in associating lport pointer in newly allocated exchange. Instead have lport pointer passed along new exchange allocation call path and then store passed lport pointer in newly allocated exchange, this will allow a single EM instance to be used across more than one lport and used in EM reset to reset only lport specific exchanges. 4. Modifies fc_exch_mgr_reset to reset all EMs from the EM anchor list of the lport, adds additional exch lport pointer (ep->lp) check for shared EM case to reset exchange specific to a lport requested reset. 5. Updates exch allocation API fc_exch_alloc to use EM anchor list and its anchor match func pointer. The fc_exch_alloc will walk the list of EMs until it finds a match, a match will be either null match func pointer or call to match function returning true value. 6. 
Updates fc_exch_recv to accept incoming frame on local port using only lport pointer and frame pointer without specifying EM instance of incoming frame. Instead modified fc_exch_recv to locate EM for the incoming frame by matching xid of incoming frame against a EM xid range. This change was required to use EM list in libfc Rx path and after this change the lport fc_exch_mgr pointer emp is not needed anymore, so removed emp pointer. 7. Updates fnic for removed lport emp pointer and above modified libfc APIs fc_exch_recv, fc_exch_mgr_alloc and fc_exch_mgr_free. 8. Removes exch_get and exch_put from libfc_function_template as these are no longer needed with EM anchor list and its match function use. Also removes its default function fc_exch_get. A defect this patch introduced regarding the libfc initialization order in the fnic driver was fixed by Joe Eykholt . Signed-off-by: Vasu Dev Signed-off-by: Robert Love Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- drivers/scsi/fcoe/fcoe.c | 16 ++-- drivers/scsi/fcoe/libfcoe.c | 2 +- drivers/scsi/fnic/fnic_fcs.c | 2 +- drivers/scsi/fnic/fnic_main.c | 20 ++--- drivers/scsi/libfc/fc_exch.c | 191 ++++++++++++++++++++++++++---------------- include/scsi/libfc.h | 48 ++--------- 6 files changed, 142 insertions(+), 137 deletions(-) (limited to 'include') diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 719a99d4a438..ebf2e20370d7 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -423,11 +423,8 @@ static int fcoe_shost_config(struct fc_lport *lp, struct Scsi_Host *shost, */ static inline int fcoe_em_config(struct fc_lport *lp) { - BUG_ON(lp->emp); - - lp->emp = fc_exch_mgr_alloc(lp, FC_CLASS_3, - FCOE_MIN_XID, FCOE_MAX_XID); - if (!lp->emp) + if (!fc_exch_mgr_alloc(lp, FC_CLASS_3, FCOE_MIN_XID, + FCOE_MAX_XID, NULL)) return -ENOMEM; return 0; @@ -478,8 +475,7 @@ static int fcoe_if_destroy(struct net_device *netdev) scsi_remove_host(lp->host); /* There are no more rports or I/O, free the EM */ - if (lp->emp) - fc_exch_mgr_free(lp->emp); + fc_exch_mgr_free(lp); /* Free existing skbs */ fcoe_clean_pending_queue(lp); @@ -634,7 +630,7 @@ static int fcoe_if_create(struct net_device *netdev) return rc; out_lp_destroy: - fc_exch_mgr_free(lp->emp); /* Free the EM */ + fc_exch_mgr_free(lp); out_netdev_cleanup: fcoe_netdev_cleanup(fc); out_host_put: @@ -1277,7 +1273,7 @@ int fcoe_percpu_receive_thread(void *arg) fh = fc_frame_header_get(fp); if (fh->fh_r_ctl == FC_RCTL_DD_SOL_DATA && fh->fh_type == FC_TYPE_FCP) { - fc_exch_recv(lp, lp->emp, fp); + fc_exch_recv(lp, fp); continue; } if (fr_flags(fp) & FCPHF_CRC_UNCHECKED) { @@ -1298,7 +1294,7 @@ int fcoe_percpu_receive_thread(void *arg) fc_frame_free(fp); continue; } - fc_exch_recv(lp, lp->emp, fp); + fc_exch_recv(lp, fp); } return 0; } diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c index 78caa6be1130..4db719d6ada1 100644 --- a/drivers/scsi/fcoe/libfcoe.c +++ b/drivers/scsi/fcoe/libfcoe.c @@ -885,7 +885,7 @@ static void fcoe_ctlr_recv_els(struct fcoe_ctlr *fip, struct sk_buff *skb) stats->RxFrames++; stats->RxWords += skb->len / FIP_BPW; - fc_exch_recv(lp, lp->emp, fp); + fc_exch_recv(lp, fp); return; len_err: diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c index 07e6eedb83ce..50db3e36a619 100644 --- a/drivers/scsi/fnic/fnic_fcs.c +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -115,7 +115,7 @@ void fnic_handle_frame(struct work_struct *work) } spin_unlock_irqrestore(&fnic->fnic_lock, flags); - fc_exch_recv(lp, lp->emp, 
fp); + fc_exch_recv(lp, fp); } } diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index 2c266c01dc5a..71c7bbe26d05 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -671,14 +671,6 @@ static int __devinit fnic_probe(struct pci_dev *pdev, lp->link_up = 0; lp->tt = fnic_transport_template; - lp->emp = fc_exch_mgr_alloc(lp, FC_CLASS_3, - FCPIO_HOST_EXCH_RANGE_START, - FCPIO_HOST_EXCH_RANGE_END); - if (!lp->emp) { - err = -ENOMEM; - goto err_out_remove_scsi_host; - } - lp->max_retry_count = fnic->config.flogi_retries; lp->max_rport_retry_count = fnic->config.plogi_retries; lp->service_params = (FCP_SPPF_INIT_FCN | FCP_SPPF_RD_XRDY_DIS | @@ -693,12 +685,18 @@ static int __devinit fnic_probe(struct pci_dev *pdev, fc_set_wwnn(lp, fnic->config.node_wwn); fc_set_wwpn(lp, fnic->config.port_wwn); - fc_exch_init(lp); fc_lport_init(lp); + fc_exch_init(lp); fc_elsct_init(lp); fc_rport_init(lp); fc_disc_init(lp); + if (!fc_exch_mgr_alloc(lp, FC_CLASS_3, FCPIO_HOST_EXCH_RANGE_START, + FCPIO_HOST_EXCH_RANGE_END, NULL)) { + err = -ENOMEM; + goto err_out_remove_scsi_host; + } + fc_lport_config(lp); if (fc_set_mfs(lp, fnic->config.maxdatafieldsize + @@ -738,7 +736,7 @@ static int __devinit fnic_probe(struct pci_dev *pdev, return 0; err_out_free_exch_mgr: - fc_exch_mgr_free(lp->emp); + fc_exch_mgr_free(lp); err_out_remove_scsi_host: fc_remove_host(fnic->lport->host); scsi_remove_host(fnic->lport->host); @@ -827,7 +825,7 @@ static void __devexit fnic_remove(struct pci_dev *pdev) fc_remove_host(fnic->lport->host); scsi_remove_host(fnic->lport->host); - fc_exch_mgr_free(fnic->lport->emp); + fc_exch_mgr_free(fnic->lport); vnic_dev_notify_unset(fnic->vdev); fnic_free_vnic_resources(fnic); fnic_free_intr(fnic); diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 3ad7f88e7ae3..324589a5cc03 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -65,7 +65,6 @@ struct fc_exch_mgr { u16 last_read; /* last xid allocated for read */ u32 total_exches; /* total allocated exchanges */ struct list_head ex_list; /* allocated exchanges list */ - struct fc_lport *lp; /* fc device instance */ mempool_t *ep_pool; /* reserve ep's */ /* @@ -275,8 +274,6 @@ static void fc_exch_release(struct fc_exch *ep) mp = ep->em; if (ep->destructor) ep->destructor(&ep->seq, ep->arg); - if (ep->lp->tt.exch_put) - ep->lp->tt.exch_put(ep->lp, mp, ep->xid); WARN_ON(!(ep->esb_stat & ESB_ST_COMPLETE)); mempool_free(ep, mp->ep_pool); } @@ -513,17 +510,20 @@ static u16 fc_em_alloc_xid(struct fc_exch_mgr *mp, const struct fc_frame *fp) return xid; } -/* - * fc_exch_alloc - allocate an exchange. - * @mp : ptr to the exchange manager - * @xid: input xid +/** + * fc_exch_em_alloc() - allocate an exchange from a specified EM. + * @lport: ptr to the local port + * @mp: ptr to the exchange manager + * @fp: ptr to the FC frame + * @xid: input xid * * if xid is supplied zero then assign next free exchange ID * from exchange manager, otherwise use supplied xid. * Returns with exch lock held. 
*/ -struct fc_exch *fc_exch_alloc(struct fc_exch_mgr *mp, - struct fc_frame *fp, u16 xid) +static struct fc_exch *fc_exch_em_alloc(struct fc_lport *lport, + struct fc_exch_mgr *mp, + struct fc_frame *fp, u16 xid) { struct fc_exch *ep; @@ -566,7 +566,7 @@ struct fc_exch *fc_exch_alloc(struct fc_exch_mgr *mp, */ ep->oxid = ep->xid = xid; ep->em = mp; - ep->lp = mp->lp; + ep->lp = lport; ep->f_ctl = FC_FC_FIRST_SEQ; /* next seq is first seq */ ep->rxid = FC_XID_UNKNOWN; ep->class = mp->class; @@ -579,6 +579,31 @@ err: mempool_free(ep, mp->ep_pool); return NULL; } + +/** + * fc_exch_alloc() - allocate an exchange. + * @lport: ptr to the local port + * @fp: ptr to the FC frame + * + * This function walks the list of the exchange manager(EM) + * anchors to select a EM for new exchange allocation. The + * EM is selected having either a NULL match function pointer + * or call to match function returning true. + */ +struct fc_exch *fc_exch_alloc(struct fc_lport *lport, struct fc_frame *fp) +{ + struct fc_exch_mgr_anchor *ema; + struct fc_exch *ep; + + list_for_each_entry(ema, &lport->ema_list, ema_list) { + if (!ema->match || ema->match(fp)) { + ep = fc_exch_em_alloc(lport, ema->mp, fp, 0); + if (ep) + return ep; + } + } + return NULL; +} EXPORT_SYMBOL(fc_exch_alloc); /* @@ -617,12 +642,14 @@ EXPORT_SYMBOL(fc_exch_done); * Allocate a new exchange as responder. * Sets the responder ID in the frame header. */ -static struct fc_exch *fc_exch_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) +static struct fc_exch *fc_exch_resp(struct fc_lport *lport, + struct fc_exch_mgr *mp, + struct fc_frame *fp) { struct fc_exch *ep; struct fc_frame_header *fh; - ep = mp->lp->tt.exch_get(mp->lp, fp); + ep = fc_exch_alloc(lport, fp); if (ep) { ep->class = fc_frame_class(fp); @@ -648,7 +675,7 @@ static struct fc_exch *fc_exch_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) ep->esb_stat &= ~ESB_ST_SEQ_INIT; fc_exch_hold(ep); /* hold for caller */ - spin_unlock_bh(&ep->ex_lock); /* lock from exch_get */ + spin_unlock_bh(&ep->ex_lock); /* lock from fc_exch_alloc */ } return ep; } @@ -658,7 +685,8 @@ static struct fc_exch *fc_exch_resp(struct fc_exch_mgr *mp, struct fc_frame *fp) * If fc_pf_rjt_reason is FC_RJT_NONE then this function will have a hold * on the ep that should be released by the caller. 
*/ -static enum fc_pf_rjt_reason fc_seq_lookup_recip(struct fc_exch_mgr *mp, +static enum fc_pf_rjt_reason fc_seq_lookup_recip(struct fc_lport *lport, + struct fc_exch_mgr *mp, struct fc_frame *fp) { struct fc_frame_header *fh = fc_frame_header_get(fp); @@ -712,7 +740,7 @@ static enum fc_pf_rjt_reason fc_seq_lookup_recip(struct fc_exch_mgr *mp, reject = FC_RJT_RX_ID; goto rel; } - ep = fc_exch_resp(mp, fp); + ep = fc_exch_resp(lport, mp, fp); if (!ep) { reject = FC_RJT_EXCH_EST; /* XXX */ goto out; @@ -1103,7 +1131,7 @@ static void fc_exch_recv_req(struct fc_lport *lp, struct fc_exch_mgr *mp, enum fc_pf_rjt_reason reject; fr_seq(fp) = NULL; - reject = fc_seq_lookup_recip(mp, fp); + reject = fc_seq_lookup_recip(lp, mp, fp); if (reject == FC_RJT_NONE) { sp = fr_seq(fp); /* sequence will be held */ ep = fc_seq_exch(sp); @@ -1467,29 +1495,34 @@ void fc_exch_mgr_reset(struct fc_lport *lp, u32 sid, u32 did) { struct fc_exch *ep; struct fc_exch *next; - struct fc_exch_mgr *mp = lp->emp; + struct fc_exch_mgr *mp; + struct fc_exch_mgr_anchor *ema; - spin_lock_bh(&mp->em_lock); + list_for_each_entry(ema, &lp->ema_list, ema_list) { + mp = ema->mp; + spin_lock_bh(&mp->em_lock); restart: - list_for_each_entry_safe(ep, next, &mp->ex_list, ex_list) { - if ((sid == 0 || sid == ep->sid) && - (did == 0 || did == ep->did)) { - fc_exch_hold(ep); - spin_unlock_bh(&mp->em_lock); - - fc_exch_reset(ep); - - fc_exch_release(ep); - spin_lock_bh(&mp->em_lock); - - /* - * must restart loop incase while lock was down - * multiple eps were released. - */ - goto restart; + list_for_each_entry_safe(ep, next, &mp->ex_list, ex_list) { + if ((lp == ep->lp) && + (sid == 0 || sid == ep->sid) && + (did == 0 || did == ep->did)) { + fc_exch_hold(ep); + spin_unlock_bh(&mp->em_lock); + + fc_exch_reset(ep); + + fc_exch_release(ep); + spin_lock_bh(&mp->em_lock); + + /* + * must restart loop incase while lock + * was down multiple eps were released. + */ + goto restart; + } } + spin_unlock_bh(&mp->em_lock); } - spin_unlock_bh(&mp->em_lock); } EXPORT_SYMBOL(fc_exch_mgr_reset); @@ -1778,7 +1811,8 @@ EXPORT_SYMBOL(fc_exch_mgr_del); struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp, enum fc_class class, - u16 min_xid, u16 max_xid) + u16 min_xid, u16 max_xid, + bool (*match)(struct fc_frame *)) { struct fc_exch_mgr *mp; size_t len; @@ -1803,7 +1837,6 @@ struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp, mp->class = class; mp->total_exches = 0; mp->exches = (struct fc_exch **)(mp + 1); - mp->lp = lp; /* adjust em exch xid range for offload */ mp->min_xid = min_xid; mp->max_xid = max_xid; @@ -1826,6 +1859,18 @@ struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp, if (!mp->ep_pool) goto free_mp; + kref_init(&mp->kref); + if (!fc_exch_mgr_add(lp, mp, match)) { + mempool_destroy(mp->ep_pool); + goto free_mp; + } + + /* + * Above kref_init() sets mp->kref to 1 and then + * call to fc_exch_mgr_add incremented mp->kref again, + * so adjust that extra increment. + */ + kref_put(&mp->kref, fc_exch_mgr_destroy); return mp; free_mp: @@ -1834,27 +1879,15 @@ free_mp: } EXPORT_SYMBOL(fc_exch_mgr_alloc); -void fc_exch_mgr_free(struct fc_exch_mgr *mp) +void fc_exch_mgr_free(struct fc_lport *lport) { - WARN_ON(!mp); - /* - * The total exch count must be zero - * before freeing exchange manager. 
- */ - WARN_ON(mp->total_exches != 0); - mempool_destroy(mp->ep_pool); - kfree(mp); + struct fc_exch_mgr_anchor *ema, *next; + + list_for_each_entry_safe(ema, next, &lport->ema_list, ema_list) + fc_exch_mgr_del(ema); } EXPORT_SYMBOL(fc_exch_mgr_free); -struct fc_exch *fc_exch_get(struct fc_lport *lp, struct fc_frame *fp) -{ - if (!lp || !lp->emp) - return NULL; - - return fc_exch_alloc(lp->emp, fp, 0); -} -EXPORT_SYMBOL(fc_exch_get); struct fc_seq *fc_exch_seq_send(struct fc_lport *lp, struct fc_frame *fp, @@ -1869,7 +1902,7 @@ struct fc_seq *fc_exch_seq_send(struct fc_lport *lp, struct fc_frame_header *fh; int rc = 1; - ep = lp->tt.exch_get(lp, fp); + ep = fc_exch_alloc(lp, fp); if (!ep) { fc_frame_free(fp); return NULL; @@ -1914,24 +1947,44 @@ EXPORT_SYMBOL(fc_exch_seq_send); /* * Receive a frame */ -void fc_exch_recv(struct fc_lport *lp, struct fc_exch_mgr *mp, - struct fc_frame *fp) +void fc_exch_recv(struct fc_lport *lp, struct fc_frame *fp) { struct fc_frame_header *fh = fc_frame_header_get(fp); - u32 f_ctl; + struct fc_exch_mgr_anchor *ema; + u32 f_ctl, found = 0; + u16 oxid; /* lport lock ? */ - if (!lp || !mp || lp->state == LPORT_ST_DISABLED) { + if (!lp || lp->state == LPORT_ST_DISABLED) { FC_LPORT_DBG(lp, "Receiving frames for an lport that " "has not been initialized correctly\n"); fc_frame_free(fp); return; } + f_ctl = ntoh24(fh->fh_f_ctl); + oxid = ntohs(fh->fh_ox_id); + if (f_ctl & FC_FC_EX_CTX) { + list_for_each_entry(ema, &lp->ema_list, ema_list) { + if ((oxid >= ema->mp->min_xid) && + (oxid <= ema->mp->max_xid)) { + found = 1; + break; + } + } + + if (!found) { + FC_LPORT_DBG(lp, "Received response for out " + "of range oxid:%hx\n", oxid); + fc_frame_free(fp); + return; + } + } else + ema = list_entry(lp->ema_list.prev, typeof(*ema), ema_list); + /* * If frame is marked invalid, just drop it. */ - f_ctl = ntoh24(fh->fh_f_ctl); switch (fr_eof(fp)) { case FC_EOF_T: if (f_ctl & FC_FC_END_SEQ) @@ -1939,34 +1992,24 @@ void fc_exch_recv(struct fc_lport *lp, struct fc_exch_mgr *mp, /* fall through */ case FC_EOF_N: if (fh->fh_type == FC_TYPE_BLS) - fc_exch_recv_bls(mp, fp); + fc_exch_recv_bls(ema->mp, fp); else if ((f_ctl & (FC_FC_EX_CTX | FC_FC_SEQ_CTX)) == FC_FC_EX_CTX) - fc_exch_recv_seq_resp(mp, fp); + fc_exch_recv_seq_resp(ema->mp, fp); else if (f_ctl & FC_FC_SEQ_CTX) - fc_exch_recv_resp(mp, fp); + fc_exch_recv_resp(ema->mp, fp); else - fc_exch_recv_req(lp, mp, fp); + fc_exch_recv_req(lp, ema->mp, fp); break; default: FC_LPORT_DBG(lp, "dropping invalid frame (eof %x)", fr_eof(fp)); fc_frame_free(fp); - break; } } EXPORT_SYMBOL(fc_exch_recv); int fc_exch_init(struct fc_lport *lp) { - if (!lp->tt.exch_get) { - /* - * exch_put() should be NULL if - * exch_get() is NULL - */ - WARN_ON(lp->tt.exch_put); - lp->tt.exch_get = fc_exch_get; - } - if (!lp->tt.seq_start_next) lp->tt.seq_start_next = fc_seq_start_next; diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index f1bde91f98a2..c2b928cfafb9 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -517,25 +517,6 @@ struct libfc_function_template { */ void (*exch_done)(struct fc_seq *sp); - /* - * Assigns a EM and a free XID for an new exchange and then - * allocates a new exchange and sequence pair. - * The fp can be used to determine free XID. - * - * STATUS: OPTIONAL - */ - struct fc_exch *(*exch_get)(struct fc_lport *lp, struct fc_frame *fp); - - /* - * Release previously assigned XID by exch_get API. - * The LLD may implement this if XID is assigned by LLD - * in exch_get(). 
- * - * STATUS: OPTIONAL - */ - void (*exch_put)(struct fc_lport *lp, struct fc_exch_mgr *mp, - u16 ex_id); - /* * Start a new sequence on the same exchange/sequence tuple. * @@ -703,7 +684,6 @@ struct fc_lport { /* Associations */ struct Scsi_Host *host; - struct fc_exch_mgr *emp; struct list_head ema_list; struct fc_rport *dns_rp; struct fc_rport *ptp_rp; @@ -996,27 +976,25 @@ void fc_exch_mgr_del(struct fc_exch_mgr_anchor *ema); * a new exchange. * The LLD may choose to have multiple EMs, * e.g. one EM instance per CPU receive thread in LLD. - * The LLD can use exch_get() of struct libfc_function_template - * to specify XID for a new exchange within - * a specified EM instance. * - * The em_idx to uniquely identify an EM instance. + * Specified match function is used in allocating exchanges + * from newly allocated EM. */ struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lp, enum fc_class class, u16 min_xid, - u16 max_xid); + u16 max_xid, + bool (*match)(struct fc_frame *)); /* - * Free an exchange manager. + * Free all exchange managers of a lport. */ -void fc_exch_mgr_free(struct fc_exch_mgr *mp); +void fc_exch_mgr_free(struct fc_lport *lport); /* * Receive a frame on specified local port and exchange manager. */ -void fc_exch_recv(struct fc_lport *lp, struct fc_exch_mgr *mp, - struct fc_frame *fp); +void fc_exch_recv(struct fc_lport *lp, struct fc_frame *fp); /* * This function is for exch_seq_send function pointer in @@ -1057,20 +1035,10 @@ int fc_seq_exch_abort(const struct fc_seq *req_sp, unsigned int timer_msec); */ void fc_exch_done(struct fc_seq *sp); -/* - * Assigns a EM and XID for a frame and then allocates - * a new exchange and sequence pair. - * The fp can be used to determine free XID. - */ -struct fc_exch *fc_exch_get(struct fc_lport *lp, struct fc_frame *fp); - /* * Allocate a new exchange and sequence pair. - * if ex_id is zero then next free exchange id - * from specified exchange manger mp will be assigned. */ -struct fc_exch *fc_exch_alloc(struct fc_exch_mgr *mp, - struct fc_frame *fp, u16 ex_id); +struct fc_exch *fc_exch_alloc(struct fc_lport *lport, struct fc_frame *fp); /* * Start a new sequence on the same exchange as the supplied sequence. */ -- cgit v1.2.3 From 537029f8e950776951ca2a3fe30121d5c05643d1 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Wed, 29 Jul 2009 17:05:32 -0700 Subject: [SCSI] libfc: Remove FC_FRAME_SG_LEN in fc_fcp_send_data FC_FRAME_SG_LEN is 4 which is too small when offload is enabled. Actually, the WARN_ON() in fc_fcp_send_data() should be: WARN_ON(skb_shinfo(fp_skb(fp))->nr_frags > MAX_SKB_FRAGS); But since we will not get anything more than 64K anyway, so there is no need to do this anyway here. Therefore, I am getting rid of FC_FRAME_SG_LEN here and the WARN_ON here. 
Signed-off-by: Yi Zou Signed-off-by: Robert Love Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- drivers/scsi/libfc/fc_fcp.c | 2 -- include/scsi/fc_frame.h | 7 ------- 2 files changed, 9 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index e303e0d12c4b..2069edf80268 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -569,8 +569,6 @@ static int fc_fcp_send_data(struct fc_fcp_pkt *fsp, struct fc_seq *seq, } sg_bytes = min(tlen, sg->length - offset); if (using_sg) { - WARN_ON(skb_shinfo(fp_skb(fp))->nr_frags > - FC_FRAME_SG_LEN); get_page(sg_page(sg)); skb_fill_page_desc(fp_skb(fp), skb_shinfo(fp_skb(fp))->nr_frags, diff --git a/include/scsi/fc_frame.h b/include/scsi/fc_frame.h index 59511057cee0..c35d2383cc26 100644 --- a/include/scsi/fc_frame.h +++ b/include/scsi/fc_frame.h @@ -37,13 +37,6 @@ #define FC_FRAME_HEADROOM 32 /* headroom for VLAN + FCoE headers */ #define FC_FRAME_TAILROOM 8 /* trailer space for FCoE */ -/* - * Information about an individual fibre channel frame received or to be sent. - * The buffer may be in up to 4 additional non-contiguous sections, - * but the linear section must hold the frame header. - */ -#define FC_FRAME_SG_LEN 4 /* scatter/gather list maximum length */ - #define fp_skb(fp) (&((fp)->skb)) #define fr_hdr(fp) ((fp)->skb.data) #define fr_len(fp) ((fp)->skb.len) -- cgit v1.2.3 From 163f52b6cf3a639df6a72c7937e0eb88b20f1ef3 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sat, 1 Aug 2009 00:39:36 +0000 Subject: [SCSI] ses: fix hotplug with multiple devices and expanders In a situation either with expanders or with multiple enclosure devices, hot add doesn't always work. This is because we try to find a single enclosure device attached to the host. Fix this by looping over all enclosure devices attached to the host and also by making the find loop recognise that the enclosure devices may be expander remote (i.e. not parented by the host). Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- drivers/misc/enclosure.c | 44 ++++++++++++++++++++++++++++++++------------ drivers/scsi/ses.c | 10 ++++++---- include/linux/enclosure.h | 3 ++- 3 files changed, 40 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index 348443bdb23b..789d12128c24 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -33,24 +33,44 @@ static DEFINE_MUTEX(container_list_lock); static struct class enclosure_class; /** - * enclosure_find - find an enclosure given a device - * @dev: the device to find for + * enclosure_find - find an enclosure given a parent device + * @dev: the parent to match against + * @start: Optional enclosure device to start from (NULL if none) * - * Looks through the list of registered enclosures to see - * if it can find a match for a device. Returns NULL if no - * enclosure is found. Obtains a reference to the enclosure class - * device which must be released with device_put(). + * Looks through the list of registered enclosures to find all those + * with @dev as a parent. Returns NULL if no enclosure is + * found. @start can be used as a starting point to obtain multiple + * enclosures per parent (should begin with NULL and then be set to + * each returned enclosure device). Obtains a reference to the + * enclosure class device which must be released with device_put(). 
+ * If @start is not NULL, a reference must be taken on it which is + * released before returning (this allows a loop through all + * enclosures to exit with only the reference on the enclosure of + * interest held). Note that the @dev may correspond to the actual + * device housing the enclosure, in which case no iteration via @start + * is required. */ -struct enclosure_device *enclosure_find(struct device *dev) +struct enclosure_device *enclosure_find(struct device *dev, + struct enclosure_device *start) { struct enclosure_device *edev; mutex_lock(&container_list_lock); - list_for_each_entry(edev, &container_list, node) { - if (edev->edev.parent == dev) { - get_device(&edev->edev); - mutex_unlock(&container_list_lock); - return edev; + edev = list_prepare_entry(start, &container_list, node); + if (start) + put_device(&start->edev); + + list_for_each_entry_continue(edev, &container_list, node) { + struct device *parent = edev->edev.parent; + /* parent might not be immediate, so iterate up to + * the root of the tree if necessary */ + while (parent) { + if (parent == dev) { + get_device(&edev->edev); + mutex_unlock(&container_list_lock); + return edev; + } + parent = parent->parent; } } mutex_unlock(&container_list_lock); diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 4f618f487356..e1b8c828f03a 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -413,10 +413,11 @@ static int ses_intf_add(struct device *cdev, if (!scsi_device_enclosure(sdev)) { /* not an enclosure, but might be in one */ - edev = enclosure_find(&sdev->host->shost_gendev); - if (edev) { + struct enclosure_device *prev = NULL; + + while ((edev = enclosure_find(&sdev->host->shost_gendev, prev)) != NULL) { ses_match_to_enclosure(edev, sdev); - put_device(&edev->edev); + prev = edev; } return -ENODEV; } @@ -625,7 +626,8 @@ static void ses_intf_remove(struct device *cdev, if (!scsi_device_enclosure(sdev)) return; - edev = enclosure_find(cdev->parent); + /* exact match to this enclosure */ + edev = enclosure_find(cdev->parent, NULL); if (!edev) return; diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h index 4332442b1b57..d77811e9ed84 100644 --- a/include/linux/enclosure.h +++ b/include/linux/enclosure.h @@ -123,7 +123,8 @@ enclosure_component_register(struct enclosure_device *, unsigned int, int enclosure_add_device(struct enclosure_device *enclosure, int component, struct device *dev); int enclosure_remove_device(struct enclosure_device *enclosure, int component); -struct enclosure_device *enclosure_find(struct device *dev); +struct enclosure_device *enclosure_find(struct device *dev, + struct enclosure_device *start); int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *), void *data); -- cgit v1.2.3 From 43d8eb9cfd0aea93be32181c64e18191b69c211c Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sat, 1 Aug 2009 00:41:22 +0000 Subject: [SCSI] ses: add support for enclosure component hot removal Right at the moment, hot removal of a device within an enclosure does nothing (because the intf_remove only copes with enclosure removal not with component removal). Fix this by adding a function to remove the component. 
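With the reworked prototypes, finding the enclosure that holds a given device becomes an explicit iteration over all enclosures under a parent. A hedged caller-side sketch of that loop (the function name is hypothetical; it mirrors the ses_intf_remove_component() logic in the diff that follows):

#include <linux/device.h>
#include <linux/enclosure.h>

/* Remove 'dev' from whichever enclosure under 'parent' contains it. */
static void my_remove_from_enclosures(struct device *parent, struct device *dev)
{
    struct enclosure_device *edev, *prev = NULL;

    while ((edev = enclosure_find(parent, prev)) != NULL) {
        prev = edev;    /* enclosure_find() drops the reference on 'prev' */
        if (!enclosure_remove_device(edev, dev))
            break;      /* removed; the reference on 'edev' is still held */
    }
    if (edev)
        put_device(&edev->edev);
}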
Also needed to fix the prototype of enclosure_remove_device, since we know the device we've removed but not the internal component number Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- drivers/misc/enclosure.c | 22 ++++++++++++++-------- drivers/scsi/ses.c | 33 ++++++++++++++++++++++++++------- include/linux/enclosure.h | 2 +- 3 files changed, 41 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index 789d12128c24..850706a5e553 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -332,19 +332,25 @@ EXPORT_SYMBOL_GPL(enclosure_add_device); * Returns zero on success or an error. * */ -int enclosure_remove_device(struct enclosure_device *edev, int component) +int enclosure_remove_device(struct enclosure_device *edev, struct device *dev) { struct enclosure_component *cdev; + int i; - if (!edev || component >= edev->components) + if (!edev || !dev) return -EINVAL; - cdev = &edev->component[component]; - - device_del(&cdev->cdev); - put_device(cdev->dev); - cdev->dev = NULL; - return device_add(&cdev->cdev); + for (i = 0; i < edev->components; i++) { + cdev = &edev->component[i]; + if (cdev->dev == dev) { + enclosure_remove_links(cdev); + device_del(&cdev->cdev); + put_device(dev); + cdev->dev = NULL; + return device_add(&cdev->cdev); + } + } + return -ENODEV; } EXPORT_SYMBOL_GPL(enclosure_remove_device); diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index e1b8c828f03a..be593c8525b5 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -616,18 +616,26 @@ static int ses_remove(struct device *dev) return 0; } -static void ses_intf_remove(struct device *cdev, - struct class_interface *intf) +static void ses_intf_remove_component(struct scsi_device *sdev) +{ + struct enclosure_device *edev, *prev = NULL; + + while ((edev = enclosure_find(&sdev->host->shost_gendev, prev)) != NULL) { + prev = edev; + if (!enclosure_remove_device(edev, &sdev->sdev_gendev)) + break; + } + if (edev) + put_device(&edev->edev); +} + +static void ses_intf_remove_enclosure(struct scsi_device *sdev) { - struct scsi_device *sdev = to_scsi_device(cdev->parent); struct enclosure_device *edev; struct ses_device *ses_dev; - if (!scsi_device_enclosure(sdev)) - return; - /* exact match to this enclosure */ - edev = enclosure_find(cdev->parent, NULL); + edev = enclosure_find(&sdev->sdev_gendev, NULL); if (!edev) return; @@ -645,6 +653,17 @@ static void ses_intf_remove(struct device *cdev, enclosure_unregister(edev); } +static void ses_intf_remove(struct device *cdev, + struct class_interface *intf) +{ + struct scsi_device *sdev = to_scsi_device(cdev->parent); + + if (!scsi_device_enclosure(sdev)) + ses_intf_remove_component(sdev); + else + ses_intf_remove_enclosure(sdev); +} + static struct class_interface ses_interface = { .add_dev = ses_intf_add, .remove_dev = ses_intf_remove, diff --git a/include/linux/enclosure.h b/include/linux/enclosure.h index d77811e9ed84..90d1c2184112 100644 --- a/include/linux/enclosure.h +++ b/include/linux/enclosure.h @@ -122,7 +122,7 @@ enclosure_component_register(struct enclosure_device *, unsigned int, enum enclosure_component_type, const char *); int enclosure_add_device(struct enclosure_device *enclosure, int component, struct device *dev); -int enclosure_remove_device(struct enclosure_device *enclosure, int component); +int enclosure_remove_device(struct enclosure_device *, struct device *); struct enclosure_device *enclosure_find(struct device *dev, struct enclosure_device 
*start); int enclosure_for_each_device(int (*fn)(struct enclosure_device *, void *), -- cgit v1.2.3 From 18ee70c9d7b2dcd312a1f8c6536841e7c0fea5ca Mon Sep 17 00:00:00 2001 From: Chandra Seetharaman Date: Mon, 3 Aug 2009 12:42:33 -0700 Subject: [SCSI] scsi_dh: add the interface scsi_dh_set_params() When we moved the device handler functionality from dm layer to SCSI layer we dropped the parameter functionality. This path adds an interface to scsi dh layer to set device handler parameters. Basically, multipath layer need to create a string with all the parameters and call scsi_dh_set_params() after it called scsi_dh_attach() on a device. If a device handler provides such an interface it will handle the parameters as it expects them. Reported-by: Eddie Williams Signed-off-by: Chandra Seetharaman Tested-by: Eddie Williams Signed-off-by: James Bottomley Signed-off-by: James Bottomley --- drivers/scsi/device_handler/scsi_dh.c | 33 +++++++++++++++++++++++++++++++++ include/scsi/scsi_device.h | 1 + include/scsi/scsi_dh.h | 5 +++++ 3 files changed, 39 insertions(+) (limited to 'include') diff --git a/drivers/scsi/device_handler/scsi_dh.c b/drivers/scsi/device_handler/scsi_dh.c index 53a7385e1b4d..3ee1cbc89479 100644 --- a/drivers/scsi/device_handler/scsi_dh.c +++ b/drivers/scsi/device_handler/scsi_dh.c @@ -451,6 +451,39 @@ int scsi_dh_activate(struct request_queue *q) } EXPORT_SYMBOL_GPL(scsi_dh_activate); +/* + * scsi_dh_set_params - set the parameters for the device as per the + * string specified in params. + * @q - Request queue that is associated with the scsi_device for + * which the parameters to be set. + * @params - parameters in the following format + * "no_of_params\0param1\0param2\0param3\0...\0" + * for example, string for 2 parameters with value 10 and 21 + * is specified as "2\010\021\0". + */ +int scsi_dh_set_params(struct request_queue *q, const char *params) +{ + int err = -SCSI_DH_NOSYS; + unsigned long flags; + struct scsi_device *sdev; + struct scsi_device_handler *scsi_dh = NULL; + + spin_lock_irqsave(q->queue_lock, flags); + sdev = q->queuedata; + if (sdev && sdev->scsi_dh_data) + scsi_dh = sdev->scsi_dh_data->scsi_dh; + if (scsi_dh && scsi_dh->set_params && get_device(&sdev->sdev_gendev)) + err = 0; + spin_unlock_irqrestore(q->queue_lock, flags); + + if (err) + return err; + err = scsi_dh->set_params(sdev, params); + put_device(&sdev->sdev_gendev); + return err; +} +EXPORT_SYMBOL_GPL(scsi_dh_set_params); + /* * scsi_dh_handler_exist - Return TRUE(1) if a device handler exists for * the given name. FALSE(0) otherwise. 
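To make the parameter format above concrete: the NUL separators do not survive well inside a single C string literal (a literal "2\010\021\0" would be parsed as octal escapes rather than the digits 1 0 and 2 1), so a caller such as dm-multipath would more safely build the buffer byte by byte. A hypothetical sketch, assuming the scsi_dh_set_params() interface added above, for two parameters with values 10 and 21:

#include <linux/blkdev.h>
#include <scsi/scsi_dh.h>

/* Hypothetical illustration: hand two parameters (10 and 21) to the
 * attached device handler.  The expected layout is
 * "count\0param1\0param2\0", so the buffer is built as an explicit
 * byte array instead of a string literal. */
static int example_set_two_params(struct request_queue *q)
{
        static const char params[] = {
                '2', '\0',           /* number of parameters */
                '1', '0', '\0',      /* param1 = 10 */
                '2', '1', '\0',      /* param2 = 21 */
        };

        return scsi_dh_set_params(q, params);
}
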
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 1f3a4c8044c0..9af48cbf0036 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -187,6 +187,7 @@ struct scsi_device_handler { void (*detach)(struct scsi_device *); int (*activate)(struct scsi_device *); int (*prep_fn)(struct scsi_device *, struct request *); + int (*set_params)(struct scsi_device *, const char *); }; struct scsi_dh_data { diff --git a/include/scsi/scsi_dh.h b/include/scsi/scsi_dh.h index 33efce20c26c..ff2407405b42 100644 --- a/include/scsi/scsi_dh.h +++ b/include/scsi/scsi_dh.h @@ -60,6 +60,7 @@ extern int scsi_dh_activate(struct request_queue *); extern int scsi_dh_handler_exist(const char *); extern int scsi_dh_attach(struct request_queue *, const char *); extern void scsi_dh_detach(struct request_queue *); +extern int scsi_dh_set_params(struct request_queue *, const char *); #else static inline int scsi_dh_activate(struct request_queue *req) { @@ -77,4 +78,8 @@ static inline void scsi_dh_detach(struct request_queue *q) { return; } +static inline int scsi_dh_set_params(struct request_queue *req, const char *params) +{ + return -SCSI_DH_NOSYS; +} #endif -- cgit v1.2.3 From ee5f9757ea17759e1ce5503bdae2b07e48e32af9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 21 Aug 2009 16:33:34 -0700 Subject: pkt_sched: Convert qdisc_watchdog to tasklet_hrtimer None of this stuff should execute in hw IRQ context, therefore use a tasklet_hrtimer so that it runs in softirq context. Signed-off-by: David S. Miller Acked-by: Thomas Gleixner --- include/net/pkt_sched.h | 4 ++-- net/sched/sch_api.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 82a3191375f5..7eafb8d54470 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -61,8 +61,8 @@ psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) } struct qdisc_watchdog { - struct hrtimer timer; - struct Qdisc *qdisc; + struct tasklet_hrtimer timer; + struct Qdisc *qdisc; }; extern void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 24d17ce9c294..e1c2bf7e9ba4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -468,8 +468,8 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) { - hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - wd->timer.function = qdisc_watchdog; + tasklet_hrtimer_init(&wd->timer, qdisc_watchdog, + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); wd->qdisc = qdisc; } EXPORT_SYMBOL(qdisc_watchdog_init); @@ -485,13 +485,13 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) wd->qdisc->flags |= TCQ_F_THROTTLED; time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); - hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); + tasklet_hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); } EXPORT_SYMBOL(qdisc_watchdog_schedule); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { - hrtimer_cancel(&wd->timer); + tasklet_hrtimer_cancel(&wd->timer); wd->qdisc->flags &= ~TCQ_F_THROTTLED; } EXPORT_SYMBOL(qdisc_watchdog_cancel); -- cgit v1.2.3 From 9f77da9f40045253e91f55c12d4481254b513d2d Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Sat, 22 Aug 2009 13:56:45 -0700 Subject: rcu: Move private definitions from include/linux/rcutree.h to kernel/rcutree.h Some information hiding that makes it easier to merge preemptability into rcutree without descending into #include hell. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <1250974613373-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcutree.h | 211 ------------------------------------------ kernel/rcutree.c | 2 + kernel/rcutree.h | 238 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/rcutree_trace.c | 1 + 4 files changed, 241 insertions(+), 211 deletions(-) (limited to 'include') diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d4dfd2489633..e37d5e2a8353 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,217 +30,6 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -#include -#include -#include -#include -#include - -/* - * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. - * In theory, it should be possible to add more levels straightforwardly. - * In practice, this has not been tested, so there is probably some - * bug somewhere. - */ -#define MAX_RCU_LVLS 3 -#define RCU_FANOUT (CONFIG_RCU_FANOUT) -#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) -#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) - -#if NR_CPUS <= RCU_FANOUT -# define NUM_RCU_LVLS 1 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 (NR_CPUS) -# define NUM_RCU_LVL_2 0 -# define NUM_RCU_LVL_3 0 -#elif NR_CPUS <= RCU_FANOUT_SQ -# define NUM_RCU_LVLS 2 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT) -# define NUM_RCU_LVL_2 (NR_CPUS) -# define NUM_RCU_LVL_3 0 -#elif NR_CPUS <= RCU_FANOUT_CUBE -# define NUM_RCU_LVLS 3 -# define NUM_RCU_LVL_0 1 -# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ) -# define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT)) -# define NUM_RCU_LVL_3 NR_CPUS -#else -# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" -#endif /* #if (NR_CPUS) <= RCU_FANOUT */ - -#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) -#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) - -/* - * Dynticks per-CPU state. - */ -struct rcu_dynticks { - int dynticks_nesting; /* Track nesting level, sort of. */ - int dynticks; /* Even value for dynticks-idle, else odd. */ - int dynticks_nmi; /* Even value for either dynticks-idle or */ - /* not in nmi handler, else odd. So this */ - /* remains even for nmi from irq handler. */ -}; - -/* - * Definition for node within the RCU grace-period-detection hierarchy. - */ -struct rcu_node { - spinlock_t lock; - unsigned long qsmask; /* CPUs or groups that need to switch in */ - /* order for current grace period to proceed.*/ - unsigned long qsmaskinit; - /* Per-GP initialization for qsmask. */ - unsigned long grpmask; /* Mask to apply to parent qsmask. */ - int grplo; /* lowest-numbered CPU or group here. */ - int grphi; /* highest-numbered CPU or group here. */ - u8 grpnum; /* CPU/group number for next level up. */ - u8 level; /* root is at level 0. */ - struct rcu_node *parent; -} ____cacheline_internodealigned_in_smp; - -/* Index values for nxttail array in struct rcu_data. */ -#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. 
*/ -#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ -#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ -#define RCU_NEXT_TAIL 3 -#define RCU_NEXT_SIZE 4 - -/* Per-CPU data for read-copy update. */ -struct rcu_data { - /* 1) quiescent-state and grace-period handling : */ - long completed; /* Track rsp->completed gp number */ - /* in order to detect GP end. */ - long gpnum; /* Highest gp number that this CPU */ - /* is aware of having started. */ - long passed_quiesc_completed; - /* Value of completed at time of qs. */ - bool passed_quiesc; /* User-mode/idle loop etc. */ - bool qs_pending; /* Core waits for quiesc state. */ - bool beenonline; /* CPU online at least once. */ - struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ - unsigned long grpmask; /* Mask to apply to leaf qsmask. */ - - /* 2) batch handling */ - /* - * If nxtlist is not NULL, it is partitioned as follows. - * Any of the partitions might be empty, in which case the - * pointer to that partition will be equal to the pointer for - * the following partition. When the list is empty, all of - * the nxttail elements point to nxtlist, which is NULL. - * - * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]): - * Entries that might have arrived after current GP ended - * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): - * Entries known to have arrived before current GP ended - * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): - * Entries that batch # <= ->completed - 1: waiting for current GP - * [nxtlist, *nxttail[RCU_DONE_TAIL]): - * Entries that batch # <= ->completed - * The grace period for these entries has completed, and - * the other grace-period-completed entries may be moved - * here temporarily in rcu_process_callbacks(). - */ - struct rcu_head *nxtlist; - struct rcu_head **nxttail[RCU_NEXT_SIZE]; - long qlen; /* # of queued callbacks */ - long blimit; /* Upper limit on a processed batch */ - -#ifdef CONFIG_NO_HZ - /* 3) dynticks interface. */ - struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ - int dynticks_snap; /* Per-GP tracking for dynticks. */ - int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ -#endif /* #ifdef CONFIG_NO_HZ */ - - /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ -#ifdef CONFIG_NO_HZ - unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ -#endif /* #ifdef CONFIG_NO_HZ */ - unsigned long offline_fqs; /* Kicked due to being offline. */ - unsigned long resched_ipi; /* Sent a resched IPI. */ - - /* 5) __rcu_pending() statistics. */ - long n_rcu_pending; /* rcu_pending() calls since boot. */ - long n_rp_qs_pending; - long n_rp_cb_ready; - long n_rp_cpu_needs_gp; - long n_rp_gp_completed; - long n_rp_gp_started; - long n_rp_need_fqs; - long n_rp_need_nothing; - - int cpu; -}; - -/* Values for signaled field in struct rcu_state. */ -#define RCU_GP_INIT 0 /* Grace period being initialized. */ -#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */ -#define RCU_FORCE_QS 2 /* Need to force quiescent state. 
*/ -#ifdef CONFIG_NO_HZ -#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK -#else /* #ifdef CONFIG_NO_HZ */ -#define RCU_SIGNAL_INIT RCU_FORCE_QS -#endif /* #else #ifdef CONFIG_NO_HZ */ - -#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ -#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ - /* to take at least one */ - /* scheduling clock irq */ - /* before ratting on them. */ - -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - -/* - * RCU global state, including node hierarchy. This hierarchy is - * represented in "heap" form in a dense array. The root (first level) - * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second - * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), - * and the third level in ->node[m+1] and following (->node[m+1] referenced - * by ->level[2]). The number of levels is determined by the number of - * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" - * consisting of a single rcu_node. - */ -struct rcu_state { - struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ - struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ - u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ - u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ - struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ - - /* The following fields are guarded by the root rcu_node's lock. */ - - u8 signaled ____cacheline_internodealigned_in_smp; - /* Force QS state. */ - long gpnum; /* Current gp number. */ - long completed; /* # of last completed gp. */ - spinlock_t onofflock; /* exclude on/offline and */ - /* starting new GP. */ - spinlock_t fqslock; /* Only one task forcing */ - /* quiescent states. */ - unsigned long jiffies_force_qs; /* Time at which to invoke */ - /* force_quiescent_state(). */ - unsigned long n_force_qs; /* Number of calls to */ - /* force_quiescent_state(). */ - unsigned long n_force_qs_lh; /* ~Number of calls leaving */ - /* due to lock unavailable. */ - unsigned long n_force_qs_ngp; /* Number of calls leaving */ - /* due to no GP active. */ -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - unsigned long gp_start; /* Time at which GP started, */ - /* but in jiffies. */ - unsigned long jiffies_stall; /* Time at which to check */ - /* for CPU stalls. */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -#ifdef CONFIG_NO_HZ - long dynticks_completed; /* Value of completed @ snap. */ -#endif /* #ifdef CONFIG_NO_HZ */ -}; - extern void rcu_qsctr_inc(int cpu); extern void rcu_bh_qsctr_inc(int cpu); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 75762cddbe03..a162f859dd32 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -46,6 +46,8 @@ #include #include +#include "rcutree.h" + #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; struct lockdep_map rcu_lock_map = diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 5e872bbf07f5..7cc830a1c44a 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -1,3 +1,239 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * Internal non-public definitions. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2008 + * + * Author: Ingo Molnar + * Paul E. McKenney + */ + +#include +#include +#include +#include +#include + +/* + * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. + * In theory, it should be possible to add more levels straightforwardly. + * In practice, this has not been tested, so there is probably some + * bug somewhere. + */ +#define MAX_RCU_LVLS 3 +#define RCU_FANOUT (CONFIG_RCU_FANOUT) +#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) +#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) + +#if NR_CPUS <= RCU_FANOUT +# define NUM_RCU_LVLS 1 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (NR_CPUS) +# define NUM_RCU_LVL_2 0 +# define NUM_RCU_LVL_3 0 +#elif NR_CPUS <= RCU_FANOUT_SQ +# define NUM_RCU_LVLS 2 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT - 1) / RCU_FANOUT) +# define NUM_RCU_LVL_2 (NR_CPUS) +# define NUM_RCU_LVL_3 0 +#elif NR_CPUS <= RCU_FANOUT_CUBE +# define NUM_RCU_LVLS 3 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 (((NR_CPUS) + RCU_FANOUT_SQ - 1) / RCU_FANOUT_SQ) +# define NUM_RCU_LVL_2 (((NR_CPUS) + (RCU_FANOUT) - 1) / (RCU_FANOUT)) +# define NUM_RCU_LVL_3 NR_CPUS +#else +# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" +#endif /* #if (NR_CPUS) <= RCU_FANOUT */ + +#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) +#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) + +/* + * Dynticks per-CPU state. + */ +struct rcu_dynticks { + int dynticks_nesting; /* Track nesting level, sort of. */ + int dynticks; /* Even value for dynticks-idle, else odd. */ + int dynticks_nmi; /* Even value for either dynticks-idle or */ + /* not in nmi handler, else odd. So this */ + /* remains even for nmi from irq handler. */ +}; + +/* + * Definition for node within the RCU grace-period-detection hierarchy. + */ +struct rcu_node { + spinlock_t lock; + unsigned long qsmask; /* CPUs or groups that need to switch in */ + /* order for current grace period to proceed.*/ + unsigned long qsmaskinit; + /* Per-GP initialization for qsmask. */ + unsigned long grpmask; /* Mask to apply to parent qsmask. */ + int grplo; /* lowest-numbered CPU or group here. */ + int grphi; /* highest-numbered CPU or group here. */ + u8 grpnum; /* CPU/group number for next level up. */ + u8 level; /* root is at level 0. */ + struct rcu_node *parent; +} ____cacheline_internodealigned_in_smp; + +/* Index values for nxttail array in struct rcu_data. */ +#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ +#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ +#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ +#define RCU_NEXT_TAIL 3 +#define RCU_NEXT_SIZE 4 + +/* Per-CPU data for read-copy update. 
*/ +struct rcu_data { + /* 1) quiescent-state and grace-period handling : */ + long completed; /* Track rsp->completed gp number */ + /* in order to detect GP end. */ + long gpnum; /* Highest gp number that this CPU */ + /* is aware of having started. */ + long passed_quiesc_completed; + /* Value of completed at time of qs. */ + bool passed_quiesc; /* User-mode/idle loop etc. */ + bool qs_pending; /* Core waits for quiesc state. */ + bool beenonline; /* CPU online at least once. */ + struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ + unsigned long grpmask; /* Mask to apply to leaf qsmask. */ + + /* 2) batch handling */ + /* + * If nxtlist is not NULL, it is partitioned as follows. + * Any of the partitions might be empty, in which case the + * pointer to that partition will be equal to the pointer for + * the following partition. When the list is empty, all of + * the nxttail elements point to nxtlist, which is NULL. + * + * [*nxttail[RCU_NEXT_READY_TAIL], NULL = *nxttail[RCU_NEXT_TAIL]): + * Entries that might have arrived after current GP ended + * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): + * Entries known to have arrived before current GP ended + * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): + * Entries that batch # <= ->completed - 1: waiting for current GP + * [nxtlist, *nxttail[RCU_DONE_TAIL]): + * Entries that batch # <= ->completed + * The grace period for these entries has completed, and + * the other grace-period-completed entries may be moved + * here temporarily in rcu_process_callbacks(). + */ + struct rcu_head *nxtlist; + struct rcu_head **nxttail[RCU_NEXT_SIZE]; + long qlen; /* # of queued callbacks */ + long blimit; /* Upper limit on a processed batch */ + +#ifdef CONFIG_NO_HZ + /* 3) dynticks interface. */ + struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ + int dynticks_snap; /* Per-GP tracking for dynticks. */ + int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ +#endif /* #ifdef CONFIG_NO_HZ */ + + /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ +#ifdef CONFIG_NO_HZ + unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ +#endif /* #ifdef CONFIG_NO_HZ */ + unsigned long offline_fqs; /* Kicked due to being offline. */ + unsigned long resched_ipi; /* Sent a resched IPI. */ + + /* 5) __rcu_pending() statistics. */ + long n_rcu_pending; /* rcu_pending() calls since boot. */ + long n_rp_qs_pending; + long n_rp_cb_ready; + long n_rp_cpu_needs_gp; + long n_rp_gp_completed; + long n_rp_gp_started; + long n_rp_need_fqs; + long n_rp_need_nothing; + + int cpu; +}; + +/* Values for signaled field in struct rcu_state. */ +#define RCU_GP_INIT 0 /* Grace period being initialized. */ +#define RCU_SAVE_DYNTICK 1 /* Need to scan dyntick state. */ +#define RCU_FORCE_QS 2 /* Need to force quiescent state. */ +#ifdef CONFIG_NO_HZ +#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK +#else /* #ifdef CONFIG_NO_HZ */ +#define RCU_SIGNAL_INIT RCU_FORCE_QS +#endif /* #else #ifdef CONFIG_NO_HZ */ + +#define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ +#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ + /* to take at least one */ + /* scheduling clock irq */ + /* before ratting on them. */ + +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * RCU global state, including node hierarchy. 
This hierarchy is + * represented in "heap" form in a dense array. The root (first level) + * of the hierarchy is in ->node[0] (referenced by ->level[0]), the second + * level in ->node[1] through ->node[m] (->node[1] referenced by ->level[1]), + * and the third level in ->node[m+1] and following (->node[m+1] referenced + * by ->level[2]). The number of levels is determined by the number of + * CPUs and by CONFIG_RCU_FANOUT. Small systems will have a "hierarchy" + * consisting of a single rcu_node. + */ +struct rcu_state { + struct rcu_node node[NUM_RCU_NODES]; /* Hierarchy. */ + struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ + u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ + u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ + struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ + + /* The following fields are guarded by the root rcu_node's lock. */ + + u8 signaled ____cacheline_internodealigned_in_smp; + /* Force QS state. */ + long gpnum; /* Current gp number. */ + long completed; /* # of last completed gp. */ + spinlock_t onofflock; /* exclude on/offline and */ + /* starting new GP. */ + spinlock_t fqslock; /* Only one task forcing */ + /* quiescent states. */ + unsigned long jiffies_force_qs; /* Time at which to invoke */ + /* force_quiescent_state(). */ + unsigned long n_force_qs; /* Number of calls to */ + /* force_quiescent_state(). */ + unsigned long n_force_qs_lh; /* ~Number of calls leaving */ + /* due to lock unavailable. */ + unsigned long n_force_qs_ngp; /* Number of calls leaving */ + /* due to no GP active. */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + unsigned long gp_start; /* Time at which GP started, */ + /* but in jiffies. */ + unsigned long jiffies_stall; /* Time at which to check */ + /* for CPU stalls. */ +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ +#ifdef CONFIG_NO_HZ + long dynticks_completed; /* Value of completed @ snap. */ +#endif /* #ifdef CONFIG_NO_HZ */ +}; + +#ifdef RCU_TREE_NONCORE /* * RCU implementation internal declarations: @@ -8,3 +244,5 @@ DECLARE_PER_CPU(struct rcu_data, rcu_data); extern struct rcu_state rcu_bh_state; DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); +#endif /* #ifdef RCU_TREE_NONCORE */ + diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index fe1dcdbf1ca3..0cb52b887758 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -43,6 +43,7 @@ #include #include +#define RCU_TREE_NONCORE #include "rcutree.h" static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) -- cgit v1.2.3 From d6714c22b43fbcbead7e7b706ff270e15f04a791 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:46 -0700 Subject: rcu: Renamings to increase RCU clarity Make RCU-sched, RCU-bh, and RCU-preempt be underlying implementations, with "RCU" defined in terms of one of the three. Update the outdated rcu_qsctr_inc() names, as these functions no longer increment anything. Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <12509746132696-git-send-email-> Signed-off-by: Ingo Molnar --- Documentation/RCU/trace.txt | 7 ++-- include/linux/rcupdate.h | 21 +++++++++--- include/linux/rcupreempt.h | 4 +-- include/linux/rcutree.h | 8 +++-- kernel/rcupreempt.c | 8 ++--- kernel/rcutree.c | 80 ++++++++++++++++++++++++++++----------------- kernel/rcutree.h | 4 +-- kernel/rcutree_trace.c | 20 ++++++------ kernel/sched.c | 2 +- kernel/softirq.c | 4 +-- 10 files changed, 95 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 02cced183b2d..187bbf10c923 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -191,8 +191,7 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy). The output of "cat rcu/rcudata" looks as follows: -rcu: -rcu: +rcu_sched: 0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10 1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10 2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10 @@ -306,7 +305,7 @@ comma-separated-variable spreadsheet format. The output of "cat rcu/rcugp" looks as follows: -rcu: completed=33062 gpnum=33063 +rcu_sched: completed=33062 gpnum=33063 rcu_bh: completed=464 gpnum=464 Again, this output is for both "rcu" and "rcu_bh". The fields are @@ -413,7 +412,7 @@ o Each element of the form "1/1 0:127 ^0" represents one struct The output of "cat rcu/rcu_pending" looks as follows: -rcu: +rcu_sched: 0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741 1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792 2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629 diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3c89d6a2591f..e920f0fd59d8 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -157,17 +157,28 @@ extern int rcu_scheduler_active; * - call_rcu_sched() and rcu_barrier_sched() * on the write-side to insure proper synchronization. */ -#define rcu_read_lock_sched() preempt_disable() -#define rcu_read_lock_sched_notrace() preempt_disable_notrace() +static inline void rcu_read_lock_sched(void) +{ + preempt_disable(); +} +static inline void rcu_read_lock_sched_notrace(void) +{ + preempt_disable_notrace(); +} /* * rcu_read_unlock_sched - marks the end of a RCU-classic critical section * * See rcu_read_lock_sched for more information. */ -#define rcu_read_unlock_sched() preempt_enable() -#define rcu_read_unlock_sched_notrace() preempt_enable_notrace() - +static inline void rcu_read_unlock_sched(void) +{ + preempt_enable(); +} +static inline void rcu_read_unlock_sched_notrace(void) +{ + preempt_enable_notrace(); +} /** diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index f164ac9b7807..2963f080e48d 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -40,8 +40,8 @@ #include #include -extern void rcu_qsctr_inc(int cpu); -static inline void rcu_bh_qsctr_inc(int cpu) { } +extern void rcu_sched_qs(int cpu); +static inline void rcu_bh_qs(int cpu) { } /* * Someone might want to pass call_rcu_bh as a function pointer. 
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index e37d5e2a8353..a0852d0d915b 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,8 +30,8 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -extern void rcu_qsctr_inc(int cpu); -extern void rcu_bh_qsctr_inc(int cpu); +extern void rcu_sched_qs(int cpu); +extern void rcu_bh_qs(int cpu); extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); @@ -73,7 +73,8 @@ static inline void __rcu_read_unlock_bh(void) #define __synchronize_sched() synchronize_rcu() -#define call_rcu_sched(head, func) call_rcu(head, func) +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)); static inline void synchronize_rcu_expedited(void) { @@ -91,6 +92,7 @@ extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); +extern long rcu_batches_completed_sched(void); static inline void rcu_init_sched(void) { diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 510898a7bd69..7d777c9f394c 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -159,7 +159,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched .dynticks = 1, }; -void rcu_qsctr_inc(int cpu) +void rcu_sched_qs(int cpu) { struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); @@ -967,12 +967,12 @@ void rcu_check_callbacks(int cpu, int user) * If this CPU took its interrupt from user mode or from the * idle loop, and this is not a nested interrupt, then * this CPU has to have exited all prior preept-disable - * sections of code. So increment the counter to note this. + * sections of code. So invoke rcu_sched_qs() to note this. * * The memory barrier is needed to handle the case where * writes from a preempt-disable section of code get reordered * into schedule() by this CPU's write buffer. So the memory - * barrier makes sure that the rcu_qsctr_inc() is seen by other + * barrier makes sure that the rcu_sched_qs() is seen by other * CPUs to happen after any such write. */ @@ -980,7 +980,7 @@ void rcu_check_callbacks(int cpu, int user) (idle_cpu(cpu) && !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { smp_mb(); /* Guard against aggressive schedule(). */ - rcu_qsctr_inc(cpu); + rcu_sched_qs(cpu); } rcu_check_mb(cpu); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index a162f859dd32..4d71d4e8b5a8 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -74,26 +74,25 @@ EXPORT_SYMBOL_GPL(rcu_lock_map); .n_force_qs_ngp = 0, \ } -struct rcu_state rcu_state = RCU_STATE_INITIALIZER(rcu_state); -DEFINE_PER_CPU(struct rcu_data, rcu_data); +struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); +DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); /* - * Increment the quiescent state counter. - * The counter is a bit degenerated: We do not need to know + * Note a quiescent state. Because we do not need to know * how many quiescent states passed, just if there was at least - * one since the start of the grace period. Thus just a flag. + * one since the start of the grace period, this just sets a flag. 
*/ -void rcu_qsctr_inc(int cpu) +void rcu_sched_qs(int cpu) { - struct rcu_data *rdp = &per_cpu(rcu_data, cpu); + struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); rdp->passed_quiesc = 1; rdp->passed_quiesc_completed = rdp->completed; } -void rcu_bh_qsctr_inc(int cpu) +void rcu_bh_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); rdp->passed_quiesc = 1; @@ -113,12 +112,22 @@ static int qlowmark = 100; /* Once only this many pending, use blimit. */ static void force_quiescent_state(struct rcu_state *rsp, int relaxed); +/* + * Return the number of RCU-sched batches processed thus far for debug & stats. + */ +long rcu_batches_completed_sched(void) +{ + return rcu_sched_state.completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); + /* * Return the number of RCU batches processed thus far for debug & stats. + * @@@ placeholder, maps to rcu_batches_completed_sched(). */ long rcu_batches_completed(void) { - return rcu_state.completed; + return rcu_batches_completed_sched(); } EXPORT_SYMBOL_GPL(rcu_batches_completed); @@ -310,7 +319,7 @@ void rcu_irq_exit(void) WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs); /* If the interrupt queued a callback, get out of dyntick mode. */ - if (__get_cpu_var(rcu_data).nxtlist || + if (__get_cpu_var(rcu_sched_data).nxtlist || __get_cpu_var(rcu_bh_data).nxtlist) set_need_resched(); } @@ -847,7 +856,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) /* * Move callbacks from the outgoing CPU to the running CPU. * Note that the outgoing CPU is now quiscent, so it is now - * (uncharacteristically) safe to access it rcu_data structure. + * (uncharacteristically) safe to access its rcu_data structure. * Note also that we must carefully retain the order of the * outgoing CPU's callbacks in order for rcu_barrier() to work * correctly. Finally, note that we start all the callbacks @@ -878,7 +887,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) */ static void rcu_offline_cpu(int cpu) { - __rcu_offline_cpu(cpu, &rcu_state); + __rcu_offline_cpu(cpu, &rcu_sched_state); __rcu_offline_cpu(cpu, &rcu_bh_state); } @@ -973,17 +982,16 @@ void rcu_check_callbacks(int cpu, int user) * Get here if this CPU took its interrupt from user * mode or from the idle loop, and if this is not a * nested interrupt. In this case, the CPU is in - * a quiescent state, so count it. + * a quiescent state, so note it. * * No memory barrier is required here because both - * rcu_qsctr_inc() and rcu_bh_qsctr_inc() reference - * only CPU-local variables that other CPUs neither - * access nor modify, at least not while the corresponding - * CPU is online. + * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local + * variables that other CPUs neither access nor modify, + * at least not while the corresponding CPU is online. */ - rcu_qsctr_inc(cpu); - rcu_bh_qsctr_inc(cpu); + rcu_sched_qs(cpu); + rcu_bh_qs(cpu); } else if (!in_softirq()) { @@ -991,10 +999,10 @@ void rcu_check_callbacks(int cpu, int user) * Get here if this CPU did not take its interrupt from * softirq, in other words, if it is not interrupting * a rcu_bh read-side critical section. This is an _bh - * critical section, so count it. + * critical section, so note it. */ - rcu_bh_qsctr_inc(cpu); + rcu_bh_qs(cpu); } raise_softirq(RCU_SOFTIRQ); } @@ -1174,7 +1182,8 @@ static void rcu_process_callbacks(struct softirq_action *unused) */ smp_mb(); /* See above block comment. 
*/ - __rcu_process_callbacks(&rcu_state, &__get_cpu_var(rcu_data)); + __rcu_process_callbacks(&rcu_sched_state, + &__get_cpu_var(rcu_sched_data)); __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); /* @@ -1231,14 +1240,25 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), } /* - * Queue an RCU callback for invocation after a grace period. + * Queue an RCU-sched callback for invocation after a grace period. + */ +void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_sched_state); +} +EXPORT_SYMBOL_GPL(call_rcu_sched); + +/* + * @@@ Queue an RCU callback for invocation after a grace period. + * @@@ Placeholder pending rcutree_plugin.h. */ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { - __call_rcu(head, func, &rcu_state); + call_rcu_sched(head, func); } EXPORT_SYMBOL_GPL(call_rcu); + /* * Queue an RCU for invocation after a quicker grace period. */ @@ -1311,7 +1331,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) */ int rcu_pending(int cpu) { - return __rcu_pending(&rcu_state, &per_cpu(rcu_data, cpu)) || + return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)); } @@ -1324,7 +1344,7 @@ int rcu_pending(int cpu) int rcu_needs_cpu(int cpu) { /* RCU callbacks either ready or pending? */ - return per_cpu(rcu_data, cpu).nxtlist || + return per_cpu(rcu_sched_data, cpu).nxtlist || per_cpu(rcu_bh_data, cpu).nxtlist; } @@ -1418,7 +1438,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) static void __cpuinit rcu_online_cpu(int cpu) { - rcu_init_percpu_data(cpu, &rcu_state); + rcu_init_percpu_data(cpu, &rcu_sched_state); rcu_init_percpu_data(cpu, &rcu_bh_state); } @@ -1545,10 +1565,10 @@ void __init __rcu_init(void) #ifdef CONFIG_RCU_CPU_STALL_DETECTOR printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ - rcu_init_one(&rcu_state); - RCU_DATA_PTR_INIT(&rcu_state, rcu_data); + rcu_init_one(&rcu_sched_state); + RCU_DATA_PTR_INIT(&rcu_sched_state, rcu_sched_data); for_each_possible_cpu(i) - rcu_boot_init_percpu_data(i, &rcu_state); + rcu_boot_init_percpu_data(i, &rcu_sched_state); rcu_init_one(&rcu_bh_state); RCU_DATA_PTR_INIT(&rcu_bh_state, rcu_bh_data); for_each_possible_cpu(i) diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 7cc830a1c44a..0024e5ddcc68 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -238,8 +238,8 @@ struct rcu_state { /* * RCU implementation internal declarations: */ -extern struct rcu_state rcu_state; -DECLARE_PER_CPU(struct rcu_data, rcu_data); +extern struct rcu_state rcu_sched_state; +DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); extern struct rcu_state rcu_bh_state; DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 0cb52b887758..236c0504fee2 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -77,8 +77,8 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) static int show_rcudata(struct seq_file *m, void *unused) { - seq_puts(m, "rcu:\n"); - PRINT_RCU_DATA(rcu_data, print_one_rcu_data, m); + seq_puts(m, "rcu_sched:\n"); + PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m); seq_puts(m, "rcu_bh:\n"); PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data, m); return 0; @@ -125,8 +125,8 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) seq_puts(m, "\"dt\",\"dt 
nesting\",\"dn\",\"df\","); #endif /* #ifdef CONFIG_NO_HZ */ seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n"); - seq_puts(m, "\"rcu:\"\n"); - PRINT_RCU_DATA(rcu_data, print_one_rcu_data_csv, m); + seq_puts(m, "\"rcu_sched:\"\n"); + PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m); seq_puts(m, "\"rcu_bh:\"\n"); PRINT_RCU_DATA(rcu_bh_data, print_one_rcu_data_csv, m); return 0; @@ -172,8 +172,8 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) static int show_rcuhier(struct seq_file *m, void *unused) { - seq_puts(m, "rcu:\n"); - print_one_rcu_state(m, &rcu_state); + seq_puts(m, "rcu_sched:\n"); + print_one_rcu_state(m, &rcu_sched_state); seq_puts(m, "rcu_bh:\n"); print_one_rcu_state(m, &rcu_bh_state); return 0; @@ -194,8 +194,8 @@ static struct file_operations rcuhier_fops = { static int show_rcugp(struct seq_file *m, void *unused) { - seq_printf(m, "rcu: completed=%ld gpnum=%ld\n", - rcu_state.completed, rcu_state.gpnum); + seq_printf(m, "rcu_sched: completed=%ld gpnum=%ld\n", + rcu_sched_state.completed, rcu_sched_state.gpnum); seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n", rcu_bh_state.completed, rcu_bh_state.gpnum); return 0; @@ -244,8 +244,8 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) static int show_rcu_pending(struct seq_file *m, void *unused) { - seq_puts(m, "rcu:\n"); - print_rcu_pendings(m, &rcu_state); + seq_puts(m, "rcu_sched:\n"); + print_rcu_pendings(m, &rcu_sched_state); seq_puts(m, "rcu_bh:\n"); print_rcu_pendings(m, &rcu_bh_state); return 0; diff --git a/kernel/sched.c b/kernel/sched.c index cda8b81f8801..c9beca67a53e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5325,7 +5325,7 @@ need_resched: preempt_disable(); cpu = smp_processor_id(); rq = cpu_rq(cpu); - rcu_qsctr_inc(cpu); + rcu_sched_qs(cpu); prev = rq->curr; switch_count = &prev->nivcsw; diff --git a/kernel/softirq.c b/kernel/softirq.c index eb5e131a0485..7db25067cd2d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -227,7 +227,7 @@ restart: preempt_count() = prev_count; } - rcu_bh_qsctr_inc(cpu); + rcu_bh_qs(cpu); } h++; pending >>= 1; @@ -721,7 +721,7 @@ static int ksoftirqd(void * __bind_cpu) preempt_enable_no_resched(); cond_resched(); preempt_disable(); - rcu_qsctr_inc((long)__bind_cpu); + rcu_sched_qs((long)__bind_cpu); } preempt_enable(); set_current_state(TASK_INTERRUPTIBLE); -- cgit v1.2.3 From bc33f24bdca8b6e97376e3a182ab69e6cdefa989 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:47 -0700 Subject: rcu: Consolidate sparse and lockdep declarations in include/linux/rcupdate.h Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <12509746132349-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 46 ++++++++++++++++++++++++++++++++++++++++++---- include/linux/rcupreempt.h | 4 ++-- include/linux/rcutree.h | 18 ------------------ 3 files changed, 44 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index e920f0fd59d8..9d85ee19492a 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -80,6 +80,16 @@ extern int rcu_scheduler_active; (ptr)->next = NULL; (ptr)->func = NULL; \ } while (0) +#ifdef CONFIG_DEBUG_LOCK_ALLOC +extern struct lockdep_map rcu_lock_map; +# define rcu_read_acquire() \ + lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) +# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) +#else +# define rcu_read_acquire() do { } while (0) +# define rcu_read_release() do { } while (0) +#endif + /** * rcu_read_lock - mark the beginning of an RCU read-side critical section. * @@ -109,7 +119,12 @@ extern int rcu_scheduler_active; * * It is illegal to block while in an RCU read-side critical section. */ -#define rcu_read_lock() __rcu_read_lock() +static inline void rcu_read_lock(void) +{ + __rcu_read_lock(); + __acquire(RCU); + rcu_read_acquire(); +} /** * rcu_read_unlock - marks the end of an RCU read-side critical section. @@ -126,7 +141,12 @@ extern int rcu_scheduler_active; * used as well. RCU does not care how the writers keep out of each * others' way, as long as they do so. */ -#define rcu_read_unlock() __rcu_read_unlock() +static inline void rcu_read_unlock(void) +{ + rcu_read_release(); + __release(RCU); + __rcu_read_unlock(); +} /** * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section @@ -139,14 +159,24 @@ extern int rcu_scheduler_active; * can use just rcu_read_lock(). * */ -#define rcu_read_lock_bh() __rcu_read_lock_bh() +static inline void rcu_read_lock_bh(void) +{ + __rcu_read_lock_bh(); + __acquire(RCU_BH); + rcu_read_acquire(); +} /* * rcu_read_unlock_bh - marks the end of a softirq-only RCU critical section * * See rcu_read_lock_bh() for more information. 
*/ -#define rcu_read_unlock_bh() __rcu_read_unlock_bh() +static inline void rcu_read_unlock_bh(void) +{ + rcu_read_release(); + __release(RCU_BH); + __rcu_read_unlock_bh(); +} /** * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section @@ -160,10 +190,14 @@ extern int rcu_scheduler_active; static inline void rcu_read_lock_sched(void) { preempt_disable(); + __acquire(RCU_SCHED); + rcu_read_acquire(); } static inline void rcu_read_lock_sched_notrace(void) { preempt_disable_notrace(); + __acquire(RCU_SCHED); + rcu_read_acquire(); } /* @@ -173,10 +207,14 @@ static inline void rcu_read_lock_sched_notrace(void) */ static inline void rcu_read_unlock_sched(void) { + rcu_read_release(); + __release(RCU_SCHED); preempt_enable(); } static inline void rcu_read_unlock_sched_notrace(void) { + rcu_read_release(); + __release(RCU_SCHED); preempt_enable_notrace(); } diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 2963f080e48d..6c9dd9cf8b8e 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -64,8 +64,8 @@ static inline void rcu_bh_qs(int cpu) { } extern void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *head)); -extern void __rcu_read_lock(void) __acquires(RCU); -extern void __rcu_read_unlock(void) __releases(RCU); +extern void __rcu_read_lock(void); +extern void __rcu_read_unlock(void); extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index a0852d0d915b..8a0222ce3b13 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -36,38 +36,20 @@ extern void rcu_bh_qs(int cpu); extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); -#ifdef CONFIG_DEBUG_LOCK_ALLOC -extern struct lockdep_map rcu_lock_map; -# define rcu_read_acquire() \ - lock_acquire(&rcu_lock_map, 0, 0, 2, 1, NULL, _THIS_IP_) -# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) -#else -# define rcu_read_acquire() do { } while (0) -# define rcu_read_release() do { } while (0) -#endif - static inline void __rcu_read_lock(void) { preempt_disable(); - __acquire(RCU); - rcu_read_acquire(); } static inline void __rcu_read_unlock(void) { - rcu_read_release(); - __release(RCU); preempt_enable(); } static inline void __rcu_read_lock_bh(void) { local_bh_disable(); - __acquire(RCU_BH); - rcu_read_acquire(); } static inline void __rcu_read_unlock_bh(void) { - rcu_read_release(); - __release(RCU_BH); local_bh_enable(); } -- cgit v1.2.3 From a157229cabd6dd8cfa82525fc9bf730c94cc9ac2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:51 -0700 Subject: rcu: Simplify rcu_pending()/rcu_check_callbacks() API All calls from outside RCU are of the form: if (rcu_pending(cpu)) rcu_check_callbacks(cpu, user); This is silly, instead we put a call to rcu_pending() in rcu_check_callbacks(), and then make the outside calls be to rcu_check_callbacks(). This cuts down on the code a bit and also gives the compiler a better chance of optimizing. Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <125097461311-git-send-email-> Signed-off-by: Ingo Molnar --- arch/ia64/xen/time.c | 3 +-- include/linux/rcupreempt.h | 1 - include/linux/rcutree.h | 1 - kernel/rcupreempt.c | 10 ++++++++-- kernel/rcutree.c | 5 ++++- kernel/timer.c | 3 +-- 6 files changed, 14 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c index fb8332690179..dbeadb9c8e20 100644 --- a/arch/ia64/xen/time.c +++ b/arch/ia64/xen/time.c @@ -133,8 +133,7 @@ consider_steal_time(unsigned long new_itm) account_idle_ticks(blocked); run_local_timers(); - if (rcu_pending(cpu)) - rcu_check_callbacks(cpu, user_mode(get_irq_regs())); + rcu_check_callbacks(cpu, user_mode(get_irq_regs())); scheduler_tick(); run_posix_cpu_timers(p); diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 6c9dd9cf8b8e..aff4772fb49e 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -66,7 +66,6 @@ extern void call_rcu_sched(struct rcu_head *head, extern void __rcu_read_lock(void); extern void __rcu_read_unlock(void); -extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); #define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 8a0222ce3b13..c739d90f5e68 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -33,7 +33,6 @@ extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); -extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); static inline void __rcu_read_lock(void) diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 7d777c9f394c..0053ce56e326 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -159,6 +159,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched .dynticks = 1, }; +static int rcu_pending(int cpu); + void rcu_sched_qs(int cpu) { struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); @@ -961,7 +963,10 @@ static void rcu_check_mb(int cpu) void rcu_check_callbacks(int cpu, int user) { unsigned long flags; - struct rcu_data *rdp = RCU_DATA_CPU(cpu); + struct rcu_data *rdp; + + if (!rcu_pending(cpu)) + return; /* if nothing for RCU to do. */ /* * If this CPU took its interrupt from user mode or from the @@ -976,6 +981,7 @@ void rcu_check_callbacks(int cpu, int user) * CPUs to happen after any such write. */ + rdp = RCU_DATA_CPU(cpu); if (user || (idle_cpu(cpu) && !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { @@ -1382,7 +1388,7 @@ int rcu_needs_cpu(int cpu) rdp->waitschedlist != NULL); } -int rcu_pending(int cpu) +static int rcu_pending(int cpu) { struct rcu_data *rdp = RCU_DATA_CPU(cpu); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 7c515082ae84..4ce3adcfa94d 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -111,6 +111,7 @@ static int qhimark = 10000; /* If this many pending, ignore blimit. */ static int qlowmark = 100; /* Once only this many pending, use blimit. */ static void force_quiescent_state(struct rcu_state *rsp, int relaxed); +static int rcu_pending(int cpu); /* * Return the number of RCU-sched batches processed thus far for debug & stats. 
@@ -974,6 +975,8 @@ static void rcu_do_batch(struct rcu_data *rdp) */ void rcu_check_callbacks(int cpu, int user) { + if (!rcu_pending(cpu)) + return; /* if nothing for RCU to do. */ if (user || (idle_cpu(cpu) && rcu_scheduler_active && !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { @@ -1329,7 +1332,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) * by the current CPU, returning 1 if so. This function is part of the * RCU implementation; it is -not- an exported member of the RCU API. */ -int rcu_pending(int cpu) +static int rcu_pending(int cpu) { return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)); diff --git a/kernel/timer.c b/kernel/timer.c index a7f07d5a6241..a3d25f415019 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1156,8 +1156,7 @@ void update_process_times(int user_tick) /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); run_local_timers(); - if (rcu_pending(cpu)) - rcu_check_callbacks(cpu, user_tick); + rcu_check_callbacks(cpu, user_tick); printk_tick(); scheduler_tick(); run_posix_cpu_timers(p); -- cgit v1.2.3 From f41d911f8c49a5d65c86504c19e8204bb605c4fd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:52 -0700 Subject: rcu: Merge preemptable-RCU functionality into hierarchical RCU Create a kernel/rcutree_plugin.h file that contains definitions for preemptable RCU (or, under the #else branch of the #ifdef, empty definitions for the classic non-preemptable semantics). These definitions fit into plugins defined in kernel/rcutree.c for this purpose. This variant of preemptable RCU uses a new algorithm whose read-side expense is roughly that of classic hierarchical RCU under CONFIG_PREEMPT. This new algorithm's update-side expense is similar to that of classic hierarchical RCU, and, in absence of read-side preemption or blocking, is exactly that of classic hierarchical RCU. Perhaps more important, this new algorithm has a much simpler implementation, saving well over 1,000 lines of code compared to mainline's implementation of preemptable RCU, which will hopefully be retired in favor of this new algorithm. The simplifications are obtained by maintaining per-task nesting state for running tasks, and using a simple lock-protected algorithm to handle accounting when tasks block within RCU read-side critical sections, making use of lessons learned while creating numerous user-level RCU implementations over the past 18 months. Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <12509746134003-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 15 ++ include/linux/rcupdate.h | 2 +- include/linux/rcupreempt.h | 4 + include/linux/rcutree.h | 16 ++ include/linux/sched.h | 37 ++++ init/Kconfig | 22 ++- kernel/Makefile | 1 + kernel/exit.c | 1 + kernel/fork.c | 5 +- kernel/rcutree.c | 135 +++++++++----- kernel/rcutree.h | 9 + kernel/rcutree_plugin.h | 447 +++++++++++++++++++++++++++++++++++++++++++++ kernel/rcutree_trace.c | 20 ++ lib/Kconfig.debug | 2 +- 14 files changed, 661 insertions(+), 55 deletions(-) create mode 100644 kernel/rcutree_plugin.h (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 7fc01b13be43..971a968831bf 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -94,6 +94,20 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif +#ifdef CONFIG_PREEMPT_RCU +#define INIT_TASK_RCU_PREEMPT(tsk) \ + .rcu_read_lock_nesting = 0, \ + .rcu_flipctr_idx = 0, +#elif defined(CONFIG_TREE_PREEMPT_RCU) +#define INIT_TASK_RCU_PREEMPT(tsk) \ + .rcu_read_lock_nesting = 0, \ + .rcu_read_unlock_special = 0, \ + .rcu_blocked_cpu = -1, \ + .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), +#else +#define INIT_TASK_RCU_PREEMPT(tsk) +#endif + extern struct cred init_cred; #ifdef CONFIG_PERF_COUNTERS @@ -173,6 +187,7 @@ extern struct cred init_cred; INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ + INIT_TASK_RCU_PREEMPT(tsk) \ } diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9d85ee19492a..26892f5e7bd8 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -66,7 +66,7 @@ extern void rcu_scheduler_starting(void); extern int rcu_needs_cpu(int cpu); extern int rcu_scheduler_active; -#if defined(CONFIG_TREE_RCU) +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include #elif defined(CONFIG_PREEMPT_RCU) #include diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index aff4772fb49e..a42ab88e9210 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -98,6 +98,10 @@ static inline long rcu_batches_completed_bh(void) return rcu_batches_completed(); } +static inline void exit_rcu(void) +{ +} + #ifdef CONFIG_RCU_TRACE struct rcupreempt_trace; extern long *rcupreempt_flipctr(int cpu); diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c739d90f5e68..a89307717825 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -35,14 +35,30 @@ extern void rcu_bh_qs(int cpu); extern int rcu_needs_cpu(int cpu); +#ifdef CONFIG_TREE_PREEMPT_RCU + +extern void __rcu_read_lock(void); +extern void __rcu_read_unlock(void); +extern void exit_rcu(void); + +#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + static inline void __rcu_read_lock(void) { preempt_disable(); } + static inline void __rcu_read_unlock(void) { preempt_enable(); } + +static inline void exit_rcu(void) +{ +} + +#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ + static inline void __rcu_read_lock_bh(void) { local_bh_disable(); diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ab08e4bb6b8..d7f98f637a2a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1210,6 +1210,13 @@ struct task_struct { int 
rcu_flipctr_idx; #endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TREE_PREEMPT_RCU + int rcu_read_lock_nesting; + char rcu_read_unlock_special; + int rcu_blocked_cpu; + struct list_head rcu_node_entry; +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; #endif @@ -1723,6 +1730,36 @@ extern cputime_t task_gtime(struct task_struct *p); #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) +#ifdef CONFIG_TREE_PREEMPT_RCU + +#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ +#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ +#define RCU_READ_UNLOCK_GOT_QS (1 << 2) /* CPU has responded to RCU core. */ + +static inline void rcu_copy_process(struct task_struct *p) +{ + p->rcu_read_lock_nesting = 0; + p->rcu_read_unlock_special = 0; + p->rcu_blocked_cpu = -1; + INIT_LIST_HEAD(&p->rcu_node_entry); +} + +#elif defined(CONFIG_PREEMPT_RCU) + +static inline void rcu_copy_process(struct task_struct *p) +{ + p->rcu_read_lock_nesting = 0; + p->rcu_flipctr_idx = 0; +} + +#else + +static inline void rcu_copy_process(struct task_struct *p) +{ +} + +#endif + #ifdef CONFIG_SMP extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); diff --git a/init/Kconfig b/init/Kconfig index 25373cf32672..f88da2d1c1fb 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -335,11 +335,20 @@ config PREEMPT_RCU now-naive assumptions about each RCU read-side critical section remaining on a given CPU through its execution. +config TREE_PREEMPT_RCU + bool "Preemptable tree-based hierarchical RCU" + depends on PREEMPT + help + This option selects the RCU implementation that is + designed for very large SMP systems with hundreds or + thousands of CPUs, but for which real-time response + is also required. + endchoice config RCU_TRACE bool "Enable tracing for RCU" - depends on TREE_RCU || PREEMPT_RCU + depends on TREE_RCU || PREEMPT_RCU || TREE_PREEMPT_RCU help This option provides tracing in RCU which presents stats in debugfs for debugging RCU implementation. @@ -351,7 +360,7 @@ config RCU_FANOUT int "Tree-based hierarchical RCU fanout value" range 2 64 if 64BIT range 2 32 if !64BIT - depends on TREE_RCU + depends on TREE_RCU || TREE_PREEMPT_RCU default 64 if 64BIT default 32 if !64BIT help @@ -366,7 +375,7 @@ config RCU_FANOUT config RCU_FANOUT_EXACT bool "Disable tree-based hierarchical RCU auto-balancing" - depends on TREE_RCU + depends on TREE_RCU || TREE_PREEMPT_RCU default n help This option forces use of the exact RCU_FANOUT value specified, @@ -379,11 +388,12 @@ config RCU_FANOUT_EXACT Say N if unsure. config TREE_RCU_TRACE - def_bool RCU_TRACE && TREE_RCU + def_bool RCU_TRACE && ( TREE_RCU || TREE_PREEMPT_RCU ) select DEBUG_FS help - This option provides tracing for the TREE_RCU implementation, - permitting Makefile to trivially select kernel/rcutree_trace.c. + This option provides tracing for the TREE_RCU and + TREE_PREEMPT_RCU implementations, permitting Makefile to + trivially select kernel/rcutree_trace.c. 
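[Editorial aside, illustration only, not part of the patch hunks above or below. Taken together, the Kconfig changes mean a preemptable tree-RCU kernel is selected purely by configuration: TREE_PREEMPT_RCU depends on PREEMPT, and RCU_FANOUT, RCU_FANOUT_EXACT, RCU_TRACE and TREE_RCU_TRACE all learn to accept it. A 64-bit CONFIG_PREEMPT build using this implementation might therefore end up with a .config fragment along the following lines; the symbols and dependencies are taken from the hunks above, while the concrete values are only a plausible sketch:

    CONFIG_PREEMPT=y
    CONFIG_TREE_PREEMPT_RCU=y
    CONFIG_RCU_FANOUT=64
    # CONFIG_RCU_FANOUT_EXACT is not set
    CONFIG_RCU_TRACE=y
    CONFIG_TREE_RCU_TRACE=y
]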
config PREEMPT_RCU_TRACE def_bool RCU_TRACE && PREEMPT_RCU diff --git a/kernel/Makefile b/kernel/Makefile index 2419c9d43918..1a38b4789dda 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -81,6 +81,7 @@ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += rcutree.o +obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o diff --git a/kernel/exit.c b/kernel/exit.c index 869dc221733e..263f95ed7201 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1010,6 +1010,7 @@ NORET_TYPE void do_exit(long code) __free_pipe_info(tsk->splice_pipe); preempt_disable(); + exit_rcu(); /* causes final put_task_struct in finish_task_switch(). */ tsk->state = TASK_DEAD; schedule(); diff --git a/kernel/fork.c b/kernel/fork.c index 021e1138556e..642e8b5edf00 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1022,10 +1022,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, copy_flags(clone_flags, p); INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); -#ifdef CONFIG_PREEMPT_RCU - p->rcu_read_lock_nesting = 0; - p->rcu_flipctr_idx = 0; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ + rcu_copy_process(p); p->vfork_done = NULL; spin_lock_init(&p->alloc_lock); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 4ce3adcfa94d..cc0255714075 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -80,6 +80,21 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); +extern long rcu_batches_completed_sched(void); +static void cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, + struct rcu_node *rnp, unsigned long flags); +static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags); +static void __rcu_process_callbacks(struct rcu_state *rsp, + struct rcu_data *rdp); +static void __call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu), + struct rcu_state *rsp); +static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp); +static void __cpuinit rcu_init_percpu_data(int cpu, struct rcu_state *rsp, + int preemptable); + +#include "rcutree_plugin.h" + /* * Note a quiescent state. Because we do not need to know * how many quiescent states passed, just if there was at least @@ -87,16 +102,27 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); */ void rcu_sched_qs(int cpu) { - struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); + unsigned long flags; + struct rcu_data *rdp; + + local_irq_save(flags); + rdp = &per_cpu(rcu_sched_data, cpu); rdp->passed_quiesc = 1; rdp->passed_quiesc_completed = rdp->completed; + rcu_preempt_qs(cpu); + local_irq_restore(flags); } void rcu_bh_qs(int cpu) { - struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); + unsigned long flags; + struct rcu_data *rdp; + + local_irq_save(flags); + rdp = &per_cpu(rcu_bh_data, cpu); rdp->passed_quiesc = 1; rdp->passed_quiesc_completed = rdp->completed; + local_irq_restore(flags); } #ifdef CONFIG_NO_HZ @@ -122,16 +148,6 @@ long rcu_batches_completed_sched(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed_sched); -/* - * Return the number of RCU batches processed thus far for debug & stats. - * @@@ placeholder, maps to rcu_batches_completed_sched(). 
- */ -long rcu_batches_completed(void) -{ - return rcu_batches_completed_sched(); -} -EXPORT_SYMBOL_GPL(rcu_batches_completed); - /* * Return the number of RCU BH batches processed thus far for debug & stats. */ @@ -193,6 +209,10 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) return 1; } + /* If preemptable RCU, no point in sending reschedule IPI. */ + if (rdp->preemptable) + return 0; + /* The CPU is online, so send it a reschedule IPI. */ if (rdp->cpu != smp_processor_id()) smp_send_reschedule(rdp->cpu); @@ -473,6 +493,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) printk(KERN_ERR "INFO: RCU detected CPU stalls:"); for (; rnp_cur < rnp_end; rnp_cur++) { + rcu_print_task_stall(rnp); if (rnp_cur->qsmask == 0) continue; for (cpu = 0; cpu <= rnp_cur->grphi - rnp_cur->grplo; cpu++) @@ -685,6 +706,19 @@ rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp) local_irq_restore(flags); } +/* + * Clean up after the prior grace period and let rcu_start_gp() start up + * the next grace period if one is needed. Note that the caller must + * hold rnp->lock, as required by rcu_start_gp(), which will release it. + */ +static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags) + __releases(rnp->lock) +{ + rsp->completed = rsp->gpnum; + rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); + rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ +} + /* * Similar to cpu_quiet(), for which it is a helper function. Allows * a group of CPUs to be quieted at one go, though all the CPUs in the @@ -706,7 +740,7 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, return; } rnp->qsmask &= ~mask; - if (rnp->qsmask != 0) { + if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { /* Other bits still set at this level, so done. */ spin_unlock_irqrestore(&rnp->lock, flags); @@ -726,14 +760,10 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, /* * Get here if we are the last CPU to pass through a quiescent - * state for this grace period. Clean up and let rcu_start_gp() - * start up the next grace period if one is needed. Note that - * we still hold rnp->lock, as required by rcu_start_gp(), which - * will release it. + * state for this grace period. Invoke cpu_quiet_msk_finish() + * to clean up and start the next grace period if one is needed. */ - rsp->completed = rsp->gpnum; - rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]); - rcu_start_gp(rsp, flags); /* releases rnp->lock. */ + cpu_quiet_msk_finish(rsp, flags); /* releases rnp->lock. */ } /* @@ -840,11 +870,11 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->qsmaskinit &= ~mask; if (rnp->qsmaskinit != 0) { - spin_unlock(&rnp->lock); /* irqs already disabled. */ + spin_unlock(&rnp->lock); /* irqs remain disabled. */ break; } mask = rnp->grpmask; - spin_unlock(&rnp->lock); /* irqs already disabled. */ + spin_unlock(&rnp->lock); /* irqs remain disabled. 
*/ rnp = rnp->parent; } while (rnp != NULL); lastcomp = rsp->completed; @@ -1007,6 +1037,7 @@ void rcu_check_callbacks(int cpu, int user) rcu_bh_qs(cpu); } + rcu_preempt_check_callbacks(cpu); raise_softirq(RCU_SOFTIRQ); } @@ -1188,6 +1219,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); + rcu_preempt_process_callbacks(); /* * Memory references from any later RCU read-side critical sections @@ -1251,17 +1283,6 @@ void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu_sched); -/* - * @@@ Queue an RCU callback for invocation after a grace period. - * @@@ Placeholder pending rcutree_plugin.h. - */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) -{ - call_rcu_sched(head, func); -} -EXPORT_SYMBOL_GPL(call_rcu); - - /* * Queue an RCU for invocation after a quicker grace period. */ @@ -1335,7 +1356,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) static int rcu_pending(int cpu) { return __rcu_pending(&rcu_sched_state, &per_cpu(rcu_sched_data, cpu)) || - __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)); + __rcu_pending(&rcu_bh_state, &per_cpu(rcu_bh_data, cpu)) || + rcu_preempt_pending(cpu); } /* @@ -1348,7 +1370,8 @@ int rcu_needs_cpu(int cpu) { /* RCU callbacks either ready or pending? */ return per_cpu(rcu_sched_data, cpu).nxtlist || - per_cpu(rcu_bh_data, cpu).nxtlist; + per_cpu(rcu_bh_data, cpu).nxtlist || + rcu_preempt_needs_cpu(cpu); } /* @@ -1383,7 +1406,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) * that this CPU cannot possibly have any RCU callbacks in flight yet. */ static void __cpuinit -rcu_init_percpu_data(int cpu, struct rcu_state *rsp) +rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) { unsigned long flags; long lastcomp; @@ -1399,6 +1422,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) rdp->passed_quiesc = 0; /* We could be racing with new GP, */ rdp->qs_pending = 1; /* so set up to respond to current GP. */ rdp->beenonline = 1; /* We have now been online. */ + rdp->preemptable = preemptable; rdp->passed_quiesc_completed = lastcomp - 1; rdp->blimit = blimit; spin_unlock(&rnp->lock); /* irqs remain disabled. */ @@ -1441,12 +1465,13 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) static void __cpuinit rcu_online_cpu(int cpu) { - rcu_init_percpu_data(cpu, &rcu_sched_state); - rcu_init_percpu_data(cpu, &rcu_bh_state); + rcu_init_percpu_data(cpu, &rcu_sched_state, 0); + rcu_init_percpu_data(cpu, &rcu_bh_state, 0); + rcu_preempt_init_percpu_data(cpu); } /* - * Handle CPU online/offline notifcation events. + * Handle CPU online/offline notification events. */ int __cpuinit rcu_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) @@ -1521,6 +1546,7 @@ static void __init rcu_init_one(struct rcu_state *rsp) rnp = rsp->level[i]; for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { spin_lock_init(&rnp->lock); + rnp->gpnum = 0; rnp->qsmask = 0; rnp->qsmaskinit = 0; rnp->grplo = j * cpustride; @@ -1538,13 +1564,16 @@ static void __init rcu_init_one(struct rcu_state *rsp) j / rsp->levelspread[i - 1]; } rnp->level = i; + INIT_LIST_HEAD(&rnp->blocked_tasks[0]); + INIT_LIST_HEAD(&rnp->blocked_tasks[1]); } } } /* - * Helper macro for __rcu_init(). To be used nowhere else! - * Assigns leaf node pointers into each CPU's rcu_data structure. 
+ * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used + * nowhere else! Assigns leaf node pointers into each CPU's rcu_data + * structure. */ #define RCU_INIT_FLAVOR(rsp, rcu_data) \ do { \ @@ -1560,18 +1589,38 @@ do { \ } \ } while (0) +#ifdef CONFIG_TREE_PREEMPT_RCU + +void __init __rcu_init_preempt(void) +{ + int i; /* All used by RCU_INIT_FLAVOR(). */ + int j; + struct rcu_node *rnp; + + RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data); +} + +#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + +void __init __rcu_init_preempt(void) +{ +} + +#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ + void __init __rcu_init(void) { - int i; /* All used by RCU_DATA_PTR_INIT(). */ + int i; /* All used by RCU_INIT_FLAVOR(). */ int j; struct rcu_node *rnp; - printk(KERN_INFO "Hierarchical RCU implementation.\n"); + rcu_bootup_announce(); #ifdef CONFIG_RCU_CPU_STALL_DETECTOR printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); + __rcu_init_preempt(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 0024e5ddcc68..ca560364d8cd 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -80,6 +80,7 @@ struct rcu_dynticks { */ struct rcu_node { spinlock_t lock; + long gpnum; /* Current grace period for this node. */ unsigned long qsmask; /* CPUs or groups that need to switch in */ /* order for current grace period to proceed.*/ unsigned long qsmaskinit; @@ -90,6 +91,8 @@ struct rcu_node { u8 grpnum; /* CPU/group number for next level up. */ u8 level; /* root is at level 0. */ struct rcu_node *parent; + struct list_head blocked_tasks[2]; + /* Tasks blocked in RCU read-side critsect. */ } ____cacheline_internodealigned_in_smp; /* Index values for nxttail array in struct rcu_data. */ @@ -111,6 +114,7 @@ struct rcu_data { bool passed_quiesc; /* User-mode/idle loop etc. */ bool qs_pending; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ + bool preemptable; /* Preemptable RCU? */ struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ unsigned long grpmask; /* Mask to apply to leaf qsmask. */ @@ -244,5 +248,10 @@ DECLARE_PER_CPU(struct rcu_data, rcu_sched_data); extern struct rcu_state rcu_bh_state; DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); +#ifdef CONFIG_TREE_PREEMPT_RCU +extern struct rcu_state rcu_preempt_state; +DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + #endif /* #ifdef RCU_TREE_NONCORE */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h new file mode 100644 index 000000000000..cd2ab67400c6 --- /dev/null +++ b/kernel/rcutree_plugin.h @@ -0,0 +1,447 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * Internal non-public definitions that provide either classic + * or preemptable semantics. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright Red Hat, 2009 + * Copyright IBM Corporation, 2009 + * + * Author: Ingo Molnar + * Paul E. McKenney + */ + + +#ifdef CONFIG_TREE_PREEMPT_RCU + +struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); +DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); + +/* + * Tell them what RCU they are running. + */ +static inline void rcu_bootup_announce(void) +{ + printk(KERN_INFO + "Experimental preemptable hierarchical RCU implementation.\n"); +} + +/* + * Return the number of RCU-preempt batches processed thus far + * for debug and statistics. + */ +long rcu_batches_completed_preempt(void) +{ + return rcu_preempt_state.completed; +} +EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt); + +/* + * Return the number of RCU batches processed thus far for debug & stats. + */ +long rcu_batches_completed(void) +{ + return rcu_batches_completed_preempt(); +} +EXPORT_SYMBOL_GPL(rcu_batches_completed); + +/* + * Record a preemptable-RCU quiescent state for the specified CPU. Note + * that this just means that the task currently running on the CPU is + * not in a quiescent state. There might be any number of tasks blocked + * while in an RCU read-side critical section. + */ +static void rcu_preempt_qs_record(int cpu) +{ + struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); + rdp->passed_quiesc = 1; + rdp->passed_quiesc_completed = rdp->completed; +} + +/* + * We have entered the scheduler or are between softirqs in ksoftirqd. + * If we are in an RCU read-side critical section, we need to reflect + * that in the state of the rcu_node structure corresponding to this CPU. + * Caller must disable hardirqs. + */ +static void rcu_preempt_qs(int cpu) +{ + struct task_struct *t = current; + int phase; + struct rcu_data *rdp; + struct rcu_node *rnp; + + if (t->rcu_read_lock_nesting && + (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { + + /* Possibly blocking in an RCU read-side critical section. */ + rdp = rcu_preempt_state.rda[cpu]; + rnp = rdp->mynode; + spin_lock(&rnp->lock); + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; + t->rcu_blocked_cpu = cpu; + + /* + * If this CPU has already checked in, then this task + * will hold up the next grace period rather than the + * current grace period. Queue the task accordingly. + * If the task is queued for the current grace period + * (i.e., this CPU has not yet passed through a quiescent + * state for the current grace period), then as long + * as that task remains queued, the current grace period + * cannot end. + */ + phase = !(rnp->qsmask & rdp->grpmask) ^ (rnp->gpnum & 0x1); + list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); + smp_mb(); /* Ensure later ctxt swtch seen after above. */ + spin_unlock(&rnp->lock); + } + + /* + * Either we were not in an RCU read-side critical section to + * begin with, or we have now recorded that critical section + * globally. Either way, we can now note a quiescent state + * for this CPU. Again, if we were in an RCU read-side critical + * section, and if that critical section was blocking the current + * grace period, then the fact that the task has been enqueued + * means that we continue to block the current grace period. 
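 *
 * [Editorial illustration, not part of the original patch: to make the
 *  phase computation above concrete, suppose rnp->gpnum is odd and this
 *  CPU's bit is still set in rnp->qsmask (the CPU has not yet checked
 *  in).  Then !(rnp->qsmask & rdp->grpmask) == 0, so
 *  phase == (rnp->gpnum & 0x1) == 1, and the task is queued on
 *  blocked_tasks[1], which is blocked_tasks[rnp->gpnum & 0x1], that is,
 *  the list rcu_preempted_readers() checks and therefore the list that
 *  holds up the current grace period.  Had this CPU already passed
 *  through its quiescent state, phase would flip to 0 and the task
 *  would instead wait on the other list, for the next grace period.]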
+ */ + rcu_preempt_qs_record(cpu); + t->rcu_read_unlock_special &= ~(RCU_READ_UNLOCK_NEED_QS | + RCU_READ_UNLOCK_GOT_QS); +} + +/* + * Tree-preemptable RCU implementation for rcu_read_lock(). + * Just increment ->rcu_read_lock_nesting, shared state will be updated + * if we block. + */ +void __rcu_read_lock(void) +{ + ACCESS_ONCE(current->rcu_read_lock_nesting)++; + barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ +} +EXPORT_SYMBOL_GPL(__rcu_read_lock); + +static void rcu_read_unlock_special(struct task_struct *t) +{ + int empty; + unsigned long flags; + unsigned long mask; + struct rcu_node *rnp; + int special; + + /* NMI handlers cannot block and cannot safely manipulate state. */ + if (in_nmi()) + return; + + local_irq_save(flags); + + /* + * If RCU core is waiting for this CPU to exit critical section, + * let it know that we have done so. + */ + special = t->rcu_read_unlock_special; + if (special & RCU_READ_UNLOCK_NEED_QS) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_GOT_QS; + } + + /* Hardware IRQ handlers cannot block. */ + if (in_irq()) { + local_irq_restore(flags); + return; + } + + /* Clean up if blocked during RCU read-side critical section. */ + if (special & RCU_READ_UNLOCK_BLOCKED) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; + + /* Remove this task from the list it blocked on. */ + rnp = rcu_preempt_state.rda[t->rcu_blocked_cpu]->mynode; + spin_lock(&rnp->lock); + empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); + list_del_init(&t->rcu_node_entry); + t->rcu_blocked_cpu = -1; + + /* + * If this was the last task on the current list, and if + * we aren't waiting on any CPUs, report the quiescent state. + * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk() + * drop rnp->lock and restore irq. + */ + if (!empty && rnp->qsmask == 0 && + list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) { + t->rcu_read_unlock_special &= + ~(RCU_READ_UNLOCK_NEED_QS | + RCU_READ_UNLOCK_GOT_QS); + if (rnp->parent == NULL) { + /* Only one rcu_node in the tree. */ + cpu_quiet_msk_finish(&rcu_preempt_state, flags); + return; + } + /* Report up the rest of the hierarchy. */ + mask = rnp->grpmask; + spin_unlock_irqrestore(&rnp->lock, flags); + rnp = rnp->parent; + spin_lock_irqsave(&rnp->lock, flags); + cpu_quiet_msk(mask, &rcu_preempt_state, rnp, flags); + return; + } + spin_unlock(&rnp->lock); + } + local_irq_restore(flags); +} + +/* + * Tree-preemptable RCU implementation for rcu_read_unlock(). + * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then + * invoke rcu_read_unlock_special() to clean up after a context switch + * in an RCU read-side critical section and other special cases. + */ +void __rcu_read_unlock(void) +{ + struct task_struct *t = current; + + barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ + if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 && + unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + rcu_read_unlock_special(t); +} +EXPORT_SYMBOL_GPL(__rcu_read_unlock); + +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + +/* + * Scan the current list of tasks blocked within RCU read-side critical + * sections, printing out the tid of each. 
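 *
 * [Editorial note, not from the original patch: together with the
 *  "INFO: RCU detected CPU stalls:" line printed by
 *  print_other_cpu_stall(), a stall report might look roughly like
 *
 *      INFO: RCU detected CPU stalls: P4211 P4317 3
 *
 *  where each " P<pid>" is emitted by the loop below for a task blocked
 *  within an RCU read-side critical section, and any trailing numbers
 *  name CPUs that have not yet reported a quiescent state.  The pids
 *  and CPU number here are invented purely for illustration.]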
+ */ +static void rcu_print_task_stall(struct rcu_node *rnp) +{ + unsigned long flags; + struct list_head *lp; + int phase = rnp->gpnum & 0x1; + struct task_struct *t; + + if (!list_empty(&rnp->blocked_tasks[phase])) { + spin_lock_irqsave(&rnp->lock, flags); + phase = rnp->gpnum & 0x1; /* re-read under lock. */ + lp = &rnp->blocked_tasks[phase]; + list_for_each_entry(t, lp, rcu_node_entry) + printk(" P%d", t->pid); + spin_unlock_irqrestore(&rnp->lock, flags); + } +} + +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * Check for preempted RCU readers for the specified rcu_node structure. + * If the caller needs a reliable answer, it must hold the rcu_node's + * >lock. + */ +static int rcu_preempted_readers(struct rcu_node *rnp) +{ + return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); +} + +/* + * Check for a quiescent state from the current CPU. When a task blocks, + * the task is recorded in the corresponding CPU's rcu_node structure, + * which is checked elsewhere. + * + * Caller must disable hard irqs. + */ +static void rcu_preempt_check_callbacks(int cpu) +{ + struct task_struct *t = current; + + if (t->rcu_read_lock_nesting == 0) { + t->rcu_read_unlock_special &= + ~(RCU_READ_UNLOCK_NEED_QS | RCU_READ_UNLOCK_GOT_QS); + rcu_preempt_qs_record(cpu); + return; + } + if (per_cpu(rcu_preempt_data, cpu).qs_pending) { + if (t->rcu_read_unlock_special & RCU_READ_UNLOCK_GOT_QS) { + rcu_preempt_qs_record(cpu); + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_GOT_QS; + } else if (!(t->rcu_read_unlock_special & + RCU_READ_UNLOCK_NEED_QS)) { + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; + } + } +} + +/* + * Process callbacks for preemptable RCU. + */ +static void rcu_preempt_process_callbacks(void) +{ + __rcu_process_callbacks(&rcu_preempt_state, + &__get_cpu_var(rcu_preempt_data)); +} + +/* + * Queue a preemptable-RCU callback for invocation after a grace period. + */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + __call_rcu(head, func, &rcu_preempt_state); +} +EXPORT_SYMBOL_GPL(call_rcu); + +/* + * Check to see if there is any immediate preemptable-RCU-related work + * to be done. + */ +static int rcu_preempt_pending(int cpu) +{ + return __rcu_pending(&rcu_preempt_state, + &per_cpu(rcu_preempt_data, cpu)); +} + +/* + * Does preemptable RCU need the CPU to stay out of dynticks mode? + */ +static int rcu_preempt_needs_cpu(int cpu) +{ + return !!per_cpu(rcu_preempt_data, cpu).nxtlist; +} + +/* + * Initialize preemptable RCU's per-CPU data. + */ +static void __cpuinit rcu_preempt_init_percpu_data(int cpu) +{ + rcu_init_percpu_data(cpu, &rcu_preempt_state, 1); +} + +/* + * Check for a task exiting while in a preemptable-RCU read-side + * critical section, clean up if so. No need to issue warnings, + * as debug_check_no_locks_held() already does this if lockdep + * is enabled. + */ +void exit_rcu(void) +{ + struct task_struct *t = current; + + if (t->rcu_read_lock_nesting == 0) + return; + t->rcu_read_lock_nesting = 1; + rcu_read_unlock(); +} + +#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ + +/* + * Tell them what RCU they are running. + */ +static inline void rcu_bootup_announce(void) +{ + printk(KERN_INFO "Hierarchical RCU implementation.\n"); +} + +/* + * Return the number of RCU batches processed thus far for debug & stats. 
+ */ +long rcu_batches_completed(void) +{ + return rcu_batches_completed_sched(); +} +EXPORT_SYMBOL_GPL(rcu_batches_completed); + +/* + * Because preemptable RCU does not exist, we never have to check for + * CPUs being in quiescent states. + */ +static void rcu_preempt_qs(int cpu) +{ +} + +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR + +/* + * Because preemptable RCU does not exist, we never have to check for + * tasks blocked within RCU read-side critical sections. + */ +static void rcu_print_task_stall(struct rcu_node *rnp) +{ +} + +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + +/* + * Because preemptable RCU does not exist, there are never any preempted + * RCU readers. + */ +static int rcu_preempted_readers(struct rcu_node *rnp) +{ + return 0; +} + +/* + * Because preemptable RCU does not exist, it never has any callbacks + * to check. + */ +void rcu_preempt_check_callbacks(int cpu) +{ +} + +/* + * Because preemptable RCU does not exist, it never has any callbacks + * to process. + */ +void rcu_preempt_process_callbacks(void) +{ +} + +/* + * In classic RCU, call_rcu() is just call_rcu_sched(). + */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + call_rcu_sched(head, func); +} +EXPORT_SYMBOL_GPL(call_rcu); + +/* + * Because preemptable RCU does not exist, it never has any work to do. + */ +static int rcu_preempt_pending(int cpu) +{ + return 0; +} + +/* + * Because preemptable RCU does not exist, it never needs any CPU. + */ +static int rcu_preempt_needs_cpu(int cpu) +{ + return 0; +} + +/* + * Because preemptable RCU does not exist, there is no per-CPU + * data to initialize. + */ +static void __cpuinit rcu_preempt_init_percpu_data(int cpu) +{ +} + +#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 31af3a0fb6d5..0ea1bff69727 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -77,6 +77,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) static int show_rcudata(struct seq_file *m, void *unused) { +#ifdef CONFIG_TREE_PREEMPT_RCU + seq_puts(m, "rcu_preempt:\n"); + PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data, m); +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ seq_puts(m, "rcu_sched:\n"); PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data, m); seq_puts(m, "rcu_bh:\n"); @@ -125,6 +129,10 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); #endif /* #ifdef CONFIG_NO_HZ */ seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\"\n"); +#ifdef CONFIG_TREE_PREEMPT_RCU + seq_puts(m, "\"rcu_preempt:\"\n"); + PRINT_RCU_DATA(rcu_preempt_data, print_one_rcu_data_csv, m); +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ seq_puts(m, "\"rcu_sched:\"\n"); PRINT_RCU_DATA(rcu_sched_data, print_one_rcu_data_csv, m); seq_puts(m, "\"rcu_bh:\"\n"); @@ -172,6 +180,10 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) static int show_rcuhier(struct seq_file *m, void *unused) { +#ifdef CONFIG_TREE_PREEMPT_RCU + seq_puts(m, "rcu_preempt:\n"); + print_one_rcu_state(m, &rcu_preempt_state); +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ seq_puts(m, "rcu_sched:\n"); print_one_rcu_state(m, &rcu_sched_state); seq_puts(m, "rcu_bh:\n"); @@ -194,6 +206,10 @@ static struct file_operations rcuhier_fops = { static int show_rcugp(struct seq_file *m, void *unused) { +#ifdef CONFIG_TREE_PREEMPT_RCU + seq_printf(m, "rcu_preempt: completed=%ld gpnum=%ld\n", + rcu_preempt_state.completed, rcu_preempt_state.gpnum); +#endif 
/* #ifdef CONFIG_TREE_PREEMPT_RCU */ seq_printf(m, "rcu_sched: completed=%ld gpnum=%ld\n", rcu_sched_state.completed, rcu_sched_state.gpnum); seq_printf(m, "rcu_bh: completed=%ld gpnum=%ld\n", @@ -244,6 +260,10 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) static int show_rcu_pending(struct seq_file *m, void *unused) { +#ifdef CONFIG_TREE_PREEMPT_RCU + seq_puts(m, "rcu_preempt:\n"); + print_rcu_pendings(m, &rcu_preempt_state); +#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ seq_puts(m, "rcu_sched:\n"); print_rcu_pendings(m, &rcu_sched_state); seq_puts(m, "rcu_bh:\n"); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 12327b2bb785..f87fb0c8f924 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -725,7 +725,7 @@ config RCU_TORTURE_TEST_RUNNABLE config RCU_CPU_STALL_DETECTOR bool "Check for stalled CPUs delaying RCU grace periods" - depends on CLASSIC_RCU || TREE_RCU + depends on CLASSIC_RCU || TREE_RCU || TREE_PREEMPT_RCU default n help This option causes RCU to printk information on which -- cgit v1.2.3 From 6b3ef48adf847f7adf11c870e3ffacac150f1564 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 22 Aug 2009 13:56:53 -0700 Subject: rcu: Remove CONFIG_PREEMPT_RCU Now that CONFIG_TREE_PREEMPT_RCU is in place, there is no further need for CONFIG_PREEMPT_RCU. Remove it, along with whatever subtle bugs it may (or may not) contain. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <125097461396-git-send-email-> Signed-off-by: Ingo Molnar --- Documentation/RCU/rcu.txt | 10 +- Documentation/RCU/whatisRCU.txt | 8 +- include/linux/init_task.h | 6 +- include/linux/rcupdate.h | 4 +- include/linux/rcupreempt.h | 140 ---- include/linux/rcupreempt_trace.h | 97 --- include/linux/sched.h | 13 - init/Kconfig | 20 +- kernel/Makefile | 2 - kernel/rcupreempt.c | 1518 -------------------------------------- kernel/rcupreempt_trace.c | 335 --------- lib/Kconfig.debug | 2 +- 12 files changed, 13 insertions(+), 2142 deletions(-) delete mode 100644 include/linux/rcupreempt.h delete mode 100644 include/linux/rcupreempt_trace.h delete mode 100644 kernel/rcupreempt.c delete mode 100644 kernel/rcupreempt_trace.c (limited to 'include') diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt index 7aa2002ade77..2a23523ce471 100644 --- a/Documentation/RCU/rcu.txt +++ b/Documentation/RCU/rcu.txt @@ -36,7 +36,7 @@ o How can the updater tell when a grace period has completed executed in user mode, or executed in the idle loop, we can safely free up that item. - Preemptible variants of RCU (CONFIG_PREEMPT_RCU) get the + Preemptible variants of RCU (CONFIG_TREE_PREEMPT_RCU) get the same effect, but require that the readers manipulate CPU-local counters. These counters allow limited types of blocking within RCU read-side critical sections. SRCU also uses @@ -79,10 +79,10 @@ o I hear that RCU is patented? What is with that? o I hear that RCU needs work in order to support realtime kernels? This work is largely completed. Realtime-friendly RCU can be - enabled via the CONFIG_PREEMPT_RCU kernel configuration parameter. - However, work is in progress for enabling priority boosting of - preempted RCU read-side critical sections. This is needed if you - have CPU-bound realtime threads. 
+ enabled via the CONFIG_TREE_PREEMPT_RCU kernel configuration + parameter. However, work is in progress for enabling priority + boosting of preempted RCU read-side critical sections. This is + needed if you have CPU-bound realtime threads. o Where can I find more information on RCU? diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 97ded2432c59..e41a7fecf0d3 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -136,10 +136,10 @@ rcu_read_lock() Used by a reader to inform the reclaimer that the reader is entering an RCU read-side critical section. It is illegal to block while in an RCU read-side critical section, though - kernels built with CONFIG_PREEMPT_RCU can preempt RCU read-side - critical sections. Any RCU-protected data structure accessed - during an RCU read-side critical section is guaranteed to remain - unreclaimed for the full duration of that critical section. + kernels built with CONFIG_TREE_PREEMPT_RCU can preempt RCU + read-side critical sections. Any RCU-protected data structure + accessed during an RCU read-side critical section is guaranteed to + remain unreclaimed for the full duration of that critical section. Reference counts may be used in conjunction with RCU to maintain longer-term references to data structures. diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 971a968831bf..79d4baee31b6 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -94,11 +94,7 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif -#ifdef CONFIG_PREEMPT_RCU -#define INIT_TASK_RCU_PREEMPT(tsk) \ - .rcu_read_lock_nesting = 0, \ - .rcu_flipctr_idx = 0, -#elif defined(CONFIG_TREE_PREEMPT_RCU) +#ifdef CONFIG_TREE_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 26892f5e7bd8..ec90fc34fea9 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -68,11 +68,9 @@ extern int rcu_scheduler_active; #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include -#elif defined(CONFIG_PREEMPT_RCU) -#include #else #error "Unknown RCU implementation specified to kernel configuration" -#endif /* #else #if defined(CONFIG_CLASSIC_RCU) */ +#endif #define RCU_HEAD_INIT { .next = NULL, .func = NULL } #define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h deleted file mode 100644 index a42ab88e9210..000000000000 --- a/include/linux/rcupreempt.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion (RT implementation) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Paul McKenney - * - * Based on the original work by Paul McKenney - * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. - * Papers: - * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf - * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU - * - */ - -#ifndef __LINUX_RCUPREEMPT_H -#define __LINUX_RCUPREEMPT_H - -#include -#include -#include -#include -#include -#include - -extern void rcu_sched_qs(int cpu); -static inline void rcu_bh_qs(int cpu) { } - -/* - * Someone might want to pass call_rcu_bh as a function pointer. - * So this needs to just be a rename and not a macro function. - * (no parentheses) - */ -#define call_rcu_bh call_rcu - -/** - * call_rcu_sched - Queue RCU callback for invocation after sched grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period - * - * The update function will be invoked some time after a full - * synchronize_sched()-style grace period elapses, in other words after - * all currently executing preempt-disabled sections of code (including - * hardirq handlers, NMI handlers, and local_irq_save() blocks) have - * completed. - */ -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *head)); - -extern void __rcu_read_lock(void); -extern void __rcu_read_unlock(void); -extern int rcu_needs_cpu(int cpu); - -#define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); } -#define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); } - -extern void __synchronize_sched(void); - -static inline void synchronize_rcu_expedited(void) -{ - synchronize_rcu(); /* Placeholder for new rcupreempt implementation. */ -} - -static inline void synchronize_rcu_bh_expedited(void) -{ - synchronize_rcu_bh(); /* Placeholder for new rcupreempt impl. */ -} - -extern void __rcu_init(void); -extern void rcu_init_sched(void); -extern void rcu_check_callbacks(int cpu, int user); -extern void rcu_restart_cpu(int cpu); -extern long rcu_batches_completed(void); - -/* - * Return the number of RCU batches processed thus far. Useful for debug - * and statistic. The _bh variant is identifcal to straight RCU - */ -static inline long rcu_batches_completed_bh(void) -{ - return rcu_batches_completed(); -} - -static inline void exit_rcu(void) -{ -} - -#ifdef CONFIG_RCU_TRACE -struct rcupreempt_trace; -extern long *rcupreempt_flipctr(int cpu); -extern long rcupreempt_data_completed(void); -extern int rcupreempt_flip_flag(int cpu); -extern int rcupreempt_mb_flag(int cpu); -extern char *rcupreempt_try_flip_state_name(void); -extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); -#endif - -struct softirq_action; - -#ifdef CONFIG_NO_HZ -extern void rcu_enter_nohz(void); -extern void rcu_exit_nohz(void); -#else -# define rcu_enter_nohz() do { } while (0) -# define rcu_exit_nohz() do { } while (0) -#endif - -/* - * A context switch is a grace period for rcupreempt synchronize_rcu() - * only during early boot, before the scheduler has been initialized. - * So, how the heck do we get a context switch? Well, if the caller - * invokes synchronize_rcu(), they are willing to accept a context - * switch, so we simply pretend that one happened. 
- * - * After boot, there might be a blocked or preempted task in an RCU - * read-side critical section, so we cannot then take the fastpath. - */ -static inline int rcu_blocking_is_gp(void) -{ - return num_online_cpus() == 1 && !rcu_scheduler_active; -} - -#endif /* __LINUX_RCUPREEMPT_H */ diff --git a/include/linux/rcupreempt_trace.h b/include/linux/rcupreempt_trace.h deleted file mode 100644 index b99ae073192a..000000000000 --- a/include/linux/rcupreempt_trace.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion (RT implementation) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2006 - * - * Author: Paul McKenney - * - * Based on the original work by Paul McKenney - * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen. - * Papers: - * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf - * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001) - * - * For detailed explanation of the Preemptible Read-Copy Update mechanism see - - * http://lwn.net/Articles/253651/ - */ - -#ifndef __LINUX_RCUPREEMPT_TRACE_H -#define __LINUX_RCUPREEMPT_TRACE_H - -#include -#include - -#include - -/* - * PREEMPT_RCU data structures. 
- */ - -struct rcupreempt_trace { - long next_length; - long next_add; - long wait_length; - long wait_add; - long done_length; - long done_add; - long done_remove; - atomic_t done_invoked; - long rcu_check_callbacks; - atomic_t rcu_try_flip_1; - atomic_t rcu_try_flip_e1; - long rcu_try_flip_i1; - long rcu_try_flip_ie1; - long rcu_try_flip_g1; - long rcu_try_flip_a1; - long rcu_try_flip_ae1; - long rcu_try_flip_a2; - long rcu_try_flip_z1; - long rcu_try_flip_ze1; - long rcu_try_flip_z2; - long rcu_try_flip_m1; - long rcu_try_flip_me1; - long rcu_try_flip_m2; -}; - -#ifdef CONFIG_RCU_TRACE -#define RCU_TRACE(fn, arg) fn(arg); -#else -#define RCU_TRACE(fn, arg) -#endif - -extern void rcupreempt_trace_move2done(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_invoke(struct rcupreempt_trace *trace); -extern void rcupreempt_trace_next_add(struct rcupreempt_trace *trace); - -#endif /* __LINUX_RCUPREEMPT_TRACE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d7f98f637a2a..bfca26d63b13 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1205,11 +1205,6 @@ struct task_struct { unsigned int policy; cpumask_t cpus_allowed; -#ifdef CONFIG_PREEMPT_RCU - int rcu_read_lock_nesting; - int rcu_flipctr_idx; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ - #ifdef CONFIG_TREE_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; @@ -1744,14 +1739,6 @@ static inline void rcu_copy_process(struct task_struct *p) INIT_LIST_HEAD(&p->rcu_node_entry); } -#elif defined(CONFIG_PREEMPT_RCU) - -static inline void rcu_copy_process(struct task_struct *p) -{ - p->rcu_read_lock_nesting = 0; - p->rcu_flipctr_idx = 0; -} - #else static inline void rcu_copy_process(struct task_struct *p) diff --git a/init/Kconfig b/init/Kconfig index f88da2d1c1fb..8e8b76d8a272 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -324,17 +324,6 @@ config TREE_RCU thousands of CPUs. It also scales down nicely to smaller systems. -config PREEMPT_RCU - bool "Preemptible RCU" - depends on PREEMPT - help - This option reduces the latency of the kernel by making certain - RCU sections preemptible. Normally RCU code is non-preemptible, if - this option is selected then read-only RCU sections become - preemptible. 
This helps latency, but may expose bugs due to - now-naive assumptions about each RCU read-side critical section - remaining on a given CPU through its execution. - config TREE_PREEMPT_RCU bool "Preemptable tree-based hierarchical RCU" depends on PREEMPT @@ -348,7 +337,7 @@ endchoice config RCU_TRACE bool "Enable tracing for RCU" - depends on TREE_RCU || PREEMPT_RCU || TREE_PREEMPT_RCU + depends on TREE_RCU || TREE_PREEMPT_RCU help This option provides tracing in RCU which presents stats in debugfs for debugging RCU implementation. @@ -395,13 +384,6 @@ config TREE_RCU_TRACE TREE_PREEMPT_RCU implementations, permitting Makefile to trivially select kernel/rcutree_trace.c. -config PREEMPT_RCU_TRACE - def_bool RCU_TRACE && PREEMPT_RCU - select DEBUG_FS - help - This option provides tracing for the PREEMPT_RCU implementation, - permitting Makefile to trivially select kernel/rcupreempt_trace.c. - endmenu # "RCU Subsystem" config IKCONFIG diff --git a/kernel/Makefile b/kernel/Makefile index 1a38b4789dda..b833bd5cc127 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -82,9 +82,7 @@ obj-$(CONFIG_SECCOMP) += seccomp.o obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o -obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o -obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c deleted file mode 100644 index 0053ce56e326..000000000000 --- a/kernel/rcupreempt.c +++ /dev/null @@ -1,1518 +0,0 @@ -/* - * Read-Copy Update mechanism for mutual exclusion, realtime implementation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright IBM Corporation, 2006 - * - * Authors: Paul E. McKenney - * With thanks to Esben Nielsen, Bill Huey, and Ingo Molnar - * for pushing me away from locks and towards counters, and - * to Suparna Bhattacharya for pushing me completely away - * from atomic instructions on the read side. - * - * - Added handling of Dynamic Ticks - * Copyright 2007 - Paul E. Mckenney - * - Steven Rostedt - * - * Papers: http://www.rdrop.com/users/paulmck/RCU - * - * Design Document: http://lwn.net/Articles/253651/ - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU/ *.txt - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * PREEMPT_RCU data structures. - */ - -/* - * GP_STAGES specifies the number of times the state machine has - * to go through the all the rcu_try_flip_states (see below) - * in a single Grace Period. 
- * - * GP in GP_STAGES stands for Grace Period ;) - */ -#define GP_STAGES 2 -struct rcu_data { - spinlock_t lock; /* Protect rcu_data fields. */ - long completed; /* Number of last completed batch. */ - int waitlistcount; - struct rcu_head *nextlist; - struct rcu_head **nexttail; - struct rcu_head *waitlist[GP_STAGES]; - struct rcu_head **waittail[GP_STAGES]; - struct rcu_head *donelist; /* from waitlist & waitschedlist */ - struct rcu_head **donetail; - long rcu_flipctr[2]; - struct rcu_head *nextschedlist; - struct rcu_head **nextschedtail; - struct rcu_head *waitschedlist; - struct rcu_head **waitschedtail; - int rcu_sched_sleeping; -#ifdef CONFIG_RCU_TRACE - struct rcupreempt_trace trace; -#endif /* #ifdef CONFIG_RCU_TRACE */ -}; - -/* - * States for rcu_try_flip() and friends. - */ - -enum rcu_try_flip_states { - - /* - * Stay here if nothing is happening. Flip the counter if somthing - * starts happening. Denoted by "I" - */ - rcu_try_flip_idle_state, - - /* - * Wait here for all CPUs to notice that the counter has flipped. This - * prevents the old set of counters from ever being incremented once - * we leave this state, which in turn is necessary because we cannot - * test any individual counter for zero -- we can only check the sum. - * Denoted by "A". - */ - rcu_try_flip_waitack_state, - - /* - * Wait here for the sum of the old per-CPU counters to reach zero. - * Denoted by "Z". - */ - rcu_try_flip_waitzero_state, - - /* - * Wait here for each of the other CPUs to execute a memory barrier. - * This is necessary to ensure that these other CPUs really have - * completed executing their RCU read-side critical sections, despite - * their CPUs wildly reordering memory. Denoted by "M". - */ - rcu_try_flip_waitmb_state, -}; - -/* - * States for rcu_ctrlblk.rcu_sched_sleep. - */ - -enum rcu_sched_sleep_states { - rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */ - rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */ - rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */ -}; - -struct rcu_ctrlblk { - spinlock_t fliplock; /* Protect state-machine transitions. */ - long completed; /* Number of last completed batch. */ - enum rcu_try_flip_states rcu_try_flip_state; /* The current state of - the rcu state machine */ - spinlock_t schedlock; /* Protect rcu_sched sleep state. */ - enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */ - wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. 
*/ -}; - -struct rcu_dyntick_sched { - int dynticks; - int dynticks_snap; - int sched_qs; - int sched_qs_snap; - int sched_dynticks_snap; -}; - -static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = { - .dynticks = 1, -}; - -static int rcu_pending(int cpu); - -void rcu_sched_qs(int cpu) -{ - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - rdssp->sched_qs++; -} - -#ifdef CONFIG_NO_HZ - -void rcu_enter_nohz(void) -{ - static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); - - smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - __get_cpu_var(rcu_dyntick_sched).dynticks++; - WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs); -} - -void rcu_exit_nohz(void) -{ - static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); - - __get_cpu_var(rcu_dyntick_sched).dynticks++; - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ - WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), - &rs); -} - -#endif /* CONFIG_NO_HZ */ - - -static DEFINE_PER_CPU(struct rcu_data, rcu_data); - -static struct rcu_ctrlblk rcu_ctrlblk = { - .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), - .completed = 0, - .rcu_try_flip_state = rcu_try_flip_idle_state, - .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock), - .sched_sleep = rcu_sched_not_sleeping, - .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq), -}; - -static struct task_struct *rcu_sched_grace_period_task; - -#ifdef CONFIG_RCU_TRACE -static char *rcu_try_flip_state_names[] = - { "idle", "waitack", "waitzero", "waitmb" }; -#endif /* #ifdef CONFIG_RCU_TRACE */ - -static DECLARE_BITMAP(rcu_cpu_online_map, NR_CPUS) __read_mostly - = CPU_BITS_NONE; - -/* - * Enum and per-CPU flag to determine when each CPU has seen - * the most recent counter flip. - */ - -enum rcu_flip_flag_values { - rcu_flip_seen, /* Steady/initial state, last flip seen. */ - /* Only GP detector can update. */ - rcu_flipped /* Flip just completed, need confirmation. */ - /* Only corresponding CPU can update. */ -}; -static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_flip_flag_values, rcu_flip_flag) - = rcu_flip_seen; - -/* - * Enum and per-CPU flag to determine when each CPU has executed the - * needed memory barrier to fence in memory references from its last RCU - * read-side critical section in the just-completed grace period. - */ - -enum rcu_mb_flag_values { - rcu_mb_done, /* Steady/initial state, no mb()s required. */ - /* Only GP detector can update. */ - rcu_mb_needed /* Flip just completed, need an mb(). */ - /* Only corresponding CPU can update. */ -}; -static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag) - = rcu_mb_done; - -/* - * RCU_DATA_ME: find the current CPU's rcu_data structure. - * RCU_DATA_CPU: find the specified CPU's rcu_data structure. - */ -#define RCU_DATA_ME() (&__get_cpu_var(rcu_data)) -#define RCU_DATA_CPU(cpu) (&per_cpu(rcu_data, cpu)) - -/* - * Helper macro for tracing when the appropriate rcu_data is not - * cached in a local variable, but where the CPU number is so cached. - */ -#define RCU_TRACE_CPU(f, cpu) RCU_TRACE(f, &(RCU_DATA_CPU(cpu)->trace)); - -/* - * Helper macro for tracing when the appropriate rcu_data is not - * cached in a local variable. - */ -#define RCU_TRACE_ME(f) RCU_TRACE(f, &(RCU_DATA_ME()->trace)); - -/* - * Helper macro for tracing when the appropriate rcu_data is pointed - * to by a local variable. 
- */ -#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); - -#define RCU_SCHED_BATCH_TIME (HZ / 50) - -/* - * Return the number of RCU batches processed thus far. Useful - * for debug and statistics. - */ -long rcu_batches_completed(void) -{ - return rcu_ctrlblk.completed; -} -EXPORT_SYMBOL_GPL(rcu_batches_completed); - -void __rcu_read_lock(void) -{ - int idx; - struct task_struct *t = current; - int nesting; - - nesting = ACCESS_ONCE(t->rcu_read_lock_nesting); - if (nesting != 0) { - - /* An earlier rcu_read_lock() covers us, just count it. */ - - t->rcu_read_lock_nesting = nesting + 1; - - } else { - unsigned long flags; - - /* - * We disable interrupts for the following reasons: - * - If we get scheduling clock interrupt here, and we - * end up acking the counter flip, it's like a promise - * that we will never increment the old counter again. - * Thus we will break that promise if that - * scheduling clock interrupt happens between the time - * we pick the .completed field and the time that we - * increment our counter. - * - * - We don't want to be preempted out here. - * - * NMIs can still occur, of course, and might themselves - * contain rcu_read_lock(). - */ - - local_irq_save(flags); - - /* - * Outermost nesting of rcu_read_lock(), so increment - * the current counter for the current CPU. Use volatile - * casts to prevent the compiler from reordering. - */ - - idx = ACCESS_ONCE(rcu_ctrlblk.completed) & 0x1; - ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])++; - - /* - * Now that the per-CPU counter has been incremented, we - * are protected from races with rcu_read_lock() invoked - * from NMI handlers on this CPU. We can therefore safely - * increment the nesting counter, relieving further NMIs - * of the need to increment the per-CPU counter. - */ - - ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting + 1; - - /* - * Now that we have preventing any NMIs from storing - * to the ->rcu_flipctr_idx, we can safely use it to - * remember which counter to decrement in the matching - * rcu_read_unlock(). - */ - - ACCESS_ONCE(t->rcu_flipctr_idx) = idx; - local_irq_restore(flags); - } -} -EXPORT_SYMBOL_GPL(__rcu_read_lock); - -void __rcu_read_unlock(void) -{ - int idx; - struct task_struct *t = current; - int nesting; - - nesting = ACCESS_ONCE(t->rcu_read_lock_nesting); - if (nesting > 1) { - - /* - * We are still protected by the enclosing rcu_read_lock(), - * so simply decrement the counter. - */ - - t->rcu_read_lock_nesting = nesting - 1; - - } else { - unsigned long flags; - - /* - * Disable local interrupts to prevent the grace-period - * detection state machine from seeing us half-done. - * NMIs can still occur, of course, and might themselves - * contain rcu_read_lock() and rcu_read_unlock(). - */ - - local_irq_save(flags); - - /* - * Outermost nesting of rcu_read_unlock(), so we must - * decrement the current counter for the current CPU. - * This must be done carefully, because NMIs can - * occur at any point in this code, and any rcu_read_lock() - * and rcu_read_unlock() pairs in the NMI handlers - * must interact non-destructively with this code. - * Lots of volatile casts, and -very- careful ordering. - * - * Changes to this code, including this one, must be - * inspected, validated, and tested extremely carefully!!! - */ - - /* - * First, pick up the index. - */ - - idx = ACCESS_ONCE(t->rcu_flipctr_idx); - - /* - * Now that we have fetched the counter index, it is - * safe to decrement the per-task RCU nesting counter. 
- * After this, any interrupts or NMIs will increment and - * decrement the per-CPU counters. - */ - ACCESS_ONCE(t->rcu_read_lock_nesting) = nesting - 1; - - /* - * It is now safe to decrement this task's nesting count. - * NMIs that occur after this statement will route their - * rcu_read_lock() calls through this "else" clause, and - * will thus start incrementing the per-CPU counter on - * their own. They will also clobber ->rcu_flipctr_idx, - * but that is OK, since we have already fetched it. - */ - - ACCESS_ONCE(RCU_DATA_ME()->rcu_flipctr[idx])--; - local_irq_restore(flags); - } -} -EXPORT_SYMBOL_GPL(__rcu_read_unlock); - -/* - * If a global counter flip has occurred since the last time that we - * advanced callbacks, advance them. Hardware interrupts must be - * disabled when calling this function. - */ -static void __rcu_advance_callbacks(struct rcu_data *rdp) -{ - int cpu; - int i; - int wlc = 0; - - if (rdp->completed != rcu_ctrlblk.completed) { - if (rdp->waitlist[GP_STAGES - 1] != NULL) { - *rdp->donetail = rdp->waitlist[GP_STAGES - 1]; - rdp->donetail = rdp->waittail[GP_STAGES - 1]; - RCU_TRACE_RDP(rcupreempt_trace_move2done, rdp); - } - for (i = GP_STAGES - 2; i >= 0; i--) { - if (rdp->waitlist[i] != NULL) { - rdp->waitlist[i + 1] = rdp->waitlist[i]; - rdp->waittail[i + 1] = rdp->waittail[i]; - wlc++; - } else { - rdp->waitlist[i + 1] = NULL; - rdp->waittail[i + 1] = - &rdp->waitlist[i + 1]; - } - } - if (rdp->nextlist != NULL) { - rdp->waitlist[0] = rdp->nextlist; - rdp->waittail[0] = rdp->nexttail; - wlc++; - rdp->nextlist = NULL; - rdp->nexttail = &rdp->nextlist; - RCU_TRACE_RDP(rcupreempt_trace_move2wait, rdp); - } else { - rdp->waitlist[0] = NULL; - rdp->waittail[0] = &rdp->waitlist[0]; - } - rdp->waitlistcount = wlc; - rdp->completed = rcu_ctrlblk.completed; - } - - /* - * Check to see if this CPU needs to report that it has seen - * the most recent counter flip, thereby declaring that all - * subsequent rcu_read_lock() invocations will respect this flip. - */ - - cpu = raw_smp_processor_id(); - if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) { - smp_mb(); /* Subsequent counter accesses must see new value */ - per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen; - smp_mb(); /* Subsequent RCU read-side critical sections */ - /* seen -after- acknowledgement. */ - } -} - -#ifdef CONFIG_NO_HZ -static DEFINE_PER_CPU(int, rcu_update_flag); - -/** - * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. - * - * If the CPU was idle with dynamic ticks active, this updates the - * rcu_dyntick_sched.dynticks to let the RCU handling know that the - * CPU is active. - */ -void rcu_irq_enter(void) -{ - int cpu = smp_processor_id(); - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - if (per_cpu(rcu_update_flag, cpu)) - per_cpu(rcu_update_flag, cpu)++; - - /* - * Only update if we are coming from a stopped ticks mode - * (rcu_dyntick_sched.dynticks is even). - */ - if (!in_interrupt() && - (rdssp->dynticks & 0x1) == 0) { - /* - * The following might seem like we could have a race - * with NMI/SMIs. But this really isn't a problem. - * Here we do a read/modify/write, and the race happens - * when an NMI/SMI comes in after the read and before - * the write. But NMI/SMIs will increment this counter - * twice before returning, so the zero bit will not - * be corrupted by the NMI/SMI which is the most important - * part. - * - * The only thing is that we would bring back the counter - * to a postion that it was in during the NMI/SMI. 
- * But the zero bit would be set, so the rest of the - * counter would again be ignored. - * - * On return from the IRQ, the counter may have the zero - * bit be 0 and the counter the same as the return from - * the NMI/SMI. If the state machine was so unlucky to - * see that, it still doesn't matter, since all - * RCU read-side critical sections on this CPU would - * have already completed. - */ - rdssp->dynticks++; - /* - * The following memory barrier ensures that any - * rcu_read_lock() primitives in the irq handler - * are seen by other CPUs to follow the above - * increment to rcu_dyntick_sched.dynticks. This is - * required in order for other CPUs to correctly - * determine when it is safe to advance the RCU - * grace-period state machine. - */ - smp_mb(); /* see above block comment. */ - /* - * Since we can't determine the dynamic tick mode from - * the rcu_dyntick_sched.dynticks after this routine, - * we use a second flag to acknowledge that we came - * from an idle state with ticks stopped. - */ - per_cpu(rcu_update_flag, cpu)++; - /* - * If we take an NMI/SMI now, they will also increment - * the rcu_update_flag, and will not update the - * rcu_dyntick_sched.dynticks on exit. That is for - * this IRQ to do. - */ - } -} - -/** - * rcu_irq_exit - Called from exiting Hard irq context. - * - * If the CPU was idle with dynamic ticks active, update the - * rcu_dyntick_sched.dynticks to let the RCU handling be - * aware that the CPU is going back to idle with no ticks. - */ -void rcu_irq_exit(void) -{ - int cpu = smp_processor_id(); - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - /* - * rcu_update_flag is set if we interrupted the CPU - * when it was idle with ticks stopped. - * Once this occurs, we keep track of interrupt nesting - * because a NMI/SMI could also come in, and we still - * only want the IRQ that started the increment of the - * rcu_dyntick_sched.dynticks to be the one that modifies - * it on exit. - */ - if (per_cpu(rcu_update_flag, cpu)) { - if (--per_cpu(rcu_update_flag, cpu)) - return; - - /* This must match the interrupt nesting */ - WARN_ON(in_interrupt()); - - /* - * If an NMI/SMI happens now we are still - * protected by the rcu_dyntick_sched.dynticks being odd. - */ - - /* - * The following memory barrier ensures that any - * rcu_read_unlock() primitives in the irq handler - * are seen by other CPUs to preceed the following - * increment to rcu_dyntick_sched.dynticks. This - * is required in order for other CPUs to determine - * when it is safe to advance the RCU grace-period - * state machine. - */ - smp_mb(); /* see above block comment. */ - rdssp->dynticks++; - WARN_ON(rdssp->dynticks & 0x1); - } -} - -void rcu_nmi_enter(void) -{ - rcu_irq_enter(); -} - -void rcu_nmi_exit(void) -{ - rcu_irq_exit(); -} - -static void dyntick_save_progress_counter(int cpu) -{ - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - rdssp->dynticks_snap = rdssp->dynticks; -} - -static inline int -rcu_try_flip_waitack_needed(int cpu) -{ - long curr; - long snap; - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - curr = rdssp->dynticks; - snap = rdssp->dynticks_snap; - smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ - - /* - * If the CPU remained in dynticks mode for the entire time - * and didn't take any interrupts, NMIs, SMIs, or whatever, - * then it cannot be in the middle of an rcu_read_lock(), so - * the next rcu_read_lock() it executes must use the new value - * of the counter. 
So we can safely pretend that this CPU - * already acknowledged the counter. - */ - - if ((curr == snap) && ((curr & 0x1) == 0)) - return 0; - - /* - * If the CPU passed through or entered a dynticks idle phase with - * no active irq handlers, then, as above, we can safely pretend - * that this CPU already acknowledged the counter. - */ - - if ((curr - snap) > 2 || (curr & 0x1) == 0) - return 0; - - /* We need this CPU to explicitly acknowledge the counter flip. */ - - return 1; -} - -static inline int -rcu_try_flip_waitmb_needed(int cpu) -{ - long curr; - long snap; - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - curr = rdssp->dynticks; - snap = rdssp->dynticks_snap; - smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ - - /* - * If the CPU remained in dynticks mode for the entire time - * and didn't take any interrupts, NMIs, SMIs, or whatever, - * then it cannot have executed an RCU read-side critical section - * during that time, so there is no need for it to execute a - * memory barrier. - */ - - if ((curr == snap) && ((curr & 0x1) == 0)) - return 0; - - /* - * If the CPU either entered or exited an outermost interrupt, - * SMI, NMI, or whatever handler, then we know that it executed - * a memory barrier when doing so. So we don't need another one. - */ - if (curr != snap) - return 0; - - /* We need the CPU to execute a memory barrier. */ - - return 1; -} - -static void dyntick_save_progress_counter_sched(int cpu) -{ - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - rdssp->sched_dynticks_snap = rdssp->dynticks; -} - -static int rcu_qsctr_inc_needed_dyntick(int cpu) -{ - long curr; - long snap; - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - curr = rdssp->dynticks; - snap = rdssp->sched_dynticks_snap; - smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ - - /* - * If the CPU remained in dynticks mode for the entire time - * and didn't take any interrupts, NMIs, SMIs, or whatever, - * then it cannot be in the middle of an rcu_read_lock(), so - * the next rcu_read_lock() it executes must use the new value - * of the counter. Therefore, this CPU has been in a quiescent - * state the entire time, and we don't need to wait for it. - */ - - if ((curr == snap) && ((curr & 0x1) == 0)) - return 0; - - /* - * If the CPU passed through or entered a dynticks idle phase with - * no active irq handlers, then, as above, this CPU has already - * passed through a quiescent state. - */ - - if ((curr - snap) > 2 || (snap & 0x1) == 0) - return 0; - - /* We need this CPU to go through a quiescent state. */ - - return 1; -} - -#else /* !CONFIG_NO_HZ */ - -# define dyntick_save_progress_counter(cpu) do { } while (0) -# define rcu_try_flip_waitack_needed(cpu) (1) -# define rcu_try_flip_waitmb_needed(cpu) (1) - -# define dyntick_save_progress_counter_sched(cpu) do { } while (0) -# define rcu_qsctr_inc_needed_dyntick(cpu) (1) - -#endif /* CONFIG_NO_HZ */ - -static void save_qsctr_sched(int cpu) -{ - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - rdssp->sched_qs_snap = rdssp->sched_qs; -} - -static inline int rcu_qsctr_inc_needed(int cpu) -{ - struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); - - /* - * If there has been a quiescent state, no more need to wait - * on this CPU. - */ - - if (rdssp->sched_qs != rdssp->sched_qs_snap) { - smp_mb(); /* force ordering with cpu entering schedule(). 
*/ - return 0; - } - - /* We need this CPU to go through a quiescent state. */ - - return 1; -} - -/* - * Get here when RCU is idle. Decide whether we need to - * move out of idle state, and return non-zero if so. - * "Straightforward" approach for the moment, might later - * use callback-list lengths, grace-period duration, or - * some such to determine when to exit idle state. - * Might also need a pre-idle test that does not acquire - * the lock, but let's get the simple case working first... - */ - -static int -rcu_try_flip_idle(void) -{ - int cpu; - - RCU_TRACE_ME(rcupreempt_trace_try_flip_i1); - if (!rcu_pending(smp_processor_id())) { - RCU_TRACE_ME(rcupreempt_trace_try_flip_ie1); - return 0; - } - - /* - * Do the flip. - */ - - RCU_TRACE_ME(rcupreempt_trace_try_flip_g1); - rcu_ctrlblk.completed++; /* stands in for rcu_try_flip_g2 */ - - /* - * Need a memory barrier so that other CPUs see the new - * counter value before they see the subsequent change of all - * the rcu_flip_flag instances to rcu_flipped. - */ - - smp_mb(); /* see above block comment. */ - - /* Now ask each CPU for acknowledgement of the flip. */ - - for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) { - per_cpu(rcu_flip_flag, cpu) = rcu_flipped; - dyntick_save_progress_counter(cpu); - } - - return 1; -} - -/* - * Wait for CPUs to acknowledge the flip. - */ - -static int -rcu_try_flip_waitack(void) -{ - int cpu; - - RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); - for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) - if (rcu_try_flip_waitack_needed(cpu) && - per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { - RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); - return 0; - } - - /* - * Make sure our checks above don't bleed into subsequent - * waiting for the sum of the counters to reach zero. - */ - - smp_mb(); /* see above block comment. */ - RCU_TRACE_ME(rcupreempt_trace_try_flip_a2); - return 1; -} - -/* - * Wait for collective ``last'' counter to reach zero, - * then tell all CPUs to do an end-of-grace-period memory barrier. - */ - -static int -rcu_try_flip_waitzero(void) -{ - int cpu; - int lastidx = !(rcu_ctrlblk.completed & 0x1); - int sum = 0; - - /* Check to see if the sum of the "last" counters is zero. */ - - RCU_TRACE_ME(rcupreempt_trace_try_flip_z1); - for_each_possible_cpu(cpu) - sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx]; - if (sum != 0) { - RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1); - return 0; - } - - /* - * This ensures that the other CPUs see the call for - * memory barriers -after- the sum to zero has been - * detected here - */ - smp_mb(); /* ^^^^^^^^^^^^ */ - - /* Call for a memory barrier from each CPU. */ - for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) { - per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; - dyntick_save_progress_counter(cpu); - } - - RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); - return 1; -} - -/* - * Wait for all CPUs to do their end-of-grace-period memory barrier. - * Return 0 once all CPUs have done so. - */ - -static int -rcu_try_flip_waitmb(void) -{ - int cpu; - - RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); - for_each_cpu(cpu, to_cpumask(rcu_cpu_online_map)) - if (rcu_try_flip_waitmb_needed(cpu) && - per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { - RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); - return 0; - } - - smp_mb(); /* Ensure that the above checks precede any following flip. */ - RCU_TRACE_ME(rcupreempt_trace_try_flip_m2); - return 1; -} - -/* - * Attempt a single flip of the counters. Remember, a single flip does - * -not- constitute a grace period. 
Instead, the interval between - * at least GP_STAGES consecutive flips is a grace period. - * - * If anyone is nuts enough to run this CONFIG_PREEMPT_RCU implementation - * on a large SMP, they might want to use a hierarchical organization of - * the per-CPU-counter pairs. - */ -static void rcu_try_flip(void) -{ - unsigned long flags; - - RCU_TRACE_ME(rcupreempt_trace_try_flip_1); - if (unlikely(!spin_trylock_irqsave(&rcu_ctrlblk.fliplock, flags))) { - RCU_TRACE_ME(rcupreempt_trace_try_flip_e1); - return; - } - - /* - * Take the next transition(s) through the RCU grace-period - * flip-counter state machine. - */ - - switch (rcu_ctrlblk.rcu_try_flip_state) { - case rcu_try_flip_idle_state: - if (rcu_try_flip_idle()) - rcu_ctrlblk.rcu_try_flip_state = - rcu_try_flip_waitack_state; - break; - case rcu_try_flip_waitack_state: - if (rcu_try_flip_waitack()) - rcu_ctrlblk.rcu_try_flip_state = - rcu_try_flip_waitzero_state; - break; - case rcu_try_flip_waitzero_state: - if (rcu_try_flip_waitzero()) - rcu_ctrlblk.rcu_try_flip_state = - rcu_try_flip_waitmb_state; - break; - case rcu_try_flip_waitmb_state: - if (rcu_try_flip_waitmb()) - rcu_ctrlblk.rcu_try_flip_state = - rcu_try_flip_idle_state; - } - spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); -} - -/* - * Check to see if this CPU needs to do a memory barrier in order to - * ensure that any prior RCU read-side critical sections have committed - * their counter manipulations and critical-section memory references - * before declaring the grace period to be completed. - */ -static void rcu_check_mb(int cpu) -{ - if (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed) { - smp_mb(); /* Ensure RCU read-side accesses are visible. */ - per_cpu(rcu_mb_flag, cpu) = rcu_mb_done; - } -} - -void rcu_check_callbacks(int cpu, int user) -{ - unsigned long flags; - struct rcu_data *rdp; - - if (!rcu_pending(cpu)) - return; /* if nothing for RCU to do. */ - - /* - * If this CPU took its interrupt from user mode or from the - * idle loop, and this is not a nested interrupt, then - * this CPU has to have exited all prior preept-disable - * sections of code. So invoke rcu_sched_qs() to note this. - * - * The memory barrier is needed to handle the case where - * writes from a preempt-disable section of code get reordered - * into schedule() by this CPU's write buffer. So the memory - * barrier makes sure that the rcu_sched_qs() is seen by other - * CPUs to happen after any such write. - */ - - rdp = RCU_DATA_CPU(cpu); - if (user || - (idle_cpu(cpu) && !in_softirq() && - hardirq_count() <= (1 << HARDIRQ_SHIFT))) { - smp_mb(); /* Guard against aggressive schedule(). 
*/ - rcu_sched_qs(cpu); - } - - rcu_check_mb(cpu); - if (rcu_ctrlblk.completed == rdp->completed) - rcu_try_flip(); - spin_lock_irqsave(&rdp->lock, flags); - RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp); - __rcu_advance_callbacks(rdp); - if (rdp->donelist == NULL) { - spin_unlock_irqrestore(&rdp->lock, flags); - } else { - spin_unlock_irqrestore(&rdp->lock, flags); - raise_softirq(RCU_SOFTIRQ); - } -} - -/* - * Needed by dynticks, to make sure all RCU processing has finished - * when we go idle: - */ -void rcu_advance_callbacks(int cpu, int user) -{ - unsigned long flags; - struct rcu_data *rdp = RCU_DATA_CPU(cpu); - - if (rcu_ctrlblk.completed == rdp->completed) { - rcu_try_flip(); - if (rcu_ctrlblk.completed == rdp->completed) - return; - } - spin_lock_irqsave(&rdp->lock, flags); - RCU_TRACE_RDP(rcupreempt_trace_check_callbacks, rdp); - __rcu_advance_callbacks(rdp); - spin_unlock_irqrestore(&rdp->lock, flags); -} - -#ifdef CONFIG_HOTPLUG_CPU -#define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) do { \ - *dsttail = srclist; \ - if (srclist != NULL) { \ - dsttail = srctail; \ - srclist = NULL; \ - srctail = &srclist;\ - } \ - } while (0) - -void rcu_offline_cpu(int cpu) -{ - int i; - struct rcu_head *list = NULL; - unsigned long flags; - struct rcu_data *rdp = RCU_DATA_CPU(cpu); - struct rcu_head *schedlist = NULL; - struct rcu_head **schedtail = &schedlist; - struct rcu_head **tail = &list; - - /* - * Remove all callbacks from the newly dead CPU, retaining order. - * Otherwise rcu_barrier() will fail - */ - - spin_lock_irqsave(&rdp->lock, flags); - rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail); - for (i = GP_STAGES - 1; i >= 0; i--) - rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], - list, tail); - rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); - rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail, - schedlist, schedtail); - rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail, - schedlist, schedtail); - rdp->rcu_sched_sleeping = 0; - spin_unlock_irqrestore(&rdp->lock, flags); - rdp->waitlistcount = 0; - - /* Disengage the newly dead CPU from the grace-period computation. */ - - spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); - rcu_check_mb(cpu); - if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) { - smp_mb(); /* Subsequent counter accesses must see new value */ - per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen; - smp_mb(); /* Subsequent RCU read-side critical sections */ - /* seen -after- acknowledgement. */ - } - - cpumask_clear_cpu(cpu, to_cpumask(rcu_cpu_online_map)); - - spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); - - /* - * Place the removed callbacks on the current CPU's queue. - * Make them all start a new grace period: simple approach, - * in theory could starve a given set of callbacks, but - * you would need to be doing some serious CPU hotplugging - * to make this happen. If this becomes a problem, adding - * a synchronize_rcu() to the hotplug path would be a simple - * fix. - */ - - local_irq_save(flags); /* disable preempt till we know what lock. 
*/ - rdp = RCU_DATA_ME(); - spin_lock(&rdp->lock); - *rdp->nexttail = list; - if (list) - rdp->nexttail = tail; - *rdp->nextschedtail = schedlist; - if (schedlist) - rdp->nextschedtail = schedtail; - spin_unlock_irqrestore(&rdp->lock, flags); -} - -#else /* #ifdef CONFIG_HOTPLUG_CPU */ - -void rcu_offline_cpu(int cpu) -{ -} - -#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ - -void __cpuinit rcu_online_cpu(int cpu) -{ - unsigned long flags; - struct rcu_data *rdp; - - spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); - cpumask_set_cpu(cpu, to_cpumask(rcu_cpu_online_map)); - spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); - - /* - * The rcu_sched grace-period processing might have bypassed - * this CPU, given that it was not in the rcu_cpu_online_map - * when the grace-period scan started. This means that the - * grace-period task might sleep. So make sure that if this - * should happen, the first callback posted to this CPU will - * wake up the grace-period task if need be. - */ - - rdp = RCU_DATA_CPU(cpu); - spin_lock_irqsave(&rdp->lock, flags); - rdp->rcu_sched_sleeping = 1; - spin_unlock_irqrestore(&rdp->lock, flags); -} - -static void rcu_process_callbacks(struct softirq_action *unused) -{ - unsigned long flags; - struct rcu_head *next, *list; - struct rcu_data *rdp; - - local_irq_save(flags); - rdp = RCU_DATA_ME(); - spin_lock(&rdp->lock); - list = rdp->donelist; - if (list == NULL) { - spin_unlock_irqrestore(&rdp->lock, flags); - return; - } - rdp->donelist = NULL; - rdp->donetail = &rdp->donelist; - RCU_TRACE_RDP(rcupreempt_trace_done_remove, rdp); - spin_unlock_irqrestore(&rdp->lock, flags); - while (list) { - next = list->next; - list->func(list); - list = next; - RCU_TRACE_ME(rcupreempt_trace_invoke); - } -} - -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) -{ - unsigned long flags; - struct rcu_data *rdp; - - head->func = func; - head->next = NULL; - local_irq_save(flags); - rdp = RCU_DATA_ME(); - spin_lock(&rdp->lock); - __rcu_advance_callbacks(rdp); - *rdp->nexttail = head; - rdp->nexttail = &head->next; - RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); - spin_unlock_irqrestore(&rdp->lock, flags); -} -EXPORT_SYMBOL_GPL(call_rcu); - -void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) -{ - unsigned long flags; - struct rcu_data *rdp; - int wake_gp = 0; - - head->func = func; - head->next = NULL; - local_irq_save(flags); - rdp = RCU_DATA_ME(); - spin_lock(&rdp->lock); - *rdp->nextschedtail = head; - rdp->nextschedtail = &head->next; - if (rdp->rcu_sched_sleeping) { - - /* Grace-period processing might be sleeping... */ - - rdp->rcu_sched_sleeping = 0; - wake_gp = 1; - } - spin_unlock_irqrestore(&rdp->lock, flags); - if (wake_gp) { - - /* Wake up grace-period processing, unless someone beat us. */ - - spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); - if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping) - wake_gp = 0; - rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping; - spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); - if (wake_gp) - wake_up_interruptible(&rcu_ctrlblk.sched_wq); - } -} -EXPORT_SYMBOL_GPL(call_rcu_sched); - -/* - * Wait until all currently running preempt_disable() code segments - * (including hardware-irq-disable segments) complete. Note that - * in -rt this does -not- necessarily result in all currently executing - * interrupt -handlers- having completed. - */ -void __synchronize_sched(void) -{ - struct rcu_synchronize rcu; - - if (num_online_cpus() == 1) - return; /* blocking is gp if only one CPU! 
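
The __synchronize_sched() path here is an instance of a common kernel pattern: build a blocking "wait for a grace period" primitive out of the asynchronous callback API plus a completion. A minimal sketch of that pattern follows; my_sync and my_wakeme are illustrative names standing in for the kernel's struct rcu_synchronize and wakeme_after_rcu().

	/*
	 * Minimal sketch: turn an asynchronous callback API into a blocking
	 * wait by posting a callback whose only job is to fire complete().
	 * Names are illustrative; the real helpers are struct rcu_synchronize
	 * and wakeme_after_rcu() in kernel/rcupdate.c.
	 */
	struct my_sync {
		struct rcu_head head;
		struct completion completion;
	};

	static void my_wakeme(struct rcu_head *head)
	{
		struct my_sync *s = container_of(head, struct my_sync, head);

		complete(&s->completion);
	}

	static void my_synchronize_sched(void)
	{
		struct my_sync s;

		init_completion(&s.completion);
		call_rcu_sched(&s.head, my_wakeme); /* runs after a grace period */
		wait_for_completion(&s.completion);
	}
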
*/ - - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu_sched(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); -} -EXPORT_SYMBOL_GPL(__synchronize_sched); - -/* - * kthread function that manages call_rcu_sched grace periods. - */ -static int rcu_sched_grace_period(void *arg) -{ - int couldsleep; /* might sleep after current pass. */ - int couldsleepnext = 0; /* might sleep after next pass. */ - int cpu; - unsigned long flags; - struct rcu_data *rdp; - int ret; - - /* - * Each pass through the following loop handles one - * rcu_sched grace period cycle. - */ - do { - /* Save each CPU's current state. */ - - for_each_online_cpu(cpu) { - dyntick_save_progress_counter_sched(cpu); - save_qsctr_sched(cpu); - } - - /* - * Sleep for about an RCU grace-period's worth to - * allow better batching and to consume less CPU. - */ - schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME); - - /* - * If there was nothing to do last time, prepare to - * sleep at the end of the current grace period cycle. - */ - couldsleep = couldsleepnext; - couldsleepnext = 1; - if (couldsleep) { - spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); - rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep; - spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); - } - - /* - * Wait on each CPU in turn to have either visited - * a quiescent state or been in dynticks-idle mode. - */ - for_each_online_cpu(cpu) { - while (rcu_qsctr_inc_needed(cpu) && - rcu_qsctr_inc_needed_dyntick(cpu)) { - /* resched_cpu(cpu); @@@ */ - schedule_timeout_interruptible(1); - } - } - - /* Advance callbacks for each CPU. */ - - for_each_online_cpu(cpu) { - - rdp = RCU_DATA_CPU(cpu); - spin_lock_irqsave(&rdp->lock, flags); - - /* - * We are running on this CPU irq-disabled, so no - * CPU can go offline until we re-enable irqs. - * The current CPU might have already gone - * offline (between the for_each_offline_cpu and - * the spin_lock_irqsave), but in that case all its - * callback lists will be empty, so no harm done. - * - * Advance the callbacks! We share normal RCU's - * donelist, since callbacks are invoked the - * same way in either case. - */ - if (rdp->waitschedlist != NULL) { - *rdp->donetail = rdp->waitschedlist; - rdp->donetail = rdp->waitschedtail; - - /* - * Next rcu_check_callbacks() will - * do the required raise_softirq(). - */ - } - if (rdp->nextschedlist != NULL) { - rdp->waitschedlist = rdp->nextschedlist; - rdp->waitschedtail = rdp->nextschedtail; - couldsleep = 0; - couldsleepnext = 0; - } else { - rdp->waitschedlist = NULL; - rdp->waitschedtail = &rdp->waitschedlist; - } - rdp->nextschedlist = NULL; - rdp->nextschedtail = &rdp->nextschedlist; - - /* Mark sleep intention. */ - - rdp->rcu_sched_sleeping = couldsleep; - - spin_unlock_irqrestore(&rdp->lock, flags); - } - - /* If we saw callbacks on the last scan, go deal with them. */ - - if (!couldsleep) - continue; - - /* Attempt to block... */ - - spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); - if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) { - - /* - * Someone posted a callback after we scanned. - * Go take care of it. - */ - spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); - couldsleepnext = 0; - continue; - } - - /* Block until the next person posts a callback. 
*/ - - rcu_ctrlblk.sched_sleep = rcu_sched_sleeping; - spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); - ret = 0; /* unused */ - __wait_event_interruptible(rcu_ctrlblk.sched_wq, - rcu_ctrlblk.sched_sleep != rcu_sched_sleeping, - ret); - - couldsleepnext = 0; - - } while (!kthread_should_stop()); - - return (0); -} - -/* - * Check to see if any future RCU-related work will need to be done - * by the current CPU, even if none need be done immediately, returning - * 1 if so. Assumes that notifiers would take care of handling any - * outstanding requests from the RCU core. - * - * This function is part of the RCU implementation; it is -not- - * an exported member of the RCU API. - */ -int rcu_needs_cpu(int cpu) -{ - struct rcu_data *rdp = RCU_DATA_CPU(cpu); - - return (rdp->donelist != NULL || - !!rdp->waitlistcount || - rdp->nextlist != NULL || - rdp->nextschedlist != NULL || - rdp->waitschedlist != NULL); -} - -static int rcu_pending(int cpu) -{ - struct rcu_data *rdp = RCU_DATA_CPU(cpu); - - /* The CPU has at least one callback queued somewhere. */ - - if (rdp->donelist != NULL || - !!rdp->waitlistcount || - rdp->nextlist != NULL || - rdp->nextschedlist != NULL || - rdp->waitschedlist != NULL) - return 1; - - /* The RCU core needs an acknowledgement from this CPU. */ - - if ((per_cpu(rcu_flip_flag, cpu) == rcu_flipped) || - (per_cpu(rcu_mb_flag, cpu) == rcu_mb_needed)) - return 1; - - /* This CPU has fallen behind the global grace-period number. */ - - if (rdp->completed != rcu_ctrlblk.completed) - return 1; - - /* Nothing needed from this CPU. */ - - return 0; -} - -int __cpuinit rcu_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - long cpu = (long)hcpu; - - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - rcu_online_cpu(cpu); - break; - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - case CPU_DEAD: - case CPU_DEAD_FROZEN: - rcu_offline_cpu(cpu); - break; - default: - break; - } - return NOTIFY_OK; -} - -void __init __rcu_init(void) -{ - int cpu; - int i; - struct rcu_data *rdp; - - printk(KERN_NOTICE "Preemptible RCU implementation.\n"); - for_each_possible_cpu(cpu) { - rdp = RCU_DATA_CPU(cpu); - spin_lock_init(&rdp->lock); - rdp->completed = 0; - rdp->waitlistcount = 0; - rdp->nextlist = NULL; - rdp->nexttail = &rdp->nextlist; - for (i = 0; i < GP_STAGES; i++) { - rdp->waitlist[i] = NULL; - rdp->waittail[i] = &rdp->waitlist[i]; - } - rdp->donelist = NULL; - rdp->donetail = &rdp->donelist; - rdp->rcu_flipctr[0] = 0; - rdp->rcu_flipctr[1] = 0; - rdp->nextschedlist = NULL; - rdp->nextschedtail = &rdp->nextschedlist; - rdp->waitschedlist = NULL; - rdp->waitschedtail = &rdp->waitschedlist; - rdp->rcu_sched_sleeping = 0; - } - open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); -} - -/* - * Late-boot-time RCU initialization that must wait until after scheduler - * has been initialized. 
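
Every callback queue initialized in __rcu_init() above (->nextlist/->nexttail, ->waitlist[]/->waittail[], ->donelist/->donetail, and the sched variants) uses the same head-plus-tail-pointer idiom that also drives call_rcu(), __rcu_advance_callbacks() and rcu_offline_cpu_enqueue(). A standalone sketch of that idiom, with illustrative names rather than the kernel's:

	/*
	 * Head + tail-pointer singly linked list: "tail" always points at
	 * the NULL that terminates the list, so append and splice are O(1).
	 */
	struct cb {
		struct cb *next;
	};

	struct cb_list {
		struct cb *head;
		struct cb **tail;
	};

	static void cb_list_init(struct cb_list *l)
	{
		l->head = NULL;
		l->tail = &l->head;
	}

	static void cb_list_append(struct cb_list *l, struct cb *node)
	{
		node->next = NULL;
		*l->tail = node;
		l->tail = &node->next;
	}

	/* Move everything from src onto dst and leave src empty, the same
	 * shape as rcu_offline_cpu_enqueue() above. */
	static void cb_list_splice(struct cb_list *dst, struct cb_list *src)
	{
		if (src->head != NULL) {
			*dst->tail = src->head;
			dst->tail = src->tail;
			cb_list_init(src);
		}
	}
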
- */ -void __init rcu_init_sched(void) -{ - rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period, - NULL, - "rcu_sched_grace_period"); - WARN_ON(IS_ERR(rcu_sched_grace_period_task)); -} - -#ifdef CONFIG_RCU_TRACE -long *rcupreempt_flipctr(int cpu) -{ - return &RCU_DATA_CPU(cpu)->rcu_flipctr[0]; -} -EXPORT_SYMBOL_GPL(rcupreempt_flipctr); - -int rcupreempt_flip_flag(int cpu) -{ - return per_cpu(rcu_flip_flag, cpu); -} -EXPORT_SYMBOL_GPL(rcupreempt_flip_flag); - -int rcupreempt_mb_flag(int cpu) -{ - return per_cpu(rcu_mb_flag, cpu); -} -EXPORT_SYMBOL_GPL(rcupreempt_mb_flag); - -char *rcupreempt_try_flip_state_name(void) -{ - return rcu_try_flip_state_names[rcu_ctrlblk.rcu_try_flip_state]; -} -EXPORT_SYMBOL_GPL(rcupreempt_try_flip_state_name); - -struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu) -{ - struct rcu_data *rdp = RCU_DATA_CPU(cpu); - - return &rdp->trace; -} -EXPORT_SYMBOL_GPL(rcupreempt_trace_cpu); - -#endif /* #ifdef RCU_TRACE */ diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c deleted file mode 100644 index 11640346a507..000000000000 --- a/kernel/rcupreempt_trace.c +++ /dev/null @@ -1,335 +0,0 @@ -/* - * Read-Copy Update tracing for realtime implementation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright IBM Corporation, 2006 - * - * Papers: http://www.rdrop.com/users/paulmck/RCU - * - * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU/ *.txt - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static struct mutex rcupreempt_trace_mutex; -static char *rcupreempt_trace_buf; -#define RCUPREEMPT_TRACE_BUF_SIZE 4096 - -void rcupreempt_trace_move2done(struct rcupreempt_trace *trace) -{ - trace->done_length += trace->wait_length; - trace->done_add += trace->wait_length; - trace->wait_length = 0; -} -void rcupreempt_trace_move2wait(struct rcupreempt_trace *trace) -{ - trace->wait_length += trace->next_length; - trace->wait_add += trace->next_length; - trace->next_length = 0; -} -void rcupreempt_trace_try_flip_1(struct rcupreempt_trace *trace) -{ - atomic_inc(&trace->rcu_try_flip_1); -} -void rcupreempt_trace_try_flip_e1(struct rcupreempt_trace *trace) -{ - atomic_inc(&trace->rcu_try_flip_e1); -} -void rcupreempt_trace_try_flip_i1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_i1++; -} -void rcupreempt_trace_try_flip_ie1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_ie1++; -} -void rcupreempt_trace_try_flip_g1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_g1++; -} -void rcupreempt_trace_try_flip_a1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_a1++; -} -void rcupreempt_trace_try_flip_ae1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_ae1++; -} -void rcupreempt_trace_try_flip_a2(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_a2++; -} -void rcupreempt_trace_try_flip_z1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_z1++; -} -void rcupreempt_trace_try_flip_ze1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_ze1++; -} -void rcupreempt_trace_try_flip_z2(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_z2++; -} -void rcupreempt_trace_try_flip_m1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_m1++; -} -void rcupreempt_trace_try_flip_me1(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_me1++; -} -void rcupreempt_trace_try_flip_m2(struct rcupreempt_trace *trace) -{ - trace->rcu_try_flip_m2++; -} -void rcupreempt_trace_check_callbacks(struct rcupreempt_trace *trace) -{ - trace->rcu_check_callbacks++; -} -void rcupreempt_trace_done_remove(struct rcupreempt_trace *trace) -{ - trace->done_remove += trace->done_length; - trace->done_length = 0; -} -void rcupreempt_trace_invoke(struct rcupreempt_trace *trace) -{ - atomic_inc(&trace->done_invoked); -} -void rcupreempt_trace_next_add(struct rcupreempt_trace *trace) -{ - trace->next_add++; - trace->next_length++; -} - -static void rcupreempt_trace_sum(struct rcupreempt_trace *sp) -{ - struct rcupreempt_trace *cp; - int cpu; - - memset(sp, 0, sizeof(*sp)); - for_each_possible_cpu(cpu) { - cp = rcupreempt_trace_cpu(cpu); - sp->next_length += cp->next_length; - sp->next_add += cp->next_add; - sp->wait_length += cp->wait_length; - sp->wait_add += cp->wait_add; - sp->done_length += cp->done_length; - sp->done_add += cp->done_add; - sp->done_remove += cp->done_remove; - atomic_add(atomic_read(&cp->done_invoked), &sp->done_invoked); - sp->rcu_check_callbacks += cp->rcu_check_callbacks; - atomic_add(atomic_read(&cp->rcu_try_flip_1), - &sp->rcu_try_flip_1); - atomic_add(atomic_read(&cp->rcu_try_flip_e1), - &sp->rcu_try_flip_e1); - sp->rcu_try_flip_i1 += 
cp->rcu_try_flip_i1; - sp->rcu_try_flip_ie1 += cp->rcu_try_flip_ie1; - sp->rcu_try_flip_g1 += cp->rcu_try_flip_g1; - sp->rcu_try_flip_a1 += cp->rcu_try_flip_a1; - sp->rcu_try_flip_ae1 += cp->rcu_try_flip_ae1; - sp->rcu_try_flip_a2 += cp->rcu_try_flip_a2; - sp->rcu_try_flip_z1 += cp->rcu_try_flip_z1; - sp->rcu_try_flip_ze1 += cp->rcu_try_flip_ze1; - sp->rcu_try_flip_z2 += cp->rcu_try_flip_z2; - sp->rcu_try_flip_m1 += cp->rcu_try_flip_m1; - sp->rcu_try_flip_me1 += cp->rcu_try_flip_me1; - sp->rcu_try_flip_m2 += cp->rcu_try_flip_m2; - } -} - -static ssize_t rcustats_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - struct rcupreempt_trace trace; - ssize_t bcount; - int cnt = 0; - - rcupreempt_trace_sum(&trace); - mutex_lock(&rcupreempt_trace_mutex); - snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt, - "ggp=%ld rcc=%ld\n", - rcu_batches_completed(), - trace.rcu_check_callbacks); - snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE - cnt, - "na=%ld nl=%ld wa=%ld wl=%ld da=%ld dl=%ld dr=%ld di=%d\n" - "1=%d e1=%d i1=%ld ie1=%ld g1=%ld a1=%ld ae1=%ld a2=%ld\n" - "z1=%ld ze1=%ld z2=%ld m1=%ld me1=%ld m2=%ld\n", - - trace.next_add, trace.next_length, - trace.wait_add, trace.wait_length, - trace.done_add, trace.done_length, - trace.done_remove, atomic_read(&trace.done_invoked), - atomic_read(&trace.rcu_try_flip_1), - atomic_read(&trace.rcu_try_flip_e1), - trace.rcu_try_flip_i1, trace.rcu_try_flip_ie1, - trace.rcu_try_flip_g1, - trace.rcu_try_flip_a1, trace.rcu_try_flip_ae1, - trace.rcu_try_flip_a2, - trace.rcu_try_flip_z1, trace.rcu_try_flip_ze1, - trace.rcu_try_flip_z2, - trace.rcu_try_flip_m1, trace.rcu_try_flip_me1, - trace.rcu_try_flip_m2); - bcount = simple_read_from_buffer(buffer, count, ppos, - rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); - mutex_unlock(&rcupreempt_trace_mutex); - return bcount; -} - -static ssize_t rcugp_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - long oldgp = rcu_batches_completed(); - ssize_t bcount; - - mutex_lock(&rcupreempt_trace_mutex); - synchronize_rcu(); - snprintf(rcupreempt_trace_buf, RCUPREEMPT_TRACE_BUF_SIZE, - "oldggp=%ld newggp=%ld\n", oldgp, rcu_batches_completed()); - bcount = simple_read_from_buffer(buffer, count, ppos, - rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); - mutex_unlock(&rcupreempt_trace_mutex); - return bcount; -} - -static ssize_t rcuctrs_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - int cnt = 0; - int cpu; - int f = rcu_batches_completed() & 0x1; - ssize_t bcount; - - mutex_lock(&rcupreempt_trace_mutex); - - cnt += snprintf(&rcupreempt_trace_buf[cnt], RCUPREEMPT_TRACE_BUF_SIZE, - "CPU last cur F M\n"); - for_each_possible_cpu(cpu) { - long *flipctr = rcupreempt_flipctr(cpu); - cnt += snprintf(&rcupreempt_trace_buf[cnt], - RCUPREEMPT_TRACE_BUF_SIZE - cnt, - "%3d%c %4ld %3ld %d %d\n", - cpu, - cpu_is_offline(cpu) ? '!' 
: ' ', - flipctr[!f], - flipctr[f], - rcupreempt_flip_flag(cpu), - rcupreempt_mb_flag(cpu)); - } - cnt += snprintf(&rcupreempt_trace_buf[cnt], - RCUPREEMPT_TRACE_BUF_SIZE - cnt, - "ggp = %ld, state = %s\n", - rcu_batches_completed(), - rcupreempt_try_flip_state_name()); - cnt += snprintf(&rcupreempt_trace_buf[cnt], - RCUPREEMPT_TRACE_BUF_SIZE - cnt, - "\n"); - bcount = simple_read_from_buffer(buffer, count, ppos, - rcupreempt_trace_buf, strlen(rcupreempt_trace_buf)); - mutex_unlock(&rcupreempt_trace_mutex); - return bcount; -} - -static struct file_operations rcustats_fops = { - .owner = THIS_MODULE, - .read = rcustats_read, -}; - -static struct file_operations rcugp_fops = { - .owner = THIS_MODULE, - .read = rcugp_read, -}; - -static struct file_operations rcuctrs_fops = { - .owner = THIS_MODULE, - .read = rcuctrs_read, -}; - -static struct dentry *rcudir, *statdir, *ctrsdir, *gpdir; -static int rcupreempt_debugfs_init(void) -{ - rcudir = debugfs_create_dir("rcu", NULL); - if (!rcudir) - goto out; - statdir = debugfs_create_file("rcustats", 0444, rcudir, - NULL, &rcustats_fops); - if (!statdir) - goto free_out; - - gpdir = debugfs_create_file("rcugp", 0444, rcudir, NULL, &rcugp_fops); - if (!gpdir) - goto free_out; - - ctrsdir = debugfs_create_file("rcuctrs", 0444, rcudir, - NULL, &rcuctrs_fops); - if (!ctrsdir) - goto free_out; - return 0; -free_out: - if (statdir) - debugfs_remove(statdir); - if (gpdir) - debugfs_remove(gpdir); - debugfs_remove(rcudir); -out: - return 1; -} - -static int __init rcupreempt_trace_init(void) -{ - int ret; - - mutex_init(&rcupreempt_trace_mutex); - rcupreempt_trace_buf = kmalloc(RCUPREEMPT_TRACE_BUF_SIZE, GFP_KERNEL); - if (!rcupreempt_trace_buf) - return 1; - ret = rcupreempt_debugfs_init(); - if (ret) - kfree(rcupreempt_trace_buf); - return ret; -} - -static void __exit rcupreempt_trace_cleanup(void) -{ - debugfs_remove(statdir); - debugfs_remove(gpdir); - debugfs_remove(ctrsdir); - debugfs_remove(rcudir); - kfree(rcupreempt_trace_buf); -} - - -module_init(rcupreempt_trace_init); -module_exit(rcupreempt_trace_cleanup); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index f87fb0c8f924..82fbc49728df 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -725,7 +725,7 @@ config RCU_TORTURE_TEST_RUNNABLE config RCU_CPU_STALL_DETECTOR bool "Check for stalled CPUs delaying RCU grace periods" - depends on CLASSIC_RCU || TREE_RCU || TREE_PREEMPT_RCU + depends on TREE_RCU || TREE_PREEMPT_RCU default n help This option causes RCU to printk information on which -- cgit v1.2.3 From 70041088e3b976627ba9a183b812f39ef8a9ba0e Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Fri, 21 Aug 2009 12:28:31 +0000 Subject: RDS: Add TCP transport to RDS This code allows RDS to be tunneled over a TCP connection. RDMA operations are disabled when using TCP transport, but this frees RDS from the IB/RDMA stack dependency, and allows it to be used with standard Ethernet adapters, or in a VM. Signed-off-by: Andy Grover Signed-off-by: David S. 
Miller --- include/linux/rds.h | 12 ++ net/rds/tcp.c | 319 ++++++++++++++++++++++++++++++++++++++++++++ net/rds/tcp.h | 93 +++++++++++++ net/rds/tcp_connect.c | 153 ++++++++++++++++++++++ net/rds/tcp_listen.c | 199 ++++++++++++++++++++++++++++ net/rds/tcp_recv.c | 356 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/rds/tcp_send.c | 263 +++++++++++++++++++++++++++++++++++++ net/rds/tcp_stats.c | 74 +++++++++++ 8 files changed, 1469 insertions(+) create mode 100644 net/rds/tcp.c create mode 100644 net/rds/tcp.h create mode 100644 net/rds/tcp_connect.c create mode 100644 net/rds/tcp_listen.c create mode 100644 net/rds/tcp_recv.c create mode 100644 net/rds/tcp_send.c create mode 100644 net/rds/tcp_stats.c (limited to 'include') diff --git a/include/linux/rds.h b/include/linux/rds.h index d91dc91f5443..89d46e1afbb1 100644 --- a/include/linux/rds.h +++ b/include/linux/rds.h @@ -147,6 +147,18 @@ struct rds_info_socket { u_int64_t inum; } __attribute__((packed)); +struct rds_info_tcp_socket { + __be32 local_addr; + __be16 local_port; + __be32 peer_addr; + __be16 peer_port; + u_int64_t hdr_rem; + u_int64_t data_rem; + u_int32_t last_sent_nxt; + u_int32_t last_expected_una; + u_int32_t last_seen_una; +} __attribute__((packed)); + #define RDS_IB_GID_LEN 16 struct rds_info_rdma_connection { __be32 src_addr; diff --git a/net/rds/tcp.c b/net/rds/tcp.c new file mode 100644 index 000000000000..e0ac9009db1a --- /dev/null +++ b/net/rds/tcp.c @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
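
As the changelog for this patch notes, carrying RDS over TCP leaves the socket API seen by applications unchanged; only the wire transport differs. A hedged user-space sketch of that API: PF_RDS (21 in linux/socket.h) and SOCK_SEQPACKET are the real constants, while the addresses and the port number below are made up for illustration.

	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <arpa/inet.h>

	#ifndef PF_RDS
	#define PF_RDS 21	/* AF_RDS/PF_RDS from linux/socket.h */
	#endif

	int rds_hello(void)
	{
		int fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
		struct sockaddr_in laddr = { .sin_family = AF_INET };
		struct sockaddr_in faddr = { .sin_family = AF_INET };
		const char msg[] = "hello";

		if (fd < 0)
			return -1;

		inet_pton(AF_INET, "192.0.2.1", &laddr.sin_addr);	/* local IP (made up) */
		laddr.sin_port = htons(18634);				/* arbitrary port */
		if (bind(fd, (struct sockaddr *)&laddr, sizeof(laddr)) < 0)
			return -1;

		inet_pton(AF_INET, "192.0.2.2", &faddr.sin_addr);	/* peer IP (made up) */
		faddr.sin_port = htons(18634);
		return sendto(fd, msg, sizeof(msg), 0,
			      (struct sockaddr *)&faddr, sizeof(faddr));
	}
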
+ * + */ +#include +#include +#include + +#include "rds.h" +#include "tcp.h" + +/* only for info exporting */ +static DEFINE_SPINLOCK(rds_tcp_tc_list_lock); +static LIST_HEAD(rds_tcp_tc_list); +unsigned int rds_tcp_tc_count; + +/* Track rds_tcp_connection structs so they can be cleaned up */ +static DEFINE_SPINLOCK(rds_tcp_conn_lock); +static LIST_HEAD(rds_tcp_conn_list); + +static struct kmem_cache *rds_tcp_conn_slab; + +#define RDS_TCP_DEFAULT_BUFSIZE (128 * 1024) + +/* doing it this way avoids calling tcp_sk() */ +void rds_tcp_nonagle(struct socket *sock) +{ + mm_segment_t oldfs = get_fs(); + int val = 1; + + set_fs(KERNEL_DS); + sock->ops->setsockopt(sock, SOL_TCP, TCP_NODELAY, (char __user *)&val, + sizeof(val)); + set_fs(oldfs); +} + +void rds_tcp_tune(struct socket *sock) +{ + struct sock *sk = sock->sk; + + rds_tcp_nonagle(sock); + + /* + * We're trying to saturate gigabit with the default, + * see svc_sock_setbufsize(). + */ + lock_sock(sk); + sk->sk_sndbuf = RDS_TCP_DEFAULT_BUFSIZE; + sk->sk_rcvbuf = RDS_TCP_DEFAULT_BUFSIZE; + sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; + release_sock(sk); +} + +u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc) +{ + return tcp_sk(tc->t_sock->sk)->snd_nxt; +} + +u32 rds_tcp_snd_una(struct rds_tcp_connection *tc) +{ + return tcp_sk(tc->t_sock->sk)->snd_una; +} + +void rds_tcp_restore_callbacks(struct socket *sock, + struct rds_tcp_connection *tc) +{ + rdsdebug("restoring sock %p callbacks from tc %p\n", sock, tc); + write_lock_bh(&sock->sk->sk_callback_lock); + + /* done under the callback_lock to serialize with write_space */ + spin_lock(&rds_tcp_tc_list_lock); + list_del_init(&tc->t_list_item); + rds_tcp_tc_count--; + spin_unlock(&rds_tcp_tc_list_lock); + + tc->t_sock = NULL; + + sock->sk->sk_write_space = tc->t_orig_write_space; + sock->sk->sk_data_ready = tc->t_orig_data_ready; + sock->sk->sk_state_change = tc->t_orig_state_change; + sock->sk->sk_user_data = NULL; + + write_unlock_bh(&sock->sk->sk_callback_lock); +} + +/* + * This is the only path that sets tc->t_sock. Send and receive trust that + * it is set. The RDS_CONN_CONNECTED bit protects those paths from being + * called while it isn't set. 
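
rds_tcp_set_callbacks()/rds_tcp_restore_callbacks() follow the standard pattern for intercepting a kernel socket's event callbacks: save the originals, install the transport's handlers, and do both under sk_callback_lock so the swap cannot race with a callback in flight. A stripped-down sketch of that pattern; the struct and function names here are illustrative, not part of the patch.

	struct my_transport {
		struct socket	*sock;
		void		(*orig_data_ready)(struct sock *sk, int bytes);
	};

	static void my_data_ready(struct sock *sk, int bytes)
	{
		/* queue receive work; the real handler is rds_tcp_data_ready() */
	}

	static void my_install_callbacks(struct socket *sock, struct my_transport *t)
	{
		write_lock_bh(&sock->sk->sk_callback_lock);
		t->sock = sock;
		t->orig_data_ready = sock->sk->sk_data_ready;
		sock->sk->sk_user_data = t;
		sock->sk->sk_data_ready = my_data_ready;
		write_unlock_bh(&sock->sk->sk_callback_lock);
	}

	static void my_remove_callbacks(struct socket *sock, struct my_transport *t)
	{
		write_lock_bh(&sock->sk->sk_callback_lock);
		t->sock = NULL;
		sock->sk->sk_data_ready = t->orig_data_ready;
		sock->sk->sk_user_data = NULL;
		write_unlock_bh(&sock->sk->sk_callback_lock);
	}
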
+ */ +void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + + rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc); + write_lock_bh(&sock->sk->sk_callback_lock); + + /* done under the callback_lock to serialize with write_space */ + spin_lock(&rds_tcp_tc_list_lock); + list_add_tail(&tc->t_list_item, &rds_tcp_tc_list); + rds_tcp_tc_count++; + spin_unlock(&rds_tcp_tc_list_lock); + + /* accepted sockets need our listen data ready undone */ + if (sock->sk->sk_data_ready == rds_tcp_listen_data_ready) + sock->sk->sk_data_ready = sock->sk->sk_user_data; + + tc->t_sock = sock; + tc->conn = conn; + tc->t_orig_data_ready = sock->sk->sk_data_ready; + tc->t_orig_write_space = sock->sk->sk_write_space; + tc->t_orig_state_change = sock->sk->sk_state_change; + + sock->sk->sk_user_data = conn; + sock->sk->sk_data_ready = rds_tcp_data_ready; + sock->sk->sk_write_space = rds_tcp_write_space; + sock->sk->sk_state_change = rds_tcp_state_change; + + write_unlock_bh(&sock->sk->sk_callback_lock); +} + +static void rds_tcp_tc_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + struct rds_info_tcp_socket tsinfo; + struct rds_tcp_connection *tc; + unsigned long flags; + struct sockaddr_in sin; + int sinlen; + + spin_lock_irqsave(&rds_tcp_tc_list_lock, flags); + + if (len / sizeof(tsinfo) < rds_tcp_tc_count) + goto out; + + list_for_each_entry(tc, &rds_tcp_tc_list, t_list_item) { + + sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 0); + tsinfo.local_addr = sin.sin_addr.s_addr; + tsinfo.local_port = sin.sin_port; + sock->ops->getname(sock, (struct sockaddr *)&sin, &sinlen, 1); + tsinfo.peer_addr = sin.sin_addr.s_addr; + tsinfo.peer_port = sin.sin_port; + + tsinfo.hdr_rem = tc->t_tinc_hdr_rem; + tsinfo.data_rem = tc->t_tinc_data_rem; + tsinfo.last_sent_nxt = tc->t_last_sent_nxt; + tsinfo.last_expected_una = tc->t_last_expected_una; + tsinfo.last_seen_una = tc->t_last_seen_una; + + rds_info_copy(iter, &tsinfo, sizeof(tsinfo)); + } + +out: + lens->nr = rds_tcp_tc_count; + lens->each = sizeof(tsinfo); + + spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags); +} + +static int rds_tcp_laddr_check(__be32 addr) +{ + if (inet_addr_type(&init_net, addr) == RTN_LOCAL) + return 0; + return -EADDRNOTAVAIL; +} + +static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) +{ + struct rds_tcp_connection *tc; + + tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); + if (tc == NULL) + return -ENOMEM; + + tc->t_sock = NULL; + tc->t_tinc = NULL; + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; + + conn->c_transport_data = tc; + + spin_lock_irq(&rds_tcp_conn_lock); + list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); + spin_unlock_irq(&rds_tcp_conn_lock); + + rdsdebug("alloced tc %p\n", conn->c_transport_data); + return 0; +} + +static void rds_tcp_conn_free(void *arg) +{ + struct rds_tcp_connection *tc = arg; + rdsdebug("freeing tc %p\n", tc); + kmem_cache_free(rds_tcp_conn_slab, tc); +} + +static void rds_tcp_destroy_conns(void) +{ + struct rds_tcp_connection *tc, *_tc; + LIST_HEAD(tmp_list); + + /* avoid calling conn_destroy with irqs off */ + spin_lock_irq(&rds_tcp_conn_lock); + list_splice(&rds_tcp_conn_list, &tmp_list); + INIT_LIST_HEAD(&rds_tcp_conn_list); + spin_unlock_irq(&rds_tcp_conn_lock); + + list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { + if (tc->conn->c_passive) + rds_conn_destroy(tc->conn->c_passive); + 
rds_conn_destroy(tc->conn); + } +} + +void rds_tcp_exit(void) +{ + rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); + rds_tcp_listen_stop(); + rds_tcp_destroy_conns(); + rds_trans_unregister(&rds_tcp_transport); + rds_tcp_recv_exit(); + kmem_cache_destroy(rds_tcp_conn_slab); +} +module_exit(rds_tcp_exit); + +struct rds_transport rds_tcp_transport = { + .laddr_check = rds_tcp_laddr_check, + .xmit_prepare = rds_tcp_xmit_prepare, + .xmit_complete = rds_tcp_xmit_complete, + .xmit_cong_map = rds_tcp_xmit_cong_map, + .xmit = rds_tcp_xmit, + .recv = rds_tcp_recv, + .conn_alloc = rds_tcp_conn_alloc, + .conn_free = rds_tcp_conn_free, + .conn_connect = rds_tcp_conn_connect, + .conn_shutdown = rds_tcp_conn_shutdown, + .inc_copy_to_user = rds_tcp_inc_copy_to_user, + .inc_purge = rds_tcp_inc_purge, + .inc_free = rds_tcp_inc_free, + .stats_info_copy = rds_tcp_stats_info_copy, + .exit = rds_tcp_exit, + .t_owner = THIS_MODULE, + .t_name = "tcp", + .t_prefer_loopback = 1, +}; + +int __init rds_tcp_init(void) +{ + int ret; + + rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection", + sizeof(struct rds_tcp_connection), + 0, 0, NULL); + if (rds_tcp_conn_slab == NULL) { + ret = -ENOMEM; + goto out; + } + + ret = rds_tcp_recv_init(); + if (ret) + goto out_slab; + + ret = rds_trans_register(&rds_tcp_transport); + if (ret) + goto out_recv; + + ret = rds_tcp_listen_init(); + if (ret) + goto out_register; + + rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); + + goto out; + +out_register: + rds_trans_unregister(&rds_tcp_transport); +out_recv: + rds_tcp_recv_exit(); +out_slab: + kmem_cache_destroy(rds_tcp_conn_slab); +out: + return ret; +} +module_init(rds_tcp_init); + +MODULE_AUTHOR("Oracle Corporation "); +MODULE_DESCRIPTION("RDS: TCP transport"); +MODULE_LICENSE("Dual BSD/GPL"); + diff --git a/net/rds/tcp.h b/net/rds/tcp.h new file mode 100644 index 000000000000..844fa6b9cf5a --- /dev/null +++ b/net/rds/tcp.h @@ -0,0 +1,93 @@ +#ifndef _RDS_TCP_H +#define _RDS_TCP_H + +#define RDS_TCP_PORT 16385 + +struct rds_tcp_incoming { + struct rds_incoming ti_inc; + struct sk_buff_head ti_skb_list; +}; + +struct rds_tcp_connection { + + struct list_head t_tcp_node; + struct rds_connection *conn; + struct socket *t_sock; + void *t_orig_write_space; + void *t_orig_data_ready; + void *t_orig_state_change; + + struct rds_tcp_incoming *t_tinc; + size_t t_tinc_hdr_rem; + size_t t_tinc_data_rem; + + /* XXX error report? 
*/ + struct work_struct t_conn_w; + struct work_struct t_send_w; + struct work_struct t_down_w; + struct work_struct t_recv_w; + + /* for info exporting only */ + struct list_head t_list_item; + u32 t_last_sent_nxt; + u32 t_last_expected_una; + u32 t_last_seen_una; +}; + +struct rds_tcp_statistics { + uint64_t s_tcp_data_ready_calls; + uint64_t s_tcp_write_space_calls; + uint64_t s_tcp_sndbuf_full; + uint64_t s_tcp_connect_raced; + uint64_t s_tcp_listen_closed_stale; +}; + +/* tcp.c */ +int __init rds_tcp_init(void); +void rds_tcp_exit(void); +void rds_tcp_tune(struct socket *sock); +void rds_tcp_nonagle(struct socket *sock); +void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); +void rds_tcp_restore_callbacks(struct socket *sock, + struct rds_tcp_connection *tc); +u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); +u32 rds_tcp_snd_una(struct rds_tcp_connection *tc); +u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq); +extern struct rds_transport rds_tcp_transport; + +/* tcp_connect.c */ +int rds_tcp_conn_connect(struct rds_connection *conn); +void rds_tcp_conn_shutdown(struct rds_connection *conn); +void rds_tcp_state_change(struct sock *sk); + +/* tcp_listen.c */ +int __init rds_tcp_listen_init(void); +void rds_tcp_listen_stop(void); +void rds_tcp_listen_data_ready(struct sock *sk, int bytes); + +/* tcp_recv.c */ +int __init rds_tcp_recv_init(void); +void rds_tcp_recv_exit(void); +void rds_tcp_data_ready(struct sock *sk, int bytes); +int rds_tcp_recv(struct rds_connection *conn); +void rds_tcp_inc_purge(struct rds_incoming *inc); +void rds_tcp_inc_free(struct rds_incoming *inc); +int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, + size_t size); + +/* tcp_send.c */ +void rds_tcp_xmit_prepare(struct rds_connection *conn); +void rds_tcp_xmit_complete(struct rds_connection *conn); +int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, + unsigned int hdr_off, unsigned int sg, unsigned int off); +void rds_tcp_write_space(struct sock *sk); +int rds_tcp_xmit_cong_map(struct rds_connection *conn, + struct rds_cong_map *map, unsigned long offset); + +/* tcp_stats.c */ +DECLARE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats); +#define rds_tcp_stats_inc(member) rds_stats_inc_which(rds_tcp_stats, member) +unsigned int rds_tcp_stats_info_copy(struct rds_info_iterator *iter, + unsigned int avail); + +#endif diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c new file mode 100644 index 000000000000..211522f9a9a2 --- /dev/null +++ b/net/rds/tcp_connect.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "tcp.h" + +void rds_tcp_state_change(struct sock *sk) +{ + void (*state_change)(struct sock *sk); + struct rds_connection *conn; + struct rds_tcp_connection *tc; + + read_lock(&sk->sk_callback_lock); + conn = sk->sk_user_data; + if (conn == NULL) { + state_change = sk->sk_state_change; + goto out; + } + tc = conn->c_transport_data; + state_change = tc->t_orig_state_change; + + rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state); + + switch(sk->sk_state) { + /* ignore connecting sockets as they make progress */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: + break; + case TCP_ESTABLISHED: + rds_connect_complete(conn); + break; + case TCP_CLOSE: + rds_conn_drop(conn); + default: + break; + } +out: + read_unlock(&sk->sk_callback_lock); + state_change(sk); +} + +int rds_tcp_conn_connect(struct rds_connection *conn) +{ + struct socket *sock = NULL; + struct sockaddr_in src, dest; + int ret; + + ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (ret < 0) + goto out; + + rds_tcp_tune(sock); + + src.sin_family = AF_INET; + src.sin_addr.s_addr = (__force u32)conn->c_laddr; + src.sin_port = (__force u16)htons(0); + + ret = sock->ops->bind(sock, (struct sockaddr *)&src, sizeof(src)); + if (ret) { + rdsdebug("bind failed with %d at address %u.%u.%u.%u\n", + ret, NIPQUAD(conn->c_laddr)); + goto out; + } + + dest.sin_family = AF_INET; + dest.sin_addr.s_addr = (__force u32)conn->c_faddr; + dest.sin_port = (__force u16)htons(RDS_TCP_PORT); + + /* + * once we call connect() we can start getting callbacks and they + * own the socket + */ + rds_tcp_set_callbacks(sock, conn); + ret = sock->ops->connect(sock, (struct sockaddr *)&dest, sizeof(dest), + O_NONBLOCK); + sock = NULL; + + rdsdebug("connect to address %u.%u.%u.%u returned %d\n", + NIPQUAD(conn->c_faddr), ret); + if (ret == -EINPROGRESS) + ret = 0; + +out: + if (sock) + sock_release(sock); + return ret; +} + +/* + * Before killing the tcp socket this needs to serialize with callbacks. The + * caller has already grabbed the sending sem so we're serialized with other + * senders. + * + * TCP calls the callbacks with the sock lock so we hold it while we reset the + * callbacks to those set by TCP. Our callbacks won't execute again once we + * hold the sock lock. 
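
rds_tcp_conn_connect() relies on the usual in-kernel nonblocking connect idiom: the callbacks are installed before connect() is issued, -EINPROGRESS is treated as success, and rds_tcp_state_change() finishes the handshake once the socket reaches TCP_ESTABLISHED. Reduced to its core, with an illustrative helper name and no error handling:

	static int my_kick_connect(struct socket *sock, struct sockaddr_in *dest)
	{
		int ret;

		ret = sock->ops->connect(sock, (struct sockaddr *)dest,
					 sizeof(*dest), O_NONBLOCK);
		if (ret == -EINPROGRESS)
			ret = 0;	/* completion arrives via TCP_ESTABLISHED */
		return ret;
	}
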
+ */ +void rds_tcp_conn_shutdown(struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + struct socket *sock = tc->t_sock; + + rdsdebug("shutting down conn %p tc %p sock %p\n", conn, tc, sock); + + if (sock) { + sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); + lock_sock(sock->sk); + rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */ + + release_sock(sock->sk); + sock_release(sock); + }; + + if (tc->t_tinc) { + rds_inc_put(&tc->t_tinc->ti_inc); + tc->t_tinc = NULL; + } + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; +} diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c new file mode 100644 index 000000000000..24b743eb0b1b --- /dev/null +++ b/net/rds/tcp_listen.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "tcp.h" + +/* + * cheesy, but simple.. 
+ */ +static void rds_tcp_accept_worker(struct work_struct *work); +static DECLARE_WORK(rds_tcp_listen_work, rds_tcp_accept_worker); +static struct socket *rds_tcp_listen_sock; + +static int rds_tcp_accept_one(struct socket *sock) +{ + struct socket *new_sock = NULL; + struct rds_connection *conn; + int ret; + struct inet_sock *inet; + + ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type, + sock->sk->sk_protocol, &new_sock); + if (ret) + goto out; + + new_sock->type = sock->type; + new_sock->ops = sock->ops; + ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); + if (ret < 0) + goto out; + + rds_tcp_tune(new_sock); + + inet = inet_sk(new_sock->sk); + + rdsdebug("accepted tcp %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u\n", + NIPQUAD(inet->saddr), ntohs(inet->sport), + NIPQUAD(inet->daddr), ntohs(inet->dport)); + + conn = rds_conn_create(inet->saddr, inet->daddr, &rds_tcp_transport, + GFP_KERNEL); + if (IS_ERR(conn)) { + ret = PTR_ERR(conn); + goto out; + } + + /* + * see the comment above rds_queue_delayed_reconnect() + */ + if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { + if (rds_conn_state(conn) == RDS_CONN_UP) + rds_tcp_stats_inc(s_tcp_listen_closed_stale); + else + rds_tcp_stats_inc(s_tcp_connect_raced); + rds_conn_drop(conn); + ret = 0; + goto out; + } + + rds_tcp_set_callbacks(new_sock, conn); + rds_connect_complete(conn); + new_sock = NULL; + ret = 0; + +out: + if (new_sock) + sock_release(new_sock); + return ret; +} + +static void rds_tcp_accept_worker(struct work_struct *work) +{ + while (rds_tcp_accept_one(rds_tcp_listen_sock) == 0) + cond_resched(); +} + +void rds_tcp_listen_data_ready(struct sock *sk, int bytes) +{ + void (*ready)(struct sock *sk, int bytes); + + rdsdebug("listen data ready sk %p\n", sk); + + read_lock(&sk->sk_callback_lock); + ready = sk->sk_user_data; + if (ready == NULL) { /* check for teardown race */ + ready = sk->sk_data_ready; + goto out; + } + + /* + * ->sk_data_ready is also called for a newly established child socket + * before it has been accepted and the accepter has set up their + * data_ready.. 
we only want to queue listen work for our listening + * socket + */ + if (sk->sk_state == TCP_LISTEN) + queue_work(rds_wq, &rds_tcp_listen_work); + +out: + read_unlock(&sk->sk_callback_lock); + ready(sk, bytes); +} + +int __init rds_tcp_listen_init(void) +{ + struct sockaddr_in sin; + struct socket *sock = NULL; + int ret; + + ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (ret < 0) + goto out; + + sock->sk->sk_reuse = 1; + rds_tcp_nonagle(sock); + + write_lock_bh(&sock->sk->sk_callback_lock); + sock->sk->sk_user_data = sock->sk->sk_data_ready; + sock->sk->sk_data_ready = rds_tcp_listen_data_ready; + write_unlock_bh(&sock->sk->sk_callback_lock); + + sin.sin_family = PF_INET, + sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); + sin.sin_port = (__force u16)htons(RDS_TCP_PORT); + + ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); + if (ret < 0) + goto out; + + ret = sock->ops->listen(sock, 64); + if (ret < 0) + goto out; + + rds_tcp_listen_sock = sock; + sock = NULL; +out: + if (sock) + sock_release(sock); + return ret; +} + +void rds_tcp_listen_stop(void) +{ + struct socket *sock = rds_tcp_listen_sock; + struct sock *sk; + + if (sock == NULL) + return; + + sk = sock->sk; + + /* serialize with and prevent further callbacks */ + lock_sock(sk); + write_lock_bh(&sk->sk_callback_lock); + if (sk->sk_user_data) { + sk->sk_data_ready = sk->sk_user_data; + sk->sk_user_data = NULL; + } + write_unlock_bh(&sk->sk_callback_lock); + release_sock(sk); + + /* wait for accepts to stop and close the socket */ + flush_workqueue(rds_wq); + sock_release(sock); + rds_tcp_listen_sock = NULL; +} diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c new file mode 100644 index 000000000000..c00dafffbb5a --- /dev/null +++ b/net/rds/tcp_recv.c @@ -0,0 +1,356 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ +#include +#include + +#include "rds.h" +#include "tcp.h" + +static struct kmem_cache *rds_tcp_incoming_slab; + +void rds_tcp_inc_purge(struct rds_incoming *inc) +{ + struct rds_tcp_incoming *tinc; + tinc = container_of(inc, struct rds_tcp_incoming, ti_inc); + rdsdebug("purging tinc %p inc %p\n", tinc, inc); + skb_queue_purge(&tinc->ti_skb_list); +} + +void rds_tcp_inc_free(struct rds_incoming *inc) +{ + struct rds_tcp_incoming *tinc; + tinc = container_of(inc, struct rds_tcp_incoming, ti_inc); + rds_tcp_inc_purge(inc); + rdsdebug("freeing tinc %p inc %p\n", tinc, inc); + kmem_cache_free(rds_tcp_incoming_slab, tinc); +} + +/* + * this is pretty lame, but, whatever. + */ +int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, + size_t size) +{ + struct rds_tcp_incoming *tinc; + struct iovec *iov, tmp; + struct sk_buff *skb; + unsigned long to_copy, skb_off; + int ret = 0; + + if (size == 0) + goto out; + + tinc = container_of(inc, struct rds_tcp_incoming, ti_inc); + iov = first_iov; + tmp = *iov; + + skb_queue_walk(&tinc->ti_skb_list, skb) { + skb_off = 0; + while (skb_off < skb->len) { + while (tmp.iov_len == 0) { + iov++; + tmp = *iov; + } + + to_copy = min(tmp.iov_len, size); + to_copy = min(to_copy, skb->len - skb_off); + + rdsdebug("ret %d size %zu skb %p skb_off %lu " + "skblen %d iov_base %p iov_len %zu cpy %lu\n", + ret, size, skb, skb_off, skb->len, + tmp.iov_base, tmp.iov_len, to_copy); + + /* modifies tmp as it copies */ + if (skb_copy_datagram_iovec(skb, skb_off, &tmp, + to_copy)) { + ret = -EFAULT; + goto out; + } + + size -= to_copy; + ret += to_copy; + skb_off += to_copy; + if (size == 0) + goto out; + } + } +out: + return ret; +} + +/* + * We have a series of skbs that have fragmented pieces of the congestion + * bitmap. They must add up to the exact size of the congestion bitmap. We + * use the skb helpers to copy those into the pages that make up the in-memory + * congestion bitmap for the remote address of this connection. We then tell + * the congestion core that the bitmap has been changed so that it can wake up + * sleepers. + * + * This is racing with sending paths which are using test_bit to see if the + * bitmap indicates that their recipient is congested. 
+ */ + +static void rds_tcp_cong_recv(struct rds_connection *conn, + struct rds_tcp_incoming *tinc) +{ + struct sk_buff *skb; + unsigned int to_copy, skb_off; + unsigned int map_off; + unsigned int map_page; + struct rds_cong_map *map; + int ret; + + /* catch completely corrupt packets */ + if (be32_to_cpu(tinc->ti_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES) + return; + + map_page = 0; + map_off = 0; + map = conn->c_fcong; + + skb_queue_walk(&tinc->ti_skb_list, skb) { + skb_off = 0; + while (skb_off < skb->len) { + to_copy = min_t(unsigned int, PAGE_SIZE - map_off, + skb->len - skb_off); + + BUG_ON(map_page >= RDS_CONG_MAP_PAGES); + + /* only returns 0 or -error */ + ret = skb_copy_bits(skb, skb_off, + (void *)map->m_page_addrs[map_page] + map_off, + to_copy); + BUG_ON(ret != 0); + + skb_off += to_copy; + map_off += to_copy; + if (map_off == PAGE_SIZE) { + map_off = 0; + map_page++; + } + } + } + + rds_cong_map_updated(map, ~(u64) 0); +} + +struct rds_tcp_desc_arg { + struct rds_connection *conn; + gfp_t gfp; + enum km_type km; +}; + +static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, + unsigned int offset, size_t len) +{ + struct rds_tcp_desc_arg *arg = desc->arg.data; + struct rds_connection *conn = arg->conn; + struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_incoming *tinc = tc->t_tinc; + struct sk_buff *clone; + size_t left = len, to_copy; + + rdsdebug("tcp data tc %p skb %p offset %u len %zu\n", tc, skb, offset, + len); + + /* + * tcp_read_sock() interprets partial progress as an indication to stop + * processing. + */ + while (left) { + if (tinc == NULL) { + tinc = kmem_cache_alloc(rds_tcp_incoming_slab, + arg->gfp); + if (tinc == NULL) { + desc->error = -ENOMEM; + goto out; + } + tc->t_tinc = tinc; + rdsdebug("alloced tinc %p\n", tinc); + rds_inc_init(&tinc->ti_inc, conn, conn->c_faddr); + /* + * XXX * we might be able to use the __ variants when + * we've already serialized at a higher level. 
+ */ + skb_queue_head_init(&tinc->ti_skb_list); + } + + if (left && tc->t_tinc_hdr_rem) { + to_copy = min(tc->t_tinc_hdr_rem, left); + rdsdebug("copying %zu header from skb %p\n", to_copy, + skb); + skb_copy_bits(skb, offset, + (char *)&tinc->ti_inc.i_hdr + + sizeof(struct rds_header) - + tc->t_tinc_hdr_rem, + to_copy); + tc->t_tinc_hdr_rem -= to_copy; + left -= to_copy; + offset += to_copy; + + if (tc->t_tinc_hdr_rem == 0) { + /* could be 0 for a 0 len message */ + tc->t_tinc_data_rem = + be32_to_cpu(tinc->ti_inc.i_hdr.h_len); + } + } + + if (left && tc->t_tinc_data_rem) { + clone = skb_clone(skb, arg->gfp); + if (clone == NULL) { + desc->error = -ENOMEM; + goto out; + } + + to_copy = min(tc->t_tinc_data_rem, left); + pskb_pull(clone, offset); + pskb_trim(clone, to_copy); + skb_queue_tail(&tinc->ti_skb_list, clone); + + rdsdebug("skb %p data %p len %d off %u to_copy %zu -> " + "clone %p data %p len %d\n", + skb, skb->data, skb->len, offset, to_copy, + clone, clone->data, clone->len); + + tc->t_tinc_data_rem -= to_copy; + left -= to_copy; + offset += to_copy; + } + + if (tc->t_tinc_hdr_rem == 0 && tc->t_tinc_data_rem == 0) { + if (tinc->ti_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) + rds_tcp_cong_recv(conn, tinc); + else + rds_recv_incoming(conn, conn->c_faddr, + conn->c_laddr, &tinc->ti_inc, + arg->gfp, arg->km); + + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; + tc->t_tinc = NULL; + rds_inc_put(&tinc->ti_inc); + tinc = NULL; + } + } +out: + rdsdebug("returning len %zu left %zu skb len %d rx queue depth %d\n", + len, left, skb->len, + skb_queue_len(&tc->t_sock->sk->sk_receive_queue)); + return len - left; +} + +/* the caller has to hold the sock lock */ +int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp, enum km_type km) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + struct socket *sock = tc->t_sock; + read_descriptor_t desc; + struct rds_tcp_desc_arg arg; + + /* It's like glib in the kernel! */ + arg.conn = conn; + arg.gfp = gfp; + arg.km = km; + desc.arg.data = &arg; + desc.error = 0; + desc.count = 1; /* give more than one skb per call */ + + tcp_read_sock(sock->sk, &desc, rds_tcp_data_recv); + rdsdebug("tcp_read_sock for tc %p gfp 0x%x returned %d\n", tc, gfp, + desc.error); + + return desc.error; +} + +/* + * We hold the sock lock to serialize our rds_tcp_recv->tcp_read_sock from + * data_ready. + * + * if we fail to allocate we're in trouble.. blindly wait some time before + * trying again to see if the VM can free up something for us. 
+ */ +int rds_tcp_recv(struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + struct socket *sock = tc->t_sock; + int ret = 0; + + rdsdebug("recv worker conn %p tc %p sock %p\n", conn, tc, sock); + + lock_sock(sock->sk); + ret = rds_tcp_read_sock(conn, GFP_KERNEL, KM_USER0); + release_sock(sock->sk); + + return ret; +} + +void rds_tcp_data_ready(struct sock *sk, int bytes) +{ + void (*ready)(struct sock *sk, int bytes); + struct rds_connection *conn; + struct rds_tcp_connection *tc; + + rdsdebug("data ready sk %p bytes %d\n", sk, bytes); + + read_lock(&sk->sk_callback_lock); + conn = sk->sk_user_data; + if (conn == NULL) { /* check for teardown race */ + ready = sk->sk_data_ready; + goto out; + } + + tc = conn->c_transport_data; + ready = tc->t_orig_data_ready; + rds_tcp_stats_inc(s_tcp_data_ready_calls); + + if (rds_tcp_read_sock(conn, GFP_ATOMIC, KM_SOFTIRQ0) == -ENOMEM) + queue_delayed_work(rds_wq, &conn->c_recv_w, 0); +out: + read_unlock(&sk->sk_callback_lock); + ready(sk, bytes); +} + +int __init rds_tcp_recv_init(void) +{ + rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming", + sizeof(struct rds_tcp_incoming), + 0, 0, NULL); + if (rds_tcp_incoming_slab == NULL) + return -ENOMEM; + return 0; +} + +void rds_tcp_recv_exit(void) +{ + kmem_cache_destroy(rds_tcp_incoming_slab); +} diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c new file mode 100644 index 000000000000..ab545e0cd5d6 --- /dev/null +++ b/net/rds/tcp_send.c @@ -0,0 +1,263 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ +#include +#include +#include + +#include "rds.h" +#include "tcp.h" + +static void rds_tcp_cork(struct socket *sock, int val) +{ + mm_segment_t oldfs; + + oldfs = get_fs(); + set_fs(KERNEL_DS); + sock->ops->setsockopt(sock, SOL_TCP, TCP_CORK, (char __user *)&val, + sizeof(val)); + set_fs(oldfs); +} + +void rds_tcp_xmit_prepare(struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + + rds_tcp_cork(tc->t_sock, 1); +} + +void rds_tcp_xmit_complete(struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + + rds_tcp_cork(tc->t_sock, 0); +} + +/* the core send_sem serializes this with other xmit and shutdown */ +int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len) +{ + struct kvec vec = { + .iov_base = data, + .iov_len = len, + }; + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL, + }; + + return kernel_sendmsg(sock, &msg, &vec, 1, vec.iov_len); +} + +/* the core send_sem serializes this with other xmit and shutdown */ +int rds_tcp_xmit_cong_map(struct rds_connection *conn, + struct rds_cong_map *map, unsigned long offset) +{ + static struct rds_header rds_tcp_map_header = { + .h_flags = RDS_FLAG_CONG_BITMAP, + }; + struct rds_tcp_connection *tc = conn->c_transport_data; + unsigned long i; + int ret; + int copied = 0; + + /* Some problem claims cpu_to_be32(constant) isn't a constant. */ + rds_tcp_map_header.h_len = cpu_to_be32(RDS_CONG_MAP_BYTES); + + if (offset < sizeof(struct rds_header)) { + ret = rds_tcp_sendmsg(tc->t_sock, + (void *)&rds_tcp_map_header + offset, + sizeof(struct rds_header) - offset); + if (ret <= 0) + return ret; + offset += ret; + copied = ret; + if (offset < sizeof(struct rds_header)) + return ret; + } + + offset -= sizeof(struct rds_header); + i = offset / PAGE_SIZE; + offset = offset % PAGE_SIZE; + BUG_ON(i >= RDS_CONG_MAP_PAGES); + + do { + ret = tc->t_sock->ops->sendpage(tc->t_sock, + virt_to_page(map->m_page_addrs[i]), + offset, PAGE_SIZE - offset, + MSG_DONTWAIT); + if (ret <= 0) + break; + copied += ret; + offset += ret; + if (offset == PAGE_SIZE) { + offset = 0; + i++; + } + } while (i < RDS_CONG_MAP_PAGES); + + return copied ? copied : ret; +} + +/* the core send_sem serializes this with other xmit and shutdown */ +int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, + unsigned int hdr_off, unsigned int sg, unsigned int off) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + int done = 0; + int ret = 0; + + if (hdr_off == 0) { + /* + * m_ack_seq is set to the sequence number of the last byte of + * header and data. see rds_tcp_is_acked(). 
+ */ + tc->t_last_sent_nxt = rds_tcp_snd_nxt(tc); + rm->m_ack_seq = tc->t_last_sent_nxt + + sizeof(struct rds_header) + + be32_to_cpu(rm->m_inc.i_hdr.h_len) - 1; + smp_mb__before_clear_bit(); + set_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags); + tc->t_last_expected_una = rm->m_ack_seq + 1; + + rdsdebug("rm %p tcp nxt %u ack_seq %llu\n", + rm, rds_tcp_snd_nxt(tc), + (unsigned long long)rm->m_ack_seq); + } + + if (hdr_off < sizeof(struct rds_header)) { + /* see rds_tcp_write_space() */ + set_bit(SOCK_NOSPACE, &tc->t_sock->sk->sk_socket->flags); + + ret = rds_tcp_sendmsg(tc->t_sock, + (void *)&rm->m_inc.i_hdr + hdr_off, + sizeof(rm->m_inc.i_hdr) - hdr_off); + if (ret < 0) + goto out; + done += ret; + if (hdr_off + done != sizeof(struct rds_header)) + goto out; + } + + while (sg < rm->m_nents) { + ret = tc->t_sock->ops->sendpage(tc->t_sock, + sg_page(&rm->m_sg[sg]), + rm->m_sg[sg].offset + off, + rm->m_sg[sg].length - off, + MSG_DONTWAIT|MSG_NOSIGNAL); + rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->m_sg[sg]), + rm->m_sg[sg].offset + off, rm->m_sg[sg].length - off, + ret); + if (ret <= 0) + break; + + off += ret; + done += ret; + if (off == rm->m_sg[sg].length) { + off = 0; + sg++; + } + } + +out: + if (ret <= 0) { + /* write_space will hit after EAGAIN, all else fatal */ + if (ret == -EAGAIN) { + rds_tcp_stats_inc(s_tcp_sndbuf_full); + ret = 0; + } else { + printk(KERN_WARNING "RDS/tcp: send to %u.%u.%u.%u " + "returned %d, disconnecting and reconnecting\n", + NIPQUAD(conn->c_faddr), ret); + rds_conn_drop(conn); + } + } + if (done == 0) + done = ret; + return done; +} + +/* + * rm->m_ack_seq is set to the tcp sequence number that corresponds to the + * last byte of the message, including the header. This means that the + * entire message has been received if rm->m_ack_seq is "before" the next + * unacked byte of the TCP sequence space. We have to do very careful + * wrapping 32bit comparisons here. + */ +static int rds_tcp_is_acked(struct rds_message *rm, uint64_t ack) +{ + if (!test_bit(RDS_MSG_HAS_ACK_SEQ, &rm->m_flags)) + return 0; + return (__s32)((u32)rm->m_ack_seq - (u32)ack) < 0; +} + +void rds_tcp_write_space(struct sock *sk) +{ + void (*write_space)(struct sock *sk); + struct rds_connection *conn; + struct rds_tcp_connection *tc; + + read_lock(&sk->sk_callback_lock); + conn = sk->sk_user_data; + if (conn == NULL) { + write_space = sk->sk_write_space; + goto out; + } + + tc = conn->c_transport_data; + rdsdebug("write_space for tc %p\n", tc); + write_space = tc->t_orig_write_space; + rds_tcp_stats_inc(s_tcp_write_space_calls); + + rdsdebug("tcp una %u\n", rds_tcp_snd_una(tc)); + tc->t_last_seen_una = rds_tcp_snd_una(tc); + rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); + + queue_delayed_work(rds_wq, &conn->c_send_w, 0); +out: + read_unlock(&sk->sk_callback_lock); + + /* + * write_space is only called when data leaves tcp's send queue if + * SOCK_NOSPACE is set. We set SOCK_NOSPACE every time we put + * data in tcp's send queue because we use write_space to parse the + * sequence numbers and notice that rds messages have been fully + * received. + * + * tcp's write_space clears SOCK_NOSPACE if the send queue has more + * than a certain amount of space. So we need to set it again *after* + * we call tcp's write_space or else we might only get called on the + * first of a series of incoming tcp acks. 
+ */ + write_space(sk); + + if (sk->sk_socket) + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); +} diff --git a/net/rds/tcp_stats.c b/net/rds/tcp_stats.c new file mode 100644 index 000000000000..d5898d03cd68 --- /dev/null +++ b/net/rds/tcp_stats.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "tcp.h" + +DEFINE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats) + ____cacheline_aligned; + +static const char const *rds_tcp_stat_names[] = { + "tcp_data_ready_calls", + "tcp_write_space_calls", + "tcp_sndbuf_full", + "tcp_connect_raced", + "tcp_listen_closed_stale", +}; + +unsigned int rds_tcp_stats_info_copy(struct rds_info_iterator *iter, + unsigned int avail) +{ + struct rds_tcp_statistics stats = {0, }; + uint64_t *src; + uint64_t *sum; + size_t i; + int cpu; + + if (avail < ARRAY_SIZE(rds_tcp_stat_names)) + goto out; + + for_each_online_cpu(cpu) { + src = (uint64_t *)&(per_cpu(rds_tcp_stats, cpu)); + sum = (uint64_t *)&stats; + for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++) + *(sum++) += *(src++); + } + + rds_stats_info_copy(iter, (uint64_t *)&stats, rds_tcp_stat_names, + ARRAY_SIZE(rds_tcp_stat_names)); +out: + return ARRAY_SIZE(rds_tcp_stat_names); +} -- cgit v1.2.3 From 6777d773a463ac045d333b989d4e44660f8d92ad Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Fri, 21 Aug 2009 14:32:48 -0400 Subject: kernel_read: redefine offset type vfs_read() offset is defined as loff_t, but kernel_read() offset is only defined as unsigned long. Redefine kernel_read() offset as loff_t. 
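The narrower type matters most on 32-bit kernels, where unsigned long is 32 bits while file offsets are 64. The following is a minimal user-space sketch (plain C with hypothetical names, not kernel code) of the truncation that passing an offset through unsigned long invites; on an ILP32 build the first path wraps a 5 GiB offset down to 1 GiB, while the loff_t-style path preserves it:

#include <stdio.h>

/* stand-in for loff_t; in the kernel loff_t is a 64-bit signed type */
typedef long long demo_loff_t;

/* models the old prototype: the offset is squeezed through unsigned long */
static demo_loff_t read_at_ulong(unsigned long offset)
{
	return (demo_loff_t)offset;	/* already truncated on ILP32 */
}

/* models the new prototype: the full 64-bit offset survives */
static demo_loff_t read_at_loff(demo_loff_t offset)
{
	return offset;
}

int main(void)
{
	demo_loff_t big = 5LL * 1024 * 1024 * 1024;	/* 5 GiB */

	printf("requested offset      : %lld\n", big);
	printf("via unsigned long     : %lld\n", read_at_ulong((unsigned long)big));
	printf("via loff_t-style path : %lld\n", read_at_loff(big));
	return 0;
}

On an LP64 host unsigned long is also 64 bits wide and both paths print the same value; build with -m32 (or for any 32-bit target) to observe the wrap.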
Cc: stable@kernel.org Signed-off-by: Mimi Zohar Signed-off-by: James Morris --- fs/exec.c | 4 ++-- include/linux/fs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index 4a8849e45b21..fb4f3cdda78c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -678,8 +678,8 @@ exit: } EXPORT_SYMBOL(open_exec); -int kernel_read(struct file *file, unsigned long offset, - char *addr, unsigned long count) +int kernel_read(struct file *file, loff_t offset, + char *addr, unsigned long count) { mm_segment_t old_fs; loff_t pos = offset; diff --git a/include/linux/fs.h b/include/linux/fs.h index 67888a9e0655..73e9b643e455 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2123,7 +2123,7 @@ extern struct file *do_filp_open(int dfd, const char *pathname, int open_flag, int mode, int acc_mode); extern int may_open(struct path *, int, int); -extern int kernel_read(struct file *, unsigned long, char *, unsigned long); +extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ -- cgit v1.2.3 From 05ecd5a1f76c183cca381705b3adb7d77c9a0439 Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Mon, 24 Aug 2009 19:52:38 +0900 Subject: sh: Simplify "multi-evt" interrupt handling. This patch changes the way in which "multi-evt" interrups are handled. The intc_evt2irq_table and related intc_evt2irq() have been removed and the "redirecting" handler is installed for the coupled interrupts. Thanks to that the do_IRQ() function don't have to use another level of indirection for all the interrupts... Signed-off-by: Pawel Moll Signed-off-by: Stuart Menefy Signed-off-by: Paul Mundt --- arch/sh/kernel/irq.c | 2 +- drivers/sh/intc.c | 54 ++++++++++++++++--------------------------------- include/linux/sh_intc.h | 1 - 3 files changed, 18 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 278c68c60488..d1053392e287 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -114,7 +114,7 @@ asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs) #endif irq_enter(); - irq = irq_demux(intc_evt2irq(irq)); + irq = irq_demux(evt2irq(irq)); #ifdef CONFIG_IRQSTACKS curctx = (union irq_ctx *)current_thread_info(); diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c index 4b1ca9d28353..a9174ec72853 100644 --- a/drivers/sh/intc.c +++ b/drivers/sh/intc.c @@ -663,16 +663,9 @@ static unsigned int __init save_reg(struct intc_desc_int *d, return 0; } -static unsigned char *intc_evt2irq_table; - -unsigned int intc_evt2irq(unsigned int vector) +static void intc_redirect_irq(unsigned int irq, struct irq_desc *desc) { - unsigned int irq = evt2irq(vector); - - if (intc_evt2irq_table && intc_evt2irq_table[irq]) - irq = intc_evt2irq_table[irq]; - - return irq; + generic_handle_irq((unsigned int)get_irq_data(irq)); } void __init register_intc_controller(struct intc_desc *desc) @@ -745,34 +738,6 @@ void __init register_intc_controller(struct intc_desc *desc) BUG_ON(k > 256); /* _INTC_ADDR_E() and _INTC_ADDR_D() are 8 bits */ - /* keep the first vector only if same enum is used multiple times */ - for (i = 0; i < desc->nr_vectors; i++) { - struct intc_vect *vect = desc->vectors + i; - int first_irq = evt2irq(vect->vect); - - if (!vect->enum_id) - continue; - - for (k = i + 1; k < desc->nr_vectors; k++) { - struct intc_vect *vect2 = desc->vectors + k; - - if (vect->enum_id != vect2->enum_id) - continue; - - 
vect2->enum_id = 0; - - if (!intc_evt2irq_table) - intc_evt2irq_table = kzalloc(NR_IRQS, GFP_NOWAIT); - - if (!intc_evt2irq_table) { - pr_warning("intc: cannot allocate evt2irq!\n"); - continue; - } - - intc_evt2irq_table[evt2irq(vect2->vect)] = first_irq; - } - } - /* register the vectors one by one */ for (i = 0; i < desc->nr_vectors; i++) { struct intc_vect *vect = desc->vectors + i; @@ -789,6 +754,21 @@ void __init register_intc_controller(struct intc_desc *desc) } intc_register_irq(desc, d, vect->enum_id, irq); + + for (k = i + 1; k < desc->nr_vectors; k++) { + struct intc_vect *vect2 = desc->vectors + k; + unsigned int irq2 = evt2irq(vect2->vect); + + if (vect->enum_id != vect2->enum_id) + continue; + + vect2->enum_id = 0; + + /* redirect this interrupts to the first one */ + set_irq_chip_and_handler_name(irq2, &d->chip, + intc_redirect_irq, "redirect"); + set_irq_data(irq2, (void *)irq); + } } } diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h index eb1423a0078d..68e212ff9dde 100644 --- a/include/linux/sh_intc.h +++ b/include/linux/sh_intc.h @@ -85,7 +85,6 @@ struct intc_desc symbol __initdata = { \ } #endif -unsigned int intc_evt2irq(unsigned int vector); void __init register_intc_controller(struct intc_desc *desc); int intc_set_priority(unsigned int irq, unsigned int prio); -- cgit v1.2.3 From 35aad0ffdf548617940ca1e78be1f2e0bafc4496 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 24 Aug 2009 14:56:30 +0200 Subject: netfilter: xtables: mark initial tables constant The inputted table is never modified, so should be considered const. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/x_tables.h | 2 +- include/linux/netfilter_arp/arp_tables.h | 2 +- include/linux/netfilter_bridge/ebtables.h | 2 +- include/linux/netfilter_ipv4/ip_tables.h | 2 +- include/linux/netfilter_ipv6/ip6_tables.h | 2 +- net/bridge/netfilter/ebtable_broute.c | 2 +- net/bridge/netfilter/ebtable_filter.c | 2 +- net/bridge/netfilter/ebtables.c | 13 +++++++------ net/ipv4/netfilter/arp_tables.c | 3 ++- net/ipv4/netfilter/arptable_filter.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 3 ++- net/ipv4/netfilter/iptable_filter.c | 2 +- net/ipv4/netfilter/iptable_mangle.c | 4 ++-- net/ipv4/netfilter/iptable_raw.c | 4 ++-- net/ipv4/netfilter/iptable_security.c | 4 ++-- net/ipv4/netfilter/nf_nat_rule.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 3 ++- net/ipv6/netfilter/ip6table_filter.c | 2 +- net/ipv6/netfilter/ip6table_mangle.c | 4 ++-- net/ipv6/netfilter/ip6table_raw.c | 4 ++-- net/ipv6/netfilter/ip6table_security.c | 4 ++-- net/netfilter/x_tables.c | 7 ++++--- 22 files changed, 42 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 4fa6e4c263e0..812cb153cabb 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -407,7 +407,7 @@ extern int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); extern struct xt_table *xt_register_table(struct net *net, - struct xt_table *table, + const struct xt_table *table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo); extern void *xt_unregister_table(struct xt_table *table); diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 590ac3d6d5d6..6fe3e6aa10db 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -265,7 +265,7 @@ struct arpt_error } 
extern struct xt_table *arpt_register_table(struct net *net, - struct xt_table *table, + const struct xt_table *table, const struct arpt_replace *repl); extern void arpt_unregister_table(struct xt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index e40ddb94b1af..ea281e6a2048 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -301,7 +301,7 @@ struct ebt_table #define EBT_ALIGN(s) (((s) + (__alignof__(struct ebt_replace)-1)) & \ ~(__alignof__(struct ebt_replace)-1)) extern struct ebt_table *ebt_register_table(struct net *net, - struct ebt_table *table); + const struct ebt_table *table); extern void ebt_unregister_table(struct ebt_table *table); extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 092bd50581a9..61fafc868a7b 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -245,7 +245,7 @@ ipt_get_target(struct ipt_entry *e) extern void ipt_init(void) __init; extern struct xt_table *ipt_register_table(struct net *net, - struct xt_table *table, + const struct xt_table *table, const struct ipt_replace *repl); extern void ipt_unregister_table(struct xt_table *table); diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 1089e33cf633..a64e1451ac38 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -306,7 +306,7 @@ ip6t_get_target(struct ip6t_entry *e) extern void ip6t_init(void) __init; extern struct xt_table *ip6t_register_table(struct net *net, - struct xt_table *table, + const struct xt_table *table, const struct ip6t_replace *repl); extern void ip6t_unregister_table(struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index c751111440f8..d32ab13e728c 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -41,7 +41,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table broute_table = +static const struct ebt_table broute_table = { .name = "broute", .table = &initial_table, diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 4b988db3cd4d..60b1a6ca7185 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -50,7 +50,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table frame_filter = +static const struct ebt_table frame_filter = { .name = "filter", .table = &initial_table, diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 37928d5f2840..bd1c65425d4f 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1103,23 +1103,24 @@ free_newinfo: return ret; } -struct ebt_table *ebt_register_table(struct net *net, struct ebt_table *table) +struct ebt_table * +ebt_register_table(struct net *net, const struct ebt_table *input_table) { struct ebt_table_info *newinfo; - struct ebt_table *t; + struct ebt_table *t, *table; struct ebt_replace_kernel *repl; int ret, 
i, countersize; void *p; - if (!table || !(repl = table->table) || !repl->entries || - repl->entries_size == 0 || - repl->counters || table->private) { + if (input_table == NULL || (repl = input_table->table) == NULL || + repl->entries == 0 || repl->entries_size == 0 || + repl->counters != NULL || input_table->private != NULL) { BUGPRINT("Bad table data for ebt_register_table!!!\n"); return ERR_PTR(-EINVAL); } /* Don't add one table to multiple lists. */ - table = kmemdup(table, sizeof(struct ebt_table), GFP_KERNEL); + table = kmemdup(input_table, sizeof(struct ebt_table), GFP_KERNEL); if (!table) { ret = -ENOMEM; goto out; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7bc11ffbb845..27774c99d888 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -1778,7 +1778,8 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -struct xt_table *arpt_register_table(struct net *net, struct xt_table *table, +struct xt_table *arpt_register_table(struct net *net, + const struct xt_table *table, const struct arpt_replace *repl) { int ret; diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 6ecfdae7c589..97337601827a 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -15,7 +15,7 @@ MODULE_DESCRIPTION("arptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ (1 << NF_ARP_FORWARD)) -static struct +static const struct { struct arpt_replace repl; struct arpt_standard entries[3]; @@ -45,7 +45,7 @@ static struct .term = ARPT_ERROR_INIT, }; -static struct xt_table packet_filter = { +static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 0b43fd7ca04a..cde755d5eeab 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -2065,7 +2065,8 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -struct xt_table *ipt_register_table(struct net *net, struct xt_table *table, +struct xt_table *ipt_register_table(struct net *net, + const struct xt_table *table, const struct ipt_replace *repl) { int ret; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 97dbd94a8e37..df566cbd68e5 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -53,7 +53,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_filter = { +static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 28647f10aa7e..036047f9b0f2 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -28,7 +28,7 @@ MODULE_DESCRIPTION("iptables mangle table"); (1 << NF_INET_POST_ROUTING)) /* Ouch - five different hooks? Maybe this should be a config option..... 
-- BC */ -static struct +static const struct { struct ipt_replace repl; struct ipt_standard entries[5]; @@ -64,7 +64,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_mangler = { +static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 494784c999eb..993edc23be09 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -9,7 +9,7 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static struct +static const struct { struct ipt_replace repl; struct ipt_standard entries[2]; @@ -36,7 +36,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_raw = { +static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 8804e1a0f915..99eb76c65d25 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -27,7 +27,7 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static struct +static const struct { struct ipt_replace repl; struct ipt_standard entries[3]; @@ -57,7 +57,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table security_table = { +static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 6448a9b7d6f0..9e81e0dfb4ec 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -28,7 +28,7 @@ (1 << NF_INET_POST_ROUTING) | \ (1 << NF_INET_LOCAL_OUT)) -static struct +static const struct { struct ipt_replace repl; struct ipt_standard entries[3]; @@ -58,7 +58,7 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table nat_table = { +static const struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a5d0c27cc26f..cc9f8ef303fd 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -2100,7 +2100,8 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -struct xt_table *ip6t_register_table(struct net *net, struct xt_table *table, +struct xt_table *ip6t_register_table(struct net *net, + const struct xt_table *table, const struct ip6t_replace *repl) { int ret; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 0a3ae48ac4d5..6f4383ad86f9 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -51,7 +51,7 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_filter = { +static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 0f49e005a8c5..0ad91433ed61 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -21,7 +21,7 @@ MODULE_DESCRIPTION("ip6tables mangle table"); (1 << NF_INET_LOCAL_OUT) | \ (1 << NF_INET_POST_ROUTING)) -static struct +static const 
struct { struct ip6t_replace repl; struct ip6t_standard entries[5]; @@ -57,7 +57,7 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_mangler = { +static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 679865e3d5ff..ed1a1180f3b3 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -8,7 +8,7 @@ #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static struct +static const struct { struct ip6t_replace repl; struct ip6t_standard entries[2]; @@ -35,7 +35,7 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table packet_raw = { +static const struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 822afabbdc88..41b444c60934 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -26,7 +26,7 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) -static struct +static const struct { struct ip6t_replace repl; struct ip6t_standard entries[3]; @@ -56,7 +56,7 @@ static struct .term = IP6T_ERROR_INIT, /* ERROR */ }; -static struct xt_table security_table = { +static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 025d1a0af78b..a6ac83a93348 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -736,16 +736,17 @@ xt_replace_table(struct xt_table *table, } EXPORT_SYMBOL_GPL(xt_replace_table); -struct xt_table *xt_register_table(struct net *net, struct xt_table *table, +struct xt_table *xt_register_table(struct net *net, + const struct xt_table *input_table, struct xt_table_info *bootstrap, struct xt_table_info *newinfo) { int ret; struct xt_table_info *private; - struct xt_table *t; + struct xt_table *t, *table; /* Don't add one object to multiple lists. */ - table = kmemdup(table, sizeof(struct xt_table), GFP_KERNEL); + table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); if (!table) { ret = -ENOMEM; goto out; -- cgit v1.2.3 From 7c614d6461399acca5c0ba444f5db49cb332fc08 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 24 Aug 2009 09:42:00 -0700 Subject: rcu: Add "notrace" to RCU function headers used by ftrace Both rcu_read_lock_sched_notrace() and rcu_read_unlock_sched_notrace() are used by ftrace, and thus need to be marked "notrace". Unfortunately, my naive assumption that gcc would see the inner "notrace" does not hold. Kudos to Lai Jiangshan for noting this. Reported-by: Ingo Molnar Bug-spotted-by: Lai Jiangshan Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <12511321213243-git-send-email-> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ec90fc34fea9..8b4422c558da 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -191,7 +191,7 @@ static inline void rcu_read_lock_sched(void) __acquire(RCU_SCHED); rcu_read_acquire(); } -static inline void rcu_read_lock_sched_notrace(void) +static inline notrace void rcu_read_lock_sched_notrace(void) { preempt_disable_notrace(); __acquire(RCU_SCHED); @@ -209,7 +209,7 @@ static inline void rcu_read_unlock_sched(void) __release(RCU_SCHED); preempt_enable(); } -static inline void rcu_read_unlock_sched_notrace(void) +static inline notrace void rcu_read_unlock_sched_notrace(void) { rcu_read_release(); __release(RCU_SCHED); -- cgit v1.2.3 From 353d5c30c666580347515da609dd74a2b8e9b828 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 24 Aug 2009 16:30:28 +0100 Subject: mm: fix hugetlb bug due to user_shm_unlock call 2.6.30's commit 8a0bdec194c21c8fdef840989d0d7b742bb5d4bc removed user_shm_lock() calls in hugetlb_file_setup() but left the user_shm_unlock call in shm_destroy(). In detail: Assume that can_do_hugetlb_shm() returns true and hence user_shm_lock() is not called in hugetlb_file_setup(). However, user_shm_unlock() is called in any case in shm_destroy() and in the following atomic_dec_and_lock(&up->__count) in free_uid() is executed and if up->__count gets zero, also cleanup_user_struct() is scheduled. Note that sched_destroy_user() is empty if CONFIG_USER_SCHED is not set. However, the ref counter up->__count gets unexpectedly non-positive and the corresponding structs are freed even though there are live references to them, resulting in a kernel oops after a lots of shmget(SHM_HUGETLB)/shmctl(IPC_RMID) cycles and CONFIG_USER_SCHED set. Hugh changed Stefan's suggested patch: can_do_hugetlb_shm() at the time of shm_destroy() may give a different answer from at the time of hugetlb_file_setup(). And fixed newseg()'s no_id error path, which has missed user_shm_unlock() ever since it came in 2.6.9. 
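The repair hinges on recording, at setup time, exactly which user was charged so that teardown only uncharges when a charge was actually taken. Below is a stand-alone sketch of that pairing pattern (hypothetical types, not the kernel API):

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct account {
	int charges;
};

static struct account current_account;

/* e.g. the capable(CAP_IPC_LOCK) / hugetlb shm group case */
static bool can_skip_charging(void)
{
	return true;
}

static int segment_setup(size_t size, struct account **charged)
{
	(void)size;				/* size only matters for real accounting */
	*charged = NULL;
	if (!can_skip_charging()) {
		current_account.charges++;	/* take the charge ...        */
		*charged = &current_account;	/* ... and remember who paid  */
	}
	return 0;
}

static void segment_destroy(struct account *charged)
{
	if (charged)				/* uncharge only if we charged */
		charged->charges--;
}

int main(void)
{
	struct account *charged;

	segment_setup(4096, &charged);
	segment_destroy(charged);
	printf("outstanding charges: %d\n", current_account.charges);
	return 0;
}

In the patch below the out-parameter is the struct user_struct **user added to hugetlb_file_setup(), and both shm_destroy() and the newseg() error path test shp->mlock_user before calling user_shm_unlock().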
Reported-by: Stefan Huber Signed-off-by: Hugh Dickins Tested-by: Stefan Huber Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 20 ++++++++++++-------- include/linux/hugetlb.h | 6 ++++-- ipc/shm.c | 8 +++++--- 3 files changed, 21 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 941c8425c10b..cb88dac8ccaa 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -935,26 +935,28 @@ static int can_do_hugetlb_shm(void) return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); } -struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) +struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, + struct user_struct **user) { int error = -ENOMEM; - int unlock_shm = 0; struct file *file; struct inode *inode; struct dentry *dentry, *root; struct qstr quick_string; - struct user_struct *user = current_user(); + *user = NULL; if (!hugetlbfs_vfsmount) return ERR_PTR(-ENOENT); if (!can_do_hugetlb_shm()) { - if (user_shm_lock(size, user)) { - unlock_shm = 1; + *user = current_user(); + if (user_shm_lock(size, *user)) { WARN_ONCE(1, "Using mlock ulimits for SHM_HUGETLB deprecated\n"); - } else + } else { + *user = NULL; return ERR_PTR(-EPERM); + } } root = hugetlbfs_vfsmount->mnt_root; @@ -996,8 +998,10 @@ out_inode: out_dentry: dput(dentry); out_shm_unlock: - if (unlock_shm) - user_shm_unlock(size, user); + if (*user) { + user_shm_unlock(size, *user); + *user = NULL; + } return ERR_PTR(error); } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 2723513a5651..5cbc620bdfe0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -10,6 +10,7 @@ #include struct ctl_table; +struct user_struct; int PageHuge(struct page *page); @@ -146,7 +147,8 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) extern const struct file_operations hugetlbfs_file_operations; extern struct vm_operations_struct hugetlb_vm_ops; -struct file *hugetlb_file_setup(const char *name, size_t, int); +struct file *hugetlb_file_setup(const char *name, size_t size, int acct, + struct user_struct **user); int hugetlb_get_quota(struct address_space *mapping, long delta); void hugetlb_put_quota(struct address_space *mapping, long delta); @@ -168,7 +170,7 @@ static inline void set_file_hugepages(struct file *file) #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() -#define hugetlb_file_setup(name,size,acctflag) ERR_PTR(-ENOSYS) +#define hugetlb_file_setup(name,size,acct,user) ERR_PTR(-ENOSYS) #endif /* !CONFIG_HUGETLBFS */ diff --git a/ipc/shm.c b/ipc/shm.c index 15dd238e5338..1bc4701ef4f0 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -174,7 +174,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) shm_unlock(shp); if (!is_file_hugepages(shp->shm_file)) shmem_lock(shp->shm_file, 0, shp->mlock_user); - else + else if (shp->mlock_user) user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size, shp->mlock_user); fput (shp->shm_file); @@ -369,8 +369,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) /* hugetlb_file_setup applies strict accounting */ if (shmflg & SHM_NORESERVE) acctflag = VM_NORESERVE; - file = hugetlb_file_setup(name, size, acctflag); - shp->mlock_user = current_user(); + file = hugetlb_file_setup(name, size, acctflag, + &shp->mlock_user); } else { /* * Do not allow no accounting for OVERCOMMIT_NEVER, even @@ -410,6 +410,8 @@ static int 
newseg(struct ipc_namespace *ns, struct ipc_params *params) return error; no_id: + if (shp->mlock_user) /* shmflg & SHM_HUGETLB case */ + user_shm_unlock(size, shp->mlock_user); fput(file); no_file: security_shm_free(shp); -- cgit v1.2.3 From a4be7c2778d1fd8e0a8a5607bec91fa221ab2c91 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Aug 2009 11:18:27 +0200 Subject: perf_counter: Allow sharing of output channels Provide the ability to configure a counter to send its output to another (already existing) counter's output stream. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: stephane eranian Cc: Paul Mackerras LKML-Reference: <20090819092023.980284148@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 5 +++ kernel/perf_counter.c | 83 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index b53f7006cc4e..e022b847c90d 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -216,6 +216,7 @@ struct perf_counter_attr { #define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) #define PERF_COUNTER_IOC_RESET _IO ('$', 3) #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) +#define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) enum perf_counter_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, @@ -415,6 +416,9 @@ enum perf_callchain_context { PERF_CONTEXT_MAX = (__u64)-4095, }; +#define PERF_FLAG_FD_NO_GROUP (1U << 0) +#define PERF_FLAG_FD_OUTPUT (1U << 1) + #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -536,6 +540,7 @@ struct perf_counter { struct list_head sibling_list; int nr_siblings; struct perf_counter *group_leader; + struct perf_counter *output; const struct pmu *pmu; enum perf_counter_active_state state; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 06bf6a4f2608..53abcbefa0bf 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1692,6 +1692,11 @@ static void free_counter(struct perf_counter *counter) atomic_dec(&nr_task_counters); } + if (counter->output) { + fput(counter->output->filp); + counter->output = NULL; + } + if (counter->destroy) counter->destroy(counter); @@ -1977,6 +1982,8 @@ unlock: return ret; } +int perf_counter_set_output(struct perf_counter *counter, int output_fd); + static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct perf_counter *counter = file->private_data; @@ -2000,6 +2007,9 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_COUNTER_IOC_PERIOD: return perf_counter_period(counter, (u64 __user *)arg); + case PERF_COUNTER_IOC_SET_OUTPUT: + return perf_counter_set_output(counter, arg); + default: return -ENOTTY; } @@ -2270,6 +2280,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) WARN_ON_ONCE(counter->ctx->parent_ctx); mutex_lock(&counter->mmap_mutex); + if (counter->output) { + ret = -EINVAL; + goto unlock; + } + if (atomic_inc_not_zero(&counter->mmap_count)) { if (nr_pages != counter->data->nr_pages) ret = -EINVAL; @@ -2655,6 +2670,7 @@ static int perf_output_begin(struct perf_output_handle *handle, struct perf_counter *counter, unsigned int size, int nmi, int sample) { + struct perf_counter *output_counter; struct perf_mmap_data *data; unsigned int offset, head; int have_lost; @@ -2664,13 +2680,17 @@ static int perf_output_begin(struct perf_output_handle *handle, u64 lost; } lost_event; + 
rcu_read_lock(); /* * For inherited counters we send all the output towards the parent. */ if (counter->parent) counter = counter->parent; - rcu_read_lock(); + output_counter = rcu_dereference(counter->output); + if (output_counter) + counter = output_counter; + data = rcu_dereference(counter->data); if (!data) goto out; @@ -4218,6 +4238,57 @@ err_size: goto out; } +int perf_counter_set_output(struct perf_counter *counter, int output_fd) +{ + struct perf_counter *output_counter = NULL; + struct file *output_file = NULL; + struct perf_counter *old_output; + int fput_needed = 0; + int ret = -EINVAL; + + if (!output_fd) + goto set; + + output_file = fget_light(output_fd, &fput_needed); + if (!output_file) + return -EBADF; + + if (output_file->f_op != &perf_fops) + goto out; + + output_counter = output_file->private_data; + + /* Don't chain output fds */ + if (output_counter->output) + goto out; + + /* Don't set an output fd when we already have an output channel */ + if (counter->data) + goto out; + + atomic_long_inc(&output_file->f_count); + +set: + mutex_lock(&counter->mmap_mutex); + old_output = counter->output; + rcu_assign_pointer(counter->output, output_counter); + mutex_unlock(&counter->mmap_mutex); + + if (old_output) { + /* + * we need to make sure no existing perf_output_*() + * is still referencing this counter. + */ + synchronize_rcu(); + fput(old_output->filp); + } + + ret = 0; +out: + fput_light(output_file, fput_needed); + return ret; +} + /** * sys_perf_counter_open - open a performance counter, associate it to a task/cpu * @@ -4240,7 +4311,7 @@ SYSCALL_DEFINE5(perf_counter_open, int ret; /* for future expandability... */ - if (flags) + if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) return -EINVAL; ret = perf_copy_attr(attr_uptr, &attr); @@ -4268,7 +4339,7 @@ SYSCALL_DEFINE5(perf_counter_open, * Look up the group leader (we will attach this counter to it): */ group_leader = NULL; - if (group_fd != -1) { + if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) { ret = -EINVAL; group_file = fget_light(group_fd, &fput_needed); if (!group_file) @@ -4310,6 +4381,12 @@ SYSCALL_DEFINE5(perf_counter_open, if (!counter_file) goto err_free_put_context; + if (flags & PERF_FLAG_FD_OUTPUT) { + ret = perf_counter_set_output(counter, group_fd); + if (ret) + goto err_free_put_context; + } + counter->filp = counter_file; WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); -- cgit v1.2.3 From 31b47cf7609288893a10706c648faa932c7aef90 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 24 Aug 2009 20:28:04 +0100 Subject: genirq: Add prototype for handle_nested_irq() The function is supposed to be called from the primary IRQ handler for a demultiplexing chip so make a protype visible for them. Signed-off-by: Mark Brown Cc: Mark Brown LKML-Reference: <1251142084-9852-1-git-send-email-broonie@opensource.wolfsonmicro.com> Signed-off-by: Ingo Molnar --- include/linux/irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 6956df9961ab..9e9eb76faf81 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -282,6 +282,7 @@ extern void handle_edge_irq(unsigned int irq, struct irq_desc *desc); extern void handle_simple_irq(unsigned int irq, struct irq_desc *desc); extern void handle_percpu_irq(unsigned int irq, struct irq_desc *desc); extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc); +extern void handle_nested_irq(unsigned int irq); /* * Monolithic do_IRQ implementation. 
-- cgit v1.2.3 From b6ed2e03df1e2c6ee41cf0e2e2699f2410671916 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 25 Aug 2009 13:44:04 +0100 Subject: GFS2: Add explanation of extended attr on-disk format Some useful info regarding the on-disk representation of GFS2 extended attributes. Signed-off-by: Steven Whitehouse --- include/linux/gfs2_ondisk.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/gfs2_ondisk.h b/include/linux/gfs2_ondisk.h index c56b4bce56d0..b80c88dedbbb 100644 --- a/include/linux/gfs2_ondisk.h +++ b/include/linux/gfs2_ondisk.h @@ -333,6 +333,28 @@ struct gfs2_leaf { /* * Extended attribute header format + * + * This works in a similar way to dirents. There is a fixed size header + * followed by a variable length section made up of the name and the + * associated data. In the case of a "stuffed" entry, the value is + * inline directly after the name, the ea_num_ptrs entry will be + * zero in that case. For non-"stuffed" entries, there will be + * a set of pointers (aligned to 8 byte boundary) to the block(s) + * containing the value. + * + * The blocks containing the values and the blocks containing the + * extended attribute headers themselves all start with the common + * metadata header. Each inode, if it has extended attributes, will + * have either a single block containing the extended attribute headers + * or a single indirect block pointing to blocks containing the + * extended attribute headers. + * + * The maximum size of the data part of an extended attribute is 64k + * so the number of blocks required depends upon block size. Since the + * block size also determines the number of pointers in an indirect + * block, it's a fairly complicated calculation to work out the maximum + * number of blocks that an inode may have relating to extended attributes. + * */ #define GFS2_EA_MAX_NAME_LEN 255 -- cgit v1.2.3 From 3a6c2b419b7768703cfb2cabdb894517c5065e33 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Aug 2009 16:07:40 +0200 Subject: netlink: constify nlmsghdr arguments Constify nlmsghdr arguments to a couple of functions as preparation for the next patch, which will constify the netlink message data in all nfnetlink users.
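
As a rough illustration of the calling convention this series moves towards (a hypothetical callback, not taken from any of the patches below), an nfnetlink-style handler receives the message header and the parsed attribute table as read-only pointers and only inspects them. The attribute index and the function name are made up for this sketch; only the const-qualified signature mirrors the patches.

/* Hypothetical nfnetlink message handler; assumes the declarations from
 * <linux/netfilter/nfnetlink.h> and <net/netlink.h>. */
static int sample_nfnl_call(struct sock *nl, struct sk_buff *skb,
			    const struct nlmsghdr *nlh,
			    const struct nlattr * const cda[])
{
	const struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);

	/* The header and attributes are only read, never modified. */
	if (nfmsg->version != NFNETLINK_V0)
		return -EINVAL;

	if (!cda[1])		/* required attribute missing (index is illustrative) */
		return -EINVAL;

	return 0;
}
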
Signed-off-by: Patrick McHardy --- include/linux/netlink.h | 15 ++++++++------- include/net/netlink.h | 4 ++-- include/net/rtnetlink.h | 2 +- net/netlink/af_netlink.c | 2 +- net/sched/act_api.c | 2 +- 5 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 5ba398e90304..0fbecbbe8e9e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -217,12 +217,13 @@ int netlink_sendskb(struct sock *sk, struct sk_buff *skb); struct netlink_callback { - struct sk_buff *skb; - struct nlmsghdr *nlh; - int (*dump)(struct sk_buff * skb, struct netlink_callback *cb); - int (*done)(struct netlink_callback *cb); - int family; - long args[6]; + struct sk_buff *skb; + const struct nlmsghdr *nlh; + int (*dump)(struct sk_buff * skb, + struct netlink_callback *cb); + int (*done)(struct netlink_callback *cb); + int family; + long args[6]; }; struct netlink_notify @@ -258,7 +259,7 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) NLMSG_NEW(skb, pid, seq, type, len, 0) extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, - struct nlmsghdr *nlh, + const struct nlmsghdr *nlh, int (*dump)(struct sk_buff *skb, struct netlink_callback*), int (*done)(struct netlink_callback*)); diff --git a/include/net/netlink.h b/include/net/netlink.h index 007bdb07dabb..a63b2192ac1c 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -365,7 +365,7 @@ static inline struct nlmsghdr *nlmsg_next(struct nlmsghdr *nlh, int *remaining) * * See nla_parse() */ -static inline int nlmsg_parse(struct nlmsghdr *nlh, int hdrlen, +static inline int nlmsg_parse(const struct nlmsghdr *nlh, int hdrlen, struct nlattr *tb[], int maxtype, const struct nla_policy *policy) { @@ -414,7 +414,7 @@ static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype, * * Returns 1 if a report back to the application is requested. 
*/ -static inline int nlmsg_report(struct nlmsghdr *nlh) +static inline int nlmsg_report(const struct nlmsghdr *nlh) { return !!(nlh->nlmsg_flags & NLM_F_ECHO); } diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 3c1895e54b7f..85ba560332ed 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -14,7 +14,7 @@ extern void rtnl_register(int protocol, int msgtype, extern int rtnl_unregister(int protocol, int msgtype); extern void rtnl_unregister_all(int protocol); -static inline int rtnl_msg_family(struct nlmsghdr *nlh) +static inline int rtnl_msg_family(const struct nlmsghdr *nlh) { if (nlmsg_len(nlh) >= sizeof(struct rtgenmsg)) return ((struct rtgenmsg *) nlmsg_data(nlh))->rtgen_family; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index da3163d15ef0..d0ff382c40ca 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1705,7 +1705,7 @@ errout: } int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, - struct nlmsghdr *nlh, + const struct nlmsghdr *nlh, int (*dump)(struct sk_buff *skb, struct netlink_callback *), int (*done)(struct netlink_callback *)) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9d03cc33b6cc..2dfb3e7a040d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1011,7 +1011,7 @@ replay: } static struct nlattr * -find_dump_kind(struct nlmsghdr *n) +find_dump_kind(const struct nlmsghdr *n) { struct nlattr *tb1, *tb2[TCA_ACT_MAX+1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; -- cgit v1.2.3 From 3993832464dd4e14a4c926583a11f0fa92c1f0f0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 25 Aug 2009 16:07:58 +0200 Subject: netfilter: nfnetlink: constify message attributes and headers Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink.h | 3 +- include/net/netfilter/nf_nat_core.h | 2 +- net/ipv4/netfilter/nf_nat_core.c | 6 ++-- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 54 ++++++++++++++++++++++-------------- net/netfilter/nfnetlink.c | 2 +- net/netfilter/nfnetlink_log.c | 6 ++-- net/netfilter/nfnetlink_queue.c | 9 ++++-- net/netfilter/xt_osf.c | 6 ++-- 9 files changed, 55 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index bff4d5741d98..9f00da287f2c 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -58,7 +58,8 @@ struct nfgenmsg { struct nfnl_callback { int (*call)(struct sock *nl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]); + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]); const struct nla_policy *policy; /* netlink attribute policy */ const u_int16_t attr_count; /* number of nlattr's */ }; diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h index 58684066388c..33602ab66190 100644 --- a/include/net/netfilter/nf_nat_core.h +++ b/include/net/netfilter/nf_nat_core.h @@ -31,6 +31,6 @@ struct nlattr; extern int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr); + const struct nlattr *attr); #endif /* _NF_NAT_CORE_H */ diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index b6ddd5633045..68afc6ecd343 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -620,7 +620,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { }; static int -nfnetlink_parse_nat(struct nlattr *nat, 
+nfnetlink_parse_nat(const struct nlattr *nat, const struct nf_conn *ct, struct nf_nat_range *range) { struct nlattr *tb[CTA_NAT_MAX+1]; @@ -656,7 +656,7 @@ nfnetlink_parse_nat(struct nlattr *nat, static int nfnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr) + const struct nlattr *attr) { struct nf_nat_range range; @@ -671,7 +671,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int nfnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr) + const struct nlattr *attr) { return -EOPNOTSUPP; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b5869b9574b0..565c3a86423f 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -47,7 +47,7 @@ int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr) __read_mostly; + const struct nlattr *attr) __read_mostly; EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); DEFINE_SPINLOCK(nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 49479d194570..59d8064eb522 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -704,7 +704,8 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr, } static int -ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple, +ctnetlink_parse_tuple(const struct nlattr * const cda[], + struct nf_conntrack_tuple *tuple, enum ctattr_tuple type, u_int8_t l3num) { struct nlattr *tb[CTA_TUPLE_MAX+1]; @@ -740,7 +741,7 @@ ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple, } static inline int -ctnetlink_parse_help(struct nlattr *attr, char **helper_name) +ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) { struct nlattr *tb[CTA_HELP_MAX+1]; @@ -764,7 +765,8 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = { static int ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -823,7 +825,8 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, static int ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; @@ -884,7 +887,7 @@ out: static int ctnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, - struct nlattr *attr) + const struct nlattr *attr) { typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup; @@ -914,7 +917,7 @@ ctnetlink_parse_nat_setup(struct nf_conn *ct, #endif static int -ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[]) { unsigned long d; unsigned int status = ntohl(nla_get_be32(cda[CTA_STATUS])); @@ -940,7 +943,7 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) } static int -ctnetlink_change_nat(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[]) { #ifdef CONFIG_NF_NAT_NEEDED int ret; @@ -966,7 +969,7 @@ ctnetlink_change_nat(struct nf_conn *ct, struct nlattr *cda[]) } static inline int -ctnetlink_change_helper(struct nf_conn *ct, struct 
nlattr *cda[]) +ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) { struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); @@ -1028,7 +1031,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) } static inline int -ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_timeout(struct nf_conn *ct, const struct nlattr * const cda[]) { u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); @@ -1042,9 +1045,10 @@ ctnetlink_change_timeout(struct nf_conn *ct, struct nlattr *cda[]) } static inline int -ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]) { - struct nlattr *tb[CTA_PROTOINFO_MAX+1], *attr = cda[CTA_PROTOINFO]; + const struct nlattr *attr = cda[CTA_PROTOINFO]; + struct nlattr *tb[CTA_PROTOINFO_MAX+1]; struct nf_conntrack_l4proto *l4proto; int err = 0; @@ -1061,7 +1065,7 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[]) #ifdef CONFIG_NF_NAT_NEEDED static inline int -change_nat_seq_adj(struct nf_nat_seq *natseq, struct nlattr *attr) +change_nat_seq_adj(struct nf_nat_seq *natseq, const struct nlattr * const attr) { struct nlattr *cda[CTA_NAT_SEQ_MAX+1]; @@ -1089,7 +1093,8 @@ change_nat_seq_adj(struct nf_nat_seq *natseq, struct nlattr *attr) } static int -ctnetlink_change_nat_seq_adj(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_nat_seq_adj(struct nf_conn *ct, + const struct nlattr * const cda[]) { int ret = 0; struct nf_conn_nat *nat = nfct_nat(ct); @@ -1120,7 +1125,8 @@ ctnetlink_change_nat_seq_adj(struct nf_conn *ct, struct nlattr *cda[]) #endif static int -ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) +ctnetlink_change_conntrack(struct nf_conn *ct, + const struct nlattr * const cda[]) { int err; @@ -1169,7 +1175,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) } static struct nf_conn * -ctnetlink_create_conntrack(struct nlattr *cda[], +ctnetlink_create_conntrack(const struct nlattr * const cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, u8 u3) @@ -1304,7 +1310,8 @@ err1: static int ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -1629,7 +1636,8 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { static int ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; @@ -1689,7 +1697,8 @@ out: static int ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; @@ -1767,13 +1776,15 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return 0; } static int -ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nlattr *cda[]) +ctnetlink_change_expect(struct nf_conntrack_expect *x, + const struct nlattr * const cda[]) { return -EOPNOTSUPP; } static int -ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3, u32 pid, int report) +ctnetlink_create_expect(const 
struct nlattr * const cda[], u_int8_t u3, + u32 pid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -1831,7 +1842,8 @@ out: static int ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *cda[]) + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 92761a988375..eedc0c1ac7a4 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -170,7 +170,7 @@ replay: if (err < 0) return err; - err = nc->call(nfnl, skb, nlh, cda); + err = nc->call(nfnl, skb, nlh, (const struct nlattr **)cda); if (err == -EAGAIN) goto replay; return err; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 66a6dd5c519a..f900dc3194af 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -694,7 +694,8 @@ static struct notifier_block nfulnl_rtnl_notifier = { static int nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfqa[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } @@ -716,7 +717,8 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { static int nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfula[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfula[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t group_num = ntohs(nfmsg->res_id); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 71daa0934b6c..7a9dec9fb822 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -608,7 +608,8 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { static int nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfqa[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); @@ -670,7 +671,8 @@ err_out_unlock: static int nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfqa[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { return -ENOTSUPP; } @@ -687,7 +689,8 @@ static const struct nf_queue_handler nfqh = { static int nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *nfqa[]) + const struct nlmsghdr *nlh, + const struct nlattr * const nfqa[]) { struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 0f482e2440b4..63e190504656 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -70,7 +70,8 @@ static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head) } static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) + const struct nlmsghdr *nlh, + const struct nlattr * const osf_attrs[]) { struct xt_osf_user_finger *f; struct xt_osf_finger *kf = NULL, *sf; @@ -112,7 +113,8 @@ static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, } static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) + const struct nlmsghdr *nlh, + const struct 
nlattr * const osf_attrs[]) { struct xt_osf_user_finger *f; struct xt_osf_finger *sf; -- cgit v1.2.3 From 3d27d8cb34fc156beb86de2338ca4029873a5cc6 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 24 Aug 2009 14:43:12 -0700 Subject: tracing: Make syscall tracepoints conditional The syscall enter/exit tracepoints are only supported on archs that HAVE_SYSCALL_TRACEPOINTS, so the declarations should be #ifdef'ed. Also, the definition of syscall_regfunc and syscall_unregfunc should depend on this same config, rather than the ftrace-specific one. Signed-off-by: Josh Stone Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Li Zefan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Lai Jiangshan LKML-Reference: <1251150194-1713-3-git-send-email-jistone@redhat.com> Signed-off-by: Frederic Weisbecker --- include/trace/syscall.h | 4 ++++ kernel/tracepoint.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 9661dd406b93..5dcb7e3a544c 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -8,6 +8,8 @@ #include +#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS + extern void syscall_regfunc(void); extern void syscall_unregfunc(void); @@ -25,6 +27,8 @@ DECLARE_TRACE_WITH_CALLBACK(syscall_exit, syscall_unregfunc ); +#endif + /* * A syscall entry in the ftrace syscalls array. * diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index be86b9a01a09..9e0a36f0e2a9 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -576,7 +576,7 @@ __initcall(init_tracepoints); #endif /* CONFIG_MODULES */ -#ifdef CONFIG_FTRACE_SYSCALLS +#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS static DEFINE_MUTEX(regfunc_mutex); static int sys_tracepoint_refcount; -- cgit v1.2.3 From 97419875865859fd2403e66266c02ce028e2f5ab Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 24 Aug 2009 14:43:13 -0700 Subject: tracing: Move tracepoint callbacks from declaration to definition It's not strictly correct for the tracepoint reg/unreg callbacks to occur when a client is hooking up, because the actual tracepoint may not be present yet. This happens to be fine for syscall, since that's in the core kernel, but it would cause problems for tracepoints defined in a module that hasn't been loaded yet. It also means the reg/unreg has to be EXPORTed for any modules to use the tracepoint (as in SystemTap). This patch removes DECLARE_TRACE_WITH_CALLBACK, and instead introduces DEFINE_TRACE_FN which stores the callbacks in struct tracepoint. The callbacks are used now when the active state of the tracepoint changes in set_tracepoint & disable_tracepoint. This also introduces TRACE_EVENT_FN, so ftrace events can also provide registration callbacks if needed. 
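
A minimal sketch of what the new variant looks like to an event author (a made-up event, mirroring the macro signature the patch below introduces rather than any real tracepoint): TRACE_EVENT_FN takes the same arguments as TRACE_EVENT plus trailing reg/unreg callbacks, which the tracepoint core now invokes when the event is first enabled and last disabled.

/* Hypothetical event in a trace header; sample_regfunc/sample_unregfunc
 * are assumed helpers provided elsewhere by the subsystem. */
extern void sample_regfunc(void);
extern void sample_unregfunc(void);

TRACE_EVENT_FN(sample_event,

	TP_PROTO(long id),

	TP_ARGS(id),

	TP_STRUCT__entry(
		__field(	long,	id	)
	),

	TP_fast_assign(
		__entry->id = id;
	),

	TP_printk("id=%ld", __entry->id),

	sample_regfunc, sample_unregfunc
);
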
Signed-off-by: Josh Stone Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Li Zefan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Lai Jiangshan Cc: Paul Mundt Cc: Martin Schwidefsky Cc: Heiko Carstens LKML-Reference: <1251150194-1713-4-git-send-email-jistone@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/s390/kernel/ptrace.c | 4 ++-- arch/x86/kernel/ptrace.c | 4 ++-- include/linux/tracepoint.h | 46 +++++++++++++++++--------------------------- include/trace/define_trace.h | 5 +++++ include/trace/ftrace.h | 9 +++++++++ include/trace/syscall.h | 12 ++++-------- kernel/tracepoint.c | 14 +++++++++----- 7 files changed, 49 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 9d3dcfa79ea2..c05b44b80c23 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -51,8 +51,8 @@ #include "compat_ptrace.h" #endif -DEFINE_TRACE(syscall_enter); -DEFINE_TRACE(syscall_exit); +DEFINE_TRACE_FN(syscall_enter, syscall_regfunc, syscall_unregfunc); +DEFINE_TRACE_FN(syscall_exit, syscall_regfunc, syscall_unregfunc); enum s390_regset { REGSET_GENERAL, diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a909afef44f4..31e9b97ec4d6 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -37,8 +37,8 @@ #include -DEFINE_TRACE(syscall_enter); -DEFINE_TRACE(syscall_exit); +DEFINE_TRACE_FN(syscall_enter, syscall_regfunc, syscall_unregfunc); +DEFINE_TRACE_FN(syscall_exit, syscall_regfunc, syscall_unregfunc); #include "tls.h" diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 5984ed04c03b..846a4ae501eb 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -23,6 +23,8 @@ struct tracepoint; struct tracepoint { const char *name; /* Tracepoint name */ int state; /* State. */ + void (*regfunc)(void); + void (*unregfunc)(void); void **funcs; } __attribute__((aligned(32))); /* * Aligned on 32 bytes because it is @@ -60,10 +62,8 @@ struct tracepoint { * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. - * An optional set of (un)registration functions can be passed to perform any - * additional (un)registration work. 
*/ -#define DECLARE_TRACE_WITH_CALLBACK(name, proto, args, reg, unreg) \ +#define DECLARE_TRACE(name, proto, args) \ extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ @@ -73,36 +73,23 @@ struct tracepoint { } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ - int ret; \ - void (*func)(void) = reg; \ - \ - ret = tracepoint_probe_register(#name, (void *)probe); \ - if (func && !ret) \ - func(); \ - return ret; \ + return tracepoint_probe_register(#name, (void *)probe); \ } \ static inline int unregister_trace_##name(void (*probe)(proto)) \ { \ - int ret; \ - void (*func)(void) = unreg; \ - \ - ret = tracepoint_probe_unregister(#name, (void *)probe);\ - if (func && !ret) \ - func(); \ - return ret; \ + return tracepoint_probe_unregister(#name, (void *)probe);\ } -#define DECLARE_TRACE(name, proto, args) \ - DECLARE_TRACE_WITH_CALLBACK(name, TP_PROTO(proto), TP_ARGS(args),\ - NULL, NULL); - -#define DEFINE_TRACE(name) \ +#define DEFINE_TRACE_FN(name, reg, unreg) \ static const char __tpstrtab_##name[] \ __attribute__((section("__tracepoints_strings"))) = #name; \ struct tracepoint __tracepoint_##name \ __attribute__((section("__tracepoints"), aligned(32))) = \ - { __tpstrtab_##name, 0, NULL } + { __tpstrtab_##name, 0, reg, unreg, NULL } + +#define DEFINE_TRACE(name) \ + DEFINE_TRACE_FN(name, NULL, NULL); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ EXPORT_SYMBOL_GPL(__tracepoint_##name) @@ -113,7 +100,7 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end); #else /* !CONFIG_TRACEPOINTS */ -#define DECLARE_TRACE_WITH_CALLBACK(name, proto, args, reg, unreg) \ +#define DECLARE_TRACE(name, proto, args) \ static inline void _do_trace_##name(struct tracepoint *tp, proto) \ { } \ static inline void trace_##name(proto) \ @@ -127,10 +114,7 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin, return -ENOSYS; \ } -#define DECLARE_TRACE(name, proto, args) \ - DECLARE_TRACE_WITH_CALLBACK(name, TP_PROTO(proto), TP_ARGS(args),\ - NULL, NULL); - +#define DEFINE_TRACE_FN(name, reg, unreg) #define DEFINE_TRACE(name) #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) @@ -282,10 +266,16 @@ static inline void tracepoint_synchronize_unregister(void) * can also by used by generic instrumentation like SystemTap), and * it is also used to expose a structured trace record in * /sys/kernel/debug/tracing/events/. + * + * A set of (un)registration functions can be passed to the variant + * TRACE_EVENT_FN to perform any (un)registration work. 
*/ #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_FN(name, proto, args, struct, \ + assign, print, reg, unreg) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #endif #endif diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index f7a7ae1e8f90..2a969850736d 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -26,6 +26,11 @@ #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ DEFINE_TRACE(name) +#undef TRACE_EVENT_FN +#define TRACE_EVENT_FN(name, proto, args, tstruct, \ + assign, print, reg, unreg) \ + DEFINE_TRACE_FN(name, reg, unreg) + #undef DECLARE_TRACE #define DECLARE_TRACE(name, proto, args) \ DEFINE_TRACE(name) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 127400255e4c..3a0b44bdabf7 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -42,6 +42,15 @@ }; \ static struct ftrace_event_call event_##name +/* Callbacks are meaningless to ftrace. */ +#undef TRACE_EVENT_FN +#define TRACE_EVENT_FN(name, proto, args, tstruct, \ + assign, print, reg, unreg) \ + TRACE_EVENT(name, TP_PROTO(proto), TP_ARGS(args), \ + TP_STRUCT__entry(tstruct), \ + TP_fast_assign(assign), \ + TP_printk(print)) + #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 5dcb7e3a544c..4e1943001854 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -13,18 +13,14 @@ extern void syscall_regfunc(void); extern void syscall_unregfunc(void); -DECLARE_TRACE_WITH_CALLBACK(syscall_enter, +DECLARE_TRACE(syscall_enter, TP_PROTO(struct pt_regs *regs, long id), - TP_ARGS(regs, id), - syscall_regfunc, - syscall_unregfunc + TP_ARGS(regs, id) ); -DECLARE_TRACE_WITH_CALLBACK(syscall_exit, +DECLARE_TRACE(syscall_exit, TP_PROTO(struct pt_regs *regs, long ret), - TP_ARGS(regs, ret), - syscall_regfunc, - syscall_unregfunc + TP_ARGS(regs, ret) ); #endif diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 9e0a36f0e2a9..1a6a453b7efb 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -243,6 +243,11 @@ static void set_tracepoint(struct tracepoint_entry **entry, { WARN_ON(strcmp((*entry)->name, elem->name) != 0); + if (elem->regfunc && !elem->state && active) + elem->regfunc(); + else if (elem->unregfunc && elem->state && !active) + elem->unregfunc(); + /* * rcu_assign_pointer has a smp_wmb() which makes sure that the new * probe callbacks array is consistent before setting a pointer to it. 
@@ -262,6 +267,9 @@ static void set_tracepoint(struct tracepoint_entry **entry, */ static void disable_tracepoint(struct tracepoint *elem) { + if (elem->unregfunc && elem->state) + elem->unregfunc(); + elem->state = 0; rcu_assign_pointer(elem->funcs, NULL); } @@ -578,7 +586,7 @@ __initcall(init_tracepoints); #ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS -static DEFINE_MUTEX(regfunc_mutex); +/* NB: reg/unreg are called while guarded with the tracepoints_mutex */ static int sys_tracepoint_refcount; void syscall_regfunc(void) @@ -586,7 +594,6 @@ void syscall_regfunc(void) unsigned long flags; struct task_struct *g, *t; - mutex_lock(®func_mutex); if (!sys_tracepoint_refcount) { read_lock_irqsave(&tasklist_lock, flags); do_each_thread(g, t) { @@ -595,7 +602,6 @@ void syscall_regfunc(void) read_unlock_irqrestore(&tasklist_lock, flags); } sys_tracepoint_refcount++; - mutex_unlock(®func_mutex); } void syscall_unregfunc(void) @@ -603,7 +609,6 @@ void syscall_unregfunc(void) unsigned long flags; struct task_struct *g, *t; - mutex_lock(®func_mutex); sys_tracepoint_refcount--; if (!sys_tracepoint_refcount) { read_lock_irqsave(&tasklist_lock, flags); @@ -612,6 +617,5 @@ void syscall_unregfunc(void) } while_each_thread(g, t); read_unlock_irqrestore(&tasklist_lock, flags); } - mutex_unlock(®func_mutex); } #endif -- cgit v1.2.3 From 1c569f0264ea629c10bbab471dd0626ce4d3f19f Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Mon, 24 Aug 2009 14:43:14 -0700 Subject: tracing: Create generic syscall TRACE_EVENTs This converts the syscall_enter/exit tracepoints into TRACE_EVENTs, so you can have generic ftrace events that capture all system calls with arguments and return values. These generic events are also renamed to sys_enter/exit, so they're more closely aligned to the specific sys_enter_foo events. Signed-off-by: Josh Stone Cc: Jason Baron Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Li Zefan Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Mathieu Desnoyers Cc: Jiaying Zhang Cc: Martin Bligh Cc: Lai Jiangshan Cc: Paul Mundt Cc: Martin Schwidefsky Cc: Heiko Carstens LKML-Reference: <1251150194-1713-5-git-send-email-jistone@redhat.com> Signed-off-by: Frederic Weisbecker --- arch/s390/kernel/ptrace.c | 8 ++--- arch/x86/kernel/ptrace.c | 12 +++---- include/trace/events/syscalls.h | 70 +++++++++++++++++++++++++++++++++++++++++ include/trace/syscall.h | 17 ---------- kernel/trace/trace_syscalls.c | 17 +++++----- 5 files changed, 88 insertions(+), 36 deletions(-) create mode 100644 include/trace/events/syscalls.h (limited to 'include') diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index c05b44b80c23..f3ddd7ac06c5 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -51,8 +51,8 @@ #include "compat_ptrace.h" #endif -DEFINE_TRACE_FN(syscall_enter, syscall_regfunc, syscall_unregfunc); -DEFINE_TRACE_FN(syscall_exit, syscall_regfunc, syscall_unregfunc); +#define CREATE_TRACE_POINTS +#include enum s390_regset { REGSET_GENERAL, @@ -665,7 +665,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) } if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_syscall_enter(regs, regs->gprs[2]); + trace_sys_enter(regs, regs->gprs[2]); if (unlikely(current->audit_context)) audit_syscall_entry(is_compat_task() ? 
@@ -683,7 +683,7 @@ asmlinkage void do_syscall_trace_exit(struct pt_regs *regs) regs->gprs[2]); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_syscall_exit(regs, regs->gprs[2]); + trace_sys_exit(regs, regs->gprs[2]); if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 31e9b97ec4d6..8d7d5c9c1be3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -35,13 +35,11 @@ #include #include -#include - -DEFINE_TRACE_FN(syscall_enter, syscall_regfunc, syscall_unregfunc); -DEFINE_TRACE_FN(syscall_exit, syscall_regfunc, syscall_unregfunc); - #include "tls.h" +#define CREATE_TRACE_POINTS +#include + enum x86_regset { REGSET_GENERAL, REGSET_FP, @@ -1501,7 +1499,7 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) ret = -1L; if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_syscall_enter(regs, regs->orig_ax); + trace_sys_enter(regs, regs->orig_ax); if (unlikely(current->audit_context)) { if (IS_IA32) @@ -1527,7 +1525,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) - trace_syscall_exit(regs, regs->ax); + trace_sys_exit(regs, regs->ax); if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall_exit(regs, 0); diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h new file mode 100644 index 000000000000..397dff2dbd5a --- /dev/null +++ b/include/trace/events/syscalls.h @@ -0,0 +1,70 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM syscalls + +#if !defined(_TRACE_EVENTS_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_EVENTS_SYSCALLS_H + +#include + +#include +#include + + +#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS + +extern void syscall_regfunc(void); +extern void syscall_unregfunc(void); + +TRACE_EVENT_FN(sys_enter, + + TP_PROTO(struct pt_regs *regs, long id), + + TP_ARGS(regs, id), + + TP_STRUCT__entry( + __field( long, id ) + __array( unsigned long, args, 6 ) + ), + + TP_fast_assign( + __entry->id = id; + syscall_get_arguments(current, regs, 0, 6, __entry->args); + ), + + TP_printk("NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)", + __entry->id, + __entry->args[0], __entry->args[1], __entry->args[2], + __entry->args[3], __entry->args[4], __entry->args[5]), + + syscall_regfunc, syscall_unregfunc +); + +TRACE_EVENT_FN(sys_exit, + + TP_PROTO(struct pt_regs *regs, long ret), + + TP_ARGS(regs, ret), + + TP_STRUCT__entry( + __field( long, id ) + __field( long, ret ) + ), + + TP_fast_assign( + __entry->id = syscall_get_nr(current, regs); + __entry->ret = ret; + ), + + TP_printk("NR %ld = %ld", + __entry->id, __entry->ret), + + syscall_regfunc, syscall_unregfunc +); + +#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ + +#endif /* _TRACE_EVENTS_SYSCALLS_H */ + +/* This part must be outside protection */ +#include + diff --git a/include/trace/syscall.h b/include/trace/syscall.h index 4e1943001854..5dc283ba5ae0 100644 --- a/include/trace/syscall.h +++ b/include/trace/syscall.h @@ -8,23 +8,6 @@ #include -#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS - -extern void syscall_regfunc(void); -extern void syscall_unregfunc(void); - -DECLARE_TRACE(syscall_enter, - TP_PROTO(struct pt_regs *regs, long id), - TP_ARGS(regs, id) -); - -DECLARE_TRACE(syscall_exit, - TP_PROTO(struct pt_regs *regs, long ret), - TP_ARGS(regs, ret) -); - -#endif - /* * A syscall entry in the ftrace syscalls array. 
* diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 46c1b977a2cb..2698fe401ebd 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -286,7 +287,7 @@ int reg_event_syscall_enter(void *ptr) return -ENOSYS; mutex_lock(&syscall_trace_lock); if (!sys_refcount_enter) - ret = register_trace_syscall_enter(ftrace_syscall_enter); + ret = register_trace_sys_enter(ftrace_syscall_enter); if (ret) { pr_info("event trace: Could not activate" "syscall entry trace point"); @@ -311,7 +312,7 @@ void unreg_event_syscall_enter(void *ptr) sys_refcount_enter--; clear_bit(num, enabled_enter_syscalls); if (!sys_refcount_enter) - unregister_trace_syscall_enter(ftrace_syscall_enter); + unregister_trace_sys_enter(ftrace_syscall_enter); mutex_unlock(&syscall_trace_lock); } @@ -327,7 +328,7 @@ int reg_event_syscall_exit(void *ptr) return -ENOSYS; mutex_lock(&syscall_trace_lock); if (!sys_refcount_exit) - ret = register_trace_syscall_exit(ftrace_syscall_exit); + ret = register_trace_sys_exit(ftrace_syscall_exit); if (ret) { pr_info("event trace: Could not activate" "syscall exit trace point"); @@ -352,7 +353,7 @@ void unreg_event_syscall_exit(void *ptr) sys_refcount_exit--; clear_bit(num, enabled_exit_syscalls); if (!sys_refcount_exit) - unregister_trace_syscall_exit(ftrace_syscall_exit); + unregister_trace_sys_exit(ftrace_syscall_exit); mutex_unlock(&syscall_trace_lock); } @@ -418,7 +419,7 @@ int reg_prof_syscall_enter(char *name) mutex_lock(&syscall_trace_lock); if (!sys_prof_refcount_enter) - ret = register_trace_syscall_enter(prof_syscall_enter); + ret = register_trace_sys_enter(prof_syscall_enter); if (ret) { pr_info("event trace: Could not activate" "syscall entry trace point"); @@ -442,7 +443,7 @@ void unreg_prof_syscall_enter(char *name) sys_prof_refcount_enter--; clear_bit(num, enabled_prof_enter_syscalls); if (!sys_prof_refcount_enter) - unregister_trace_syscall_enter(prof_syscall_enter); + unregister_trace_sys_enter(prof_syscall_enter); mutex_unlock(&syscall_trace_lock); } @@ -479,7 +480,7 @@ int reg_prof_syscall_exit(char *name) mutex_lock(&syscall_trace_lock); if (!sys_prof_refcount_exit) - ret = register_trace_syscall_exit(prof_syscall_exit); + ret = register_trace_sys_exit(prof_syscall_exit); if (ret) { pr_info("event trace: Could not activate" "syscall entry trace point"); @@ -503,7 +504,7 @@ void unreg_prof_syscall_exit(char *name) sys_prof_refcount_exit--; clear_bit(num, enabled_prof_exit_syscalls); if (!sys_prof_refcount_exit) - unregister_trace_syscall_exit(prof_syscall_exit); + unregister_trace_sys_exit(prof_syscall_exit); mutex_unlock(&syscall_trace_lock); } -- cgit v1.2.3 From f0693c8bd5c50380b299e19d19e7640024640b42 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 6 Aug 2009 14:59:32 -0400 Subject: tracing/sched: show CPU task wakes up on in trace event While debugging the scheduler push / pull algorithm, I found it very annoying that the sched wake up events did not show the CPU that the task was waking on. In order to analyze the scheduler, I needed that information. This patch adds recording of the CPU that a task is waking up on. 
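
For context, a hedged sketch of how such a field fits into the usual TRACE_EVENT triple (a hypothetical event, not the actual sched_wakeup definition): the field is declared in TP_STRUCT__entry, captured with task_cpu() in TP_fast_assign, and rendered by TP_printk, which is the same pattern the change below applies to sched_wakeup and sched_wakeup_new.

/* Illustrative only; follows the __field()/TP_fast_assign()/TP_printk()
 * pattern of the sched_wakeup change below. */
TRACE_EVENT(sample_task_queued,

	TP_PROTO(struct task_struct *p),

	TP_ARGS(p),

	TP_STRUCT__entry(
		__array(	char,	comm,	TASK_COMM_LEN	)
		__field(	pid_t,	pid			)
		__field(	int,	cpu			)
	),

	TP_fast_assign(
		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
		__entry->pid	= p->pid;
		__entry->cpu	= task_cpu(p);
	),

	TP_printk("task %s:%d queued on cpu %03d",
		  __entry->comm, __entry->pid, __entry->cpu)
);
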
Signed-off-by: Steven Rostedt --- include/trace/events/sched.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8949bb7eb082..a581ef211ff5 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -94,6 +94,7 @@ TRACE_EVENT(sched_wakeup, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) + __field( int, cpu ) ), TP_fast_assign( @@ -101,11 +102,12 @@ TRACE_EVENT(sched_wakeup, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; + __entry->cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d", + TP_printk("task %s:%d [%d] success=%d [%03d]", __entry->comm, __entry->pid, __entry->prio, - __entry->success) + __entry->success, __entry->cpu) ); /* @@ -125,6 +127,7 @@ TRACE_EVENT(sched_wakeup_new, __field( pid_t, pid ) __field( int, prio ) __field( int, success ) + __field( int, cpu ) ), TP_fast_assign( @@ -132,11 +135,12 @@ TRACE_EVENT(sched_wakeup_new, __entry->pid = p->pid; __entry->prio = p->prio; __entry->success = success; + __entry->cpu = task_cpu(p); ), - TP_printk("task %s:%d [%d] success=%d", + TP_printk("task %s:%d [%d] success=%d [%03d]", __entry->comm, __entry->pid, __entry->prio, - __entry->success) + __entry->success, __entry->cpu) ); /* -- cgit v1.2.3 From 43b51ead3f752a3935116e5b1a94254b8573734f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 7 Aug 2009 10:33:22 +0800 Subject: tracing/filters: Add __field_ext() to TRACE_EVENT Add __field_ext(), so a field can be assigned to a specific filter_type, which matches a corresponding filter function. For example, a later patch will allow this: __field_ext(const char *, str, FILTER_PTR_STR); Signed-off-by: Li Zefan LKML-Reference: <4A7B9272.6050709@cn.fujitsu.com> [ Fixed a -1 to FILTER_OTHER Forward ported to latest kernel. 
] Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 9 ++++++++- include/trace/ftrace.h | 31 ++++++++++++++++++++++++------- kernel/trace/trace_events.c | 11 ++++++++--- kernel/trace/trace_events_filter.c | 6 ------ kernel/trace/trace_export.c | 8 +++++--- kernel/trace/trace_syscalls.c | 6 ++++-- 6 files changed, 49 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index df5b085c4150..0440bea8f6bb 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -140,9 +140,16 @@ extern int filter_current_check_discard(struct ftrace_event_call *call, void *rec, struct ring_buffer_event *event); +enum { + FILTER_OTHER = 0, + FILTER_STATIC_STRING, + FILTER_DYN_STRING, +}; + extern int trace_define_field(struct ftrace_event_call *call, const char *type, const char *name, - int offset, int size, int is_signed); + int offset, int size, int is_signed, + int filter_type); extern int trace_define_common_fields(struct ftrace_event_call *call); #define is_signed_type(type) (((type)(-1)) < 0) diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 127400255e4c..1b1f742a6045 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -21,6 +21,9 @@ #undef __field #define __field(type, item) type item; +#undef __field_ext +#define __field_ext(type, item, filter_type) type item; + #undef __array #define __array(type, item, len) type item[len]; @@ -62,7 +65,10 @@ */ #undef __field -#define __field(type, item); +#define __field(type, item) + +#undef __field_ext +#define __field_ext(type, item, filter_type) #undef __array #define __array(type, item, len) @@ -110,6 +116,9 @@ if (!ret) \ return 0; +#undef __field_ext +#define __field_ext(type, item, filter_type) __field(type, item) + #undef __array #define __array(type, item, len) \ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ @@ -265,28 +274,33 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef __field -#define __field(type, item) \ +#undef __field_ext +#define __field_ext(type, item, filter_type) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), is_signed_type(type)); \ + sizeof(field.item), \ + is_signed_type(type), filter_type); \ if (ret) \ return ret; +#undef __field +#define __field(type, item) __field_ext(type, item, FILTER_OTHER) + #undef __array #define __array(type, item, len) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0); \ + sizeof(field.item), 0, FILTER_OTHER); \ if (ret) \ return ret; #undef __dynamic_array #define __dynamic_array(type, item, len) \ ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ - offsetof(typeof(field), __data_loc_##item), \ - sizeof(field.__data_loc_##item), 0); + offsetof(typeof(field), __data_loc_##item), \ + sizeof(field.__data_loc_##item), 0, \ + FILTER_OTHER); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) @@ -320,6 +334,9 @@ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ #undef __field #define __field(type, item) +#undef __field_ext +#define __field_ext(type, item, filter_type) + #undef __array #define __array(type, item, len) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 5740e90f4ca1..d33bcdeffe69 100644 --- 
a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -28,7 +28,8 @@ DEFINE_MUTEX(event_mutex); LIST_HEAD(ftrace_events); int trace_define_field(struct ftrace_event_call *call, const char *type, - const char *name, int offset, int size, int is_signed) + const char *name, int offset, int size, int is_signed, + int filter_type) { struct ftrace_event_field *field; @@ -44,7 +45,11 @@ int trace_define_field(struct ftrace_event_call *call, const char *type, if (!field->type) goto err; - field->filter_type = filter_assign_type(type); + if (filter_type == FILTER_OTHER) + field->filter_type = filter_assign_type(type); + else + field->filter_type = filter_type; + field->offset = offset; field->size = size; field->is_signed = is_signed; @@ -68,7 +73,7 @@ EXPORT_SYMBOL_GPL(trace_define_field); ret = trace_define_field(call, #type, "common_" #item, \ offsetof(typeof(ent), item), \ sizeof(ent.item), \ - is_signed_type(type)); \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 22e6d822bbaa..8a8e576733fc 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -475,12 +475,6 @@ static int filter_add_pred_fn(struct filter_parse_state *ps, return 0; } -enum { - FILTER_OTHER = 0, - FILTER_STATIC_STRING, - FILTER_DYN_STRING, -}; - int filter_assign_type(const char *type) { if (strstr(type, "__data_loc") && strstr(type, "char")) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 70875303ae46..029a91f42287 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -158,7 +158,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define TRACE_FIELD(type, item, assign) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), is_signed_type(type)); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; @@ -166,7 +167,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define TRACE_FIELD_SPECIAL(type, item, len, cmd) \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0); \ + sizeof(field.item), 0, FILTER_OTHER); \ if (ret) \ return ret; @@ -174,7 +175,8 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #define TRACE_FIELD_SIGN(type, item, assign, is_signed) \ ret = trace_define_field(event_call, #type, #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), is_signed); \ + sizeof(field.item), is_signed, \ + FILTER_OTHER); \ if (ret) \ return ret; diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 46c1b977a2cb..97a2454760b0 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -194,7 +194,8 @@ int syscall_enter_define_fields(struct ftrace_event_call *call) for (i = 0; i < meta->nb_args; i++) { ret = trace_define_field(call, meta->types[i], meta->args[i], offset, - sizeof(unsigned long), 0); + sizeof(unsigned long), 0, + FILTER_OTHER); offset += sizeof(unsigned long); } @@ -210,7 +211,8 @@ int syscall_exit_define_fields(struct ftrace_event_call *call) if (ret) return ret; - ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0); + ret = trace_define_field(call, SYSCALL_FIELD(unsigned long, ret), 0, + FILTER_OTHER); return ret; } -- cgit v1.2.3 From 87a342f5db69d53ea70493bb1ec69c9047677038 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 7 
Aug 2009 10:33:43 +0800 Subject: tracing/filters: Support filtering for char * strings Usually, char * entries are dangerous in traces because the string can be released whereas a pointer to it can still wait to be read from the ring buffer. But sometimes we can assume it's safe, like in case of RO data (eg: __file__ or __line__, used in bkl trace event). If these RO data are in a module and so is the call to the trace event, then it's safe, because the ring buffer will be flushed once this module get unloaded. To allow char * to be treated as a string: TRACE_EVENT(..., TP_STRUCT__entry( __field_ext(const char *, name, FILTER_PTR_STRING) ... ) ... ); The filtering will not dereference "char *" unless the developer explicitly sets FILTER_PTR_STR in __field_ext. Signed-off-by: Li Zefan LKML-Reference: <4A7B9287.90205@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 1 + kernel/trace/trace_events_filter.c | 26 +++++++++++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 0440bea8f6bb..ace2da9e0a0d 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -144,6 +144,7 @@ enum { FILTER_OTHER = 0, FILTER_STATIC_STRING, FILTER_DYN_STRING, + FILTER_PTR_STRING, }; extern int trace_define_field(struct ftrace_event_call *call, diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 8a8e576733fc..9f03082c81d8 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -163,6 +163,20 @@ static int filter_pred_string(struct filter_pred *pred, void *event, return match; } +/* Filter predicate for char * pointers */ +static int filter_pred_pchar(struct filter_pred *pred, void *event, + int val1, int val2) +{ + char **addr = (char **)(event + pred->offset); + int cmp, match; + + cmp = strncmp(*addr, pred->str_val, pred->str_len); + + match = (!cmp) ^ pred->not; + + return match; +} + /* * Filter predicate for dynamic sized arrays of characters. * These are implemented through a list of strings at the end @@ -489,7 +503,8 @@ int filter_assign_type(const char *type) static bool is_string_field(struct ftrace_event_field *field) { return field->filter_type == FILTER_DYN_STRING || - field->filter_type == FILTER_STATIC_STRING; + field->filter_type == FILTER_STATIC_STRING || + field->filter_type == FILTER_PTR_STRING; } static int is_legal_op(struct ftrace_event_field *field, int op) @@ -579,11 +594,16 @@ static int filter_add_pred(struct filter_parse_state *ps, } if (is_string_field(field)) { + pred->str_len = field->size; + if (field->filter_type == FILTER_STATIC_STRING) fn = filter_pred_string; - else + else if (field->filter_type == FILTER_DYN_STRING) fn = filter_pred_strloc; - pred->str_len = field->size; + else { + fn = filter_pred_pchar; + pred->str_len = strlen(pred->str_val); + } } else { if (field->is_signed) ret = strict_strtoll(pred->str_val, 0, &val); -- cgit v1.2.3 From 5ac35daa9343936038a3c9c4f4d6d3fe6a2a7bd8 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 25 Aug 2009 14:06:22 +0800 Subject: tracing/events: fix the include file dependencies The TRACE_EVENT depends on the include/linux/tracepoint.h first and include/trace/ftrace.h later, if we include the ftrace.h early, a building error will occur. 
Both define TRACE_EVENT in trace_a.h and trace_b.h, if we include those in .c file, like this: #define CREATE_TRACE_POINTS include include The above will not work, because the TRACE_EVENT was re-defined by the previous .h file. Reported-by: Wei Yongjun Signed-off-by: Xiao Guangrong LKML-Reference: <4A937F5E.3020802@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 3 +-- include/trace/define_trace.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 5984ed04c03b..81709854f7ab 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -180,6 +180,7 @@ static inline void tracepoint_synchronize_unregister(void) } #define PARAMS(args...) args +#endif #ifndef TRACE_EVENT /* @@ -287,5 +288,3 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) #endif - -#endif diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index f7a7ae1e8f90..cd150b9d8e32 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -56,6 +56,7 @@ #include #endif +#undef TRACE_EVENT #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ -- cgit v1.2.3 From 7cb2e3ee2aeec5b83ecadba929a2dc575dd4008f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 26 Aug 2009 00:32:37 -0400 Subject: tracing: add comments to explain TRACE_EVENT out of protection The commit: commit 5ac35daa9343936038a3c9c4f4d6d3fe6a2a7bd8 Author: Xiao Guangrong tracing/events: fix the include file dependencies Moved the TRACE_EVENT out of the ifdef protection of tracepoints.h but uses the define of TRACE_EVENT itself as protection. This patch adds comments to explain why. Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 81709854f7ab..0341f2e2698a 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -180,7 +180,15 @@ static inline void tracepoint_synchronize_unregister(void) } #define PARAMS(args...) args -#endif + +#endif /* _LINUX_TRACEPOINT_H */ + +/* + * Note: we keep the TRACE_EVENT outside the include file ifdef protection. + * This is due to the way trace events work. If a file includes two + * trace event headers under one "CREATE_TRACE_POINTS" the first include + * will override the TRACE_EVENT and break the second include. + */ #ifndef TRACE_EVENT /* @@ -287,4 +295,5 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_EVENT(name, proto, args, struct, assign, print) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#endif + +#endif /* ifdef TRACE_EVENT (see note above) */ -- cgit v1.2.3 From 2246b2f1b43f3fbd128e72b129dcbbd3202cc592 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Wed, 26 Aug 2009 04:04:02 -0300 Subject: Bluetooth: Handle L2CAP case when the remote receiver is busy Implement all issues related to RemoteBusy in the RECV state table. Signed-off-by: Gustavo F. 
Padovan Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 2 ++ net/bluetooth/l2cap.c | 25 +++++++++++++++++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 7ca614ac5d43..9516f4b4a3c2 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -373,6 +373,8 @@ struct l2cap_pinfo { #define L2CAP_CONN_WAIT_F 0x04 #define L2CAP_CONN_SREJ_ACT 0x08 #define L2CAP_CONN_SEND_PBIT 0x10 +#define L2CAP_CONN_REMOTE_BUSY 0x20 +#define L2CAP_CONN_LOCAL_BUSY 0x40 #define __mod_retrans_timer() mod_timer(&l2cap_pi(sk)->retrans_timer, \ jiffies + msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO)); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 0a36c61c011f..40fbf5cb1f7e 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1350,7 +1350,8 @@ static int l2cap_ertm_send(struct sock *sk) if (pi->conn_state & L2CAP_CONN_WAIT_F) return 0; - while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk))) { + while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk)) + && !(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) { tx_skb = skb_clone(skb, GFP_ATOMIC); if (pi->remote_max_tx && @@ -3351,7 +3352,10 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str control |= L2CAP_SUPER_RCV_READY | (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT); l2cap_send_sframe(l2cap_pi(sk), control); + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + } else if (rx_control & L2CAP_CTRL_FINAL) { + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); @@ -3366,13 +3370,19 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str } else { pi->expected_ack_seq = tx_seq; l2cap_drop_acked_frames(sk); - if (pi->unacked_frames > 0) + + if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) + && (pi->unacked_frames > 0)) __mod_retrans_timer(); + l2cap_ertm_send(sk); + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; } break; case L2CAP_SUPER_REJECT: + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + pi->expected_ack_seq = __get_reqseq(rx_control); l2cap_drop_acked_frames(sk); @@ -3384,6 +3394,8 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str break; case L2CAP_SUPER_SELECT_REJECT: + pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY; + if (rx_control & L2CAP_CTRL_POLL) { l2cap_retransmit_frame(sk, tx_seq); pi->expected_ack_seq = tx_seq; @@ -3410,6 +3422,15 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str break; case L2CAP_SUPER_RCV_NOT_READY: + pi->conn_state |= L2CAP_CONN_REMOTE_BUSY; + pi->expected_ack_seq = tx_seq; + l2cap_drop_acked_frames(sk); + + del_timer(&l2cap_pi(sk)->retrans_timer); + if (rx_control & L2CAP_CTRL_POLL) { + u16 control = L2CAP_CTRL_FINAL | L2CAP_SUPER_RCV_READY; + l2cap_send_sframe(l2cap_pi(sk), control); + } break; } -- cgit v1.2.3 From 06e799764eb7c2e4640888d438c3524d756613e1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 Aug 2009 18:53:37 -0700 Subject: rcu: Remove lockdep annotations from RCU's _notrace() API members The lockdep annotations rcu_read_acquire() and rcu_read_release() might lead to infinite looping if called from lockdep. So this patch removes them. Formal repost of http://lkml.org/lkml/2009/8/25/309 on the strength of Lai Jiangshan's review. Suggested-by: Lai Jiangshan Suggested-by: Mathieu Desnoyers Reviewed-by: Lai Jiangshan Signed-off-by: Paul E. 
McKenney Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <20090826015337.GA18904@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8b4422c558da..95e0615f4d75 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -195,7 +195,6 @@ static inline notrace void rcu_read_lock_sched_notrace(void) { preempt_disable_notrace(); __acquire(RCU_SCHED); - rcu_read_acquire(); } /* @@ -211,7 +210,6 @@ static inline void rcu_read_unlock_sched(void) } static inline notrace void rcu_read_unlock_sched_notrace(void) { - rcu_read_release(); __release(RCU_SCHED); preempt_enable_notrace(); } -- cgit v1.2.3 From 0396c215f301e92677d1e9a064b405e31501dc1d Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 25 Aug 2009 16:41:06 +0100 Subject: uwb: avoid radio controller reset loops If a radio controller reset attempt occurs while a probe() or remove() is in progress it fails and is retried endlessly, potentially preventing the probe() or remove() from completing. If a reset fails, sleep for a bit before retrying the reset. This allows the probe()/remove() to complete. Signed-off-by: David Vrabel --- drivers/uwb/hwa-rc.c | 3 +-- drivers/uwb/reset.c | 21 +++++++++++---------- drivers/uwb/umc-bus.c | 2 +- drivers/uwb/whc-rc.c | 3 +-- include/linux/uwb.h | 2 +- 5 files changed, 15 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/uwb/hwa-rc.c b/drivers/uwb/hwa-rc.c index 9052bcb4f528..e7eeb63fab23 100644 --- a/drivers/uwb/hwa-rc.c +++ b/drivers/uwb/hwa-rc.c @@ -887,8 +887,7 @@ static int hwarc_post_reset(struct usb_interface *iface) struct hwarc *hwarc = usb_get_intfdata(iface); struct uwb_rc *uwb_rc = hwarc->uwb_rc; - uwb_rc_post_reset(uwb_rc); - return 0; + return uwb_rc_post_reset(uwb_rc); } /** USB device ID's that we handle */ diff --git a/drivers/uwb/reset.c b/drivers/uwb/reset.c index 70f8050221ff..7f0512e43d9d 100644 --- a/drivers/uwb/reset.c +++ b/drivers/uwb/reset.c @@ -30,6 +30,7 @@ */ #include #include +#include #include "uwb-internal.h" @@ -323,13 +324,15 @@ int uwbd_msg_handle_reset(struct uwb_event *evt) dev_info(&rc->uwb_dev.dev, "resetting radio controller\n"); ret = rc->reset(rc); - if (ret) { + if (ret < 0) { dev_err(&rc->uwb_dev.dev, "failed to reset hardware: %d\n", ret); goto error; } return 0; error: - /* Nothing can be done except try the reset again. */ + /* Nothing can be done except try the reset again. Wait a bit + to avoid reset loops during probe() or remove(). */ + msleep(1000); uwb_rc_reset_all(rc); return ret; } @@ -368,22 +371,20 @@ void uwb_rc_pre_reset(struct uwb_rc *rc) } EXPORT_SYMBOL_GPL(uwb_rc_pre_reset); -void uwb_rc_post_reset(struct uwb_rc *rc) +int uwb_rc_post_reset(struct uwb_rc *rc) { int ret; ret = rc->start(rc); if (ret) - goto error; + goto out; ret = uwb_rc_mac_addr_set(rc, &rc->uwb_dev.mac_addr); if (ret) - goto error; + goto out; ret = uwb_rc_dev_addr_set(rc, &rc->uwb_dev.dev_addr); if (ret) - goto error; - return; -error: - /* Nothing can be done except try the reset again. 
*/ - uwb_rc_reset_all(rc); + goto out; +out: + return ret; } EXPORT_SYMBOL_GPL(uwb_rc_post_reset); diff --git a/drivers/uwb/umc-bus.c b/drivers/uwb/umc-bus.c index 5ad36164c13b..cdd6c8efc9f8 100644 --- a/drivers/uwb/umc-bus.c +++ b/drivers/uwb/umc-bus.c @@ -66,7 +66,7 @@ int umc_controller_reset(struct umc_dev *umc) return -EAGAIN; ret = device_for_each_child(parent, parent, umc_bus_pre_reset_helper); if (ret >= 0) - device_for_each_child(parent, parent, umc_bus_post_reset_helper); + ret = device_for_each_child(parent, parent, umc_bus_post_reset_helper); up(&parent->sem); return ret; diff --git a/drivers/uwb/whc-rc.c b/drivers/uwb/whc-rc.c index 19a1dd129212..1d9a6f54658e 100644 --- a/drivers/uwb/whc-rc.c +++ b/drivers/uwb/whc-rc.c @@ -443,8 +443,7 @@ static int whcrc_post_reset(struct umc_dev *umc) struct whcrc *whcrc = umc_get_drvdata(umc); struct uwb_rc *uwb_rc = whcrc->uwb_rc; - uwb_rc_post_reset(uwb_rc); - return 0; + return uwb_rc_post_reset(uwb_rc); } /* PCI device ID's that we handle [so it gets loaded] */ diff --git a/include/linux/uwb.h b/include/linux/uwb.h index c02128991ff7..7fc9746f22cd 100644 --- a/include/linux/uwb.h +++ b/include/linux/uwb.h @@ -597,7 +597,7 @@ void uwb_rc_neh_grok(struct uwb_rc *, void *, size_t); void uwb_rc_neh_error(struct uwb_rc *, int); void uwb_rc_reset_all(struct uwb_rc *rc); void uwb_rc_pre_reset(struct uwb_rc *rc); -void uwb_rc_post_reset(struct uwb_rc *rc); +int uwb_rc_post_reset(struct uwb_rc *rc); /** * uwb_rsv_is_owner - is the owner of this reservation the RC? -- cgit v1.2.3 From 9e36fda0b359d2a6ae039c3d7e71a04502a77898 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:35 -0700 Subject: x86, pat: Add PAT reserve free to io_mapping* APIs io_mapping_* interfaces were added, mainly for graphics drivers. Make this interface go through the PAT reserve/free, instead of hardcoding WC mapping. This makes sure that there are no aliases due to unconditional WC setting. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/iomap.h | 9 ++++++--- arch/x86/mm/iomap_32.c | 27 +++++++++++++++++++++++++-- include/linux/io-mapping.h | 17 ++++++++++++----- 3 files changed, 43 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 0e9fe1d9d971..f35eb45d6576 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -26,13 +26,16 @@ #include #include -int -is_io_mapping_possible(resource_size_t base, unsigned long size); - void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); void iounmap_atomic(void *kvaddr, enum km_type type); +int +iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); + +void +iomap_free(resource_size_t base, unsigned long size); + #endif /* _ASM_X86_IOMAP_H */ diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index fe6f84ca121e..84e236ce76ba 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -21,7 +21,7 @@ #include #include -int is_io_mapping_possible(resource_size_t base, unsigned long size) +static int is_io_mapping_possible(resource_size_t base, unsigned long size) { #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT) /* There is no way to map greater than 1 << 32 address without PAE */ @@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size) #endif return 1; } -EXPORT_SYMBOL_GPL(is_io_mapping_possible); + +int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot) +{ + unsigned long flag = _PAGE_CACHE_WC; + int ret; + + if (!is_io_mapping_possible(base, size)) + return -EINVAL; + + ret = io_reserve_memtype(base, base + size, &flag); + if (ret) + return ret; + + *prot = __pgprot(__PAGE_KERNEL | flag); + return 0; +} +EXPORT_SYMBOL_GPL(iomap_create_wc); + +void +iomap_free(resource_size_t base, unsigned long size) +{ + io_free_memtype(base, base + size); +} +EXPORT_SYMBOL_GPL(iomap_free); void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0adb0f91568c..97eb928b4924 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -49,23 +49,30 @@ static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { struct io_mapping *iomap; - - if (!is_io_mapping_possible(base, size)) - return NULL; + pgprot_t prot; iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); if (!iomap) - return NULL; + goto out_err; + + if (iomap_create_wc(base, size, &prot)) + goto out_free; iomap->base = base; iomap->size = size; - iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); + iomap->prot = prot; return iomap; + +out_free: + kfree(iomap); +out_err: + return NULL; } static inline void io_mapping_free(struct io_mapping *mapping) { + iomap_free(mapping->base, mapping->size); kfree(mapping); } -- cgit v1.2.3 From 46cf98cdaef5471926010b5bddf84c44ec177fdd Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 10 Jul 2009 09:57:37 -0700 Subject: x86, pat: Generalize the use of page flag PG_uncached Only IA64 was using PG_uncached as of now. We now intend to use this bit in x86 as well, to keep track of memory type of those addresses that have page struct for them. So, generalize the use of that bit across ia64 and x86. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. 
Peter Anvin --- arch/ia64/Kconfig | 4 ++++ arch/x86/Kconfig | 4 ++++ include/linux/page-flags.h | 4 ++-- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 170042b420d4..e6246119932a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -112,6 +112,10 @@ config IA64_UNCACHED_ALLOCATOR bool select GENERIC_ALLOCATOR +config ARCH_USES_PG_UNCACHED + def_bool y + depends on IA64_UNCACHED_ALLOCATOR + config AUDIT_ARCH bool default y diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f72205909..8e1595382196 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1414,6 +1414,10 @@ config X86_PAT If unsure, say Y. +config ARCH_USES_PG_UNCACHED + def_bool y + depends on X86_PAT + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e2e5ce543595..2b87acfc5f87 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -99,7 +99,7 @@ enum pageflags { #ifdef CONFIG_HAVE_MLOCKED_PAGE_BIT PG_mlocked, /* Page is vma mlocked */ #endif -#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +#ifdef CONFIG_ARCH_USES_PG_UNCACHED PG_uncached, /* Page has been mapped as uncached */ #endif __NR_PAGEFLAGS, @@ -257,7 +257,7 @@ PAGEFLAG_FALSE(Mlocked) SETPAGEFLAG_NOOP(Mlocked) TESTCLEARFLAG_FALSE(Mlocked) #endif -#ifdef CONFIG_IA64_UNCACHED_ALLOCATOR +#ifdef CONFIG_ARCH_USES_PG_UNCACHED PAGEFLAG(Uncached, uncached) #else PAGEFLAG_FALSE(Uncached) -- cgit v1.2.3 From c9c97b8c75019814d8c007059bc827bb475be917 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 27 Aug 2009 09:53:47 +1000 Subject: drm/ttm: consolidate cache flushing code in one place. This merges the TTM and drm cache flushing into one file in the drm core. 
Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_cache.c | 51 +++++++++++++++++++++++++++------ drivers/gpu/drm/ttm/ttm_tt.c | 67 ++------------------------------------------ include/drm/drm_cache.h | 38 +++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 74 deletions(-) create mode 100644 include/drm/drm_cache.h (limited to 'include') diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c index 0e994a0e46d4..3a5575e638db 100644 --- a/drivers/gpu/drm/drm_cache.c +++ b/drivers/gpu/drm/drm_cache.c @@ -45,25 +45,58 @@ drm_clflush_page(struct page *page) clflush(page_virtual + i); kunmap_atomic(page_virtual, KM_USER0); } -#endif +static void drm_cache_flush_clflush(struct page *pages[], + unsigned long num_pages) +{ + unsigned long i; + + mb(); + for (i = 0; i < num_pages; i++) + drm_clflush_page(*pages++); + mb(); +} + +static void +drm_clflush_ipi_handler(void *null) +{ + wbinvd(); +} +#elif !defined(__powerpc__) +static void drm_cache_ipi_handler(void *dummy) +{ +} +#endif void drm_clflush_pages(struct page *pages[], unsigned long num_pages) { #if defined(CONFIG_X86) if (cpu_has_clflush) { - unsigned long i; - - mb(); - for (i = 0; i < num_pages; ++i) - drm_clflush_page(*pages++); - mb(); - + drm_cache_flush_clflush(pages, num_pages); return; } - wbinvd(); + if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0) + printk(KERN_ERR "Timed out waiting for cache flush.\n"); + +#elif defined(__powerpc__) + unsigned long i; + for (i = 0; i < num_pages; i++) { + struct page *page = pages[i]; + void *page_virtual; + + if (unlikely(page == NULL)) + continue; + + page_virtual = kmap_atomic(page, KM_USER0); + flush_dcache_range((unsigned long)page_virtual, + (unsigned long)page_virtual + PAGE_SIZE); + kunmap_atomic(page_virtual, KM_USER0); + } +#else + if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0) + printk(KERN_ERR "Timed out waiting for drm cache flush\n"); #endif } EXPORT_SYMBOL(drm_clflush_pages); diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 42cca5519761..a55ee1a56c16 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -34,76 +34,13 @@ #include #include #include +#include "drm_cache.h" #include "ttm/ttm_module.h" #include "ttm/ttm_bo_driver.h" #include "ttm/ttm_placement.h" static int ttm_tt_swapin(struct ttm_tt *ttm); -#if defined(CONFIG_X86) -static void ttm_tt_clflush_page(struct page *page) -{ - uint8_t *page_virtual; - unsigned int i; - - if (unlikely(page == NULL)) - return; - - page_virtual = kmap_atomic(page, KM_USER0); - - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) - clflush(page_virtual + i); - - kunmap_atomic(page_virtual, KM_USER0); -} - -static void ttm_tt_cache_flush_clflush(struct page *pages[], - unsigned long num_pages) -{ - unsigned long i; - - mb(); - for (i = 0; i < num_pages; ++i) - ttm_tt_clflush_page(*pages++); - mb(); -} -#elif !defined(__powerpc__) -static void ttm_tt_ipi_handler(void *null) -{ - ; -} -#endif - -void ttm_tt_cache_flush(struct page *pages[], unsigned long num_pages) -{ - -#if defined(CONFIG_X86) - if (cpu_has_clflush) { - ttm_tt_cache_flush_clflush(pages, num_pages); - return; - } -#elif defined(__powerpc__) - unsigned long i; - - for (i = 0; i < num_pages; ++i) { - struct page *page = pages[i]; - void *page_virtual; - - if (unlikely(page == NULL)) - continue; - - page_virtual = kmap_atomic(page, KM_USER0); - flush_dcache_range((unsigned long) page_virtual, - (unsigned long) page_virtual + PAGE_SIZE); - kunmap_atomic(page_virtual, 
KM_USER0); - } -#else - if (on_each_cpu(ttm_tt_ipi_handler, NULL, 1) != 0) - printk(KERN_ERR TTM_PFX - "Timed out waiting for drm cache flush.\n"); -#endif -} - /** * Allocates storage for pointers to the pages that back the ttm. * @@ -302,7 +239,7 @@ static int ttm_tt_set_caching(struct ttm_tt *ttm, } if (ttm->caching_state == tt_cached) - ttm_tt_cache_flush(ttm->pages, ttm->num_pages); + drm_clflush_pages(ttm->pages, ttm->num_pages); for (i = 0; i < ttm->num_pages; ++i) { cur_page = ttm->pages[i]; diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h new file mode 100644 index 000000000000..7bfb063029d8 --- /dev/null +++ b/include/drm/drm_cache.h @@ -0,0 +1,38 @@ +/************************************************************************** + * + * Copyright 2009 Red Hat Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * + **************************************************************************/ +/* + * Authors: + * Dave Airlie + */ + +#ifndef _DRM_CACHE_H_ +#define _DRM_CACHE_H_ + +void drm_clflush_pages(struct page *pages[], unsigned long num_pages); + +#endif -- cgit v1.2.3 From a1a2d1d32250f6fcc317419e9dfb4a5a6946d2e6 Mon Sep 17 00:00:00 2001 From: Pekka Paalanen Date: Sun, 23 Aug 2009 12:40:55 +0300 Subject: drm: GEM handles are u32, not int Several functions in the GEM kernel API used int as handle type, but user API has it __u32 which is also the intended type. Replace int with u32. Signed-off-by: Pekka Paalanen Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 11 +++++------ drivers/gpu/drm/i915/i915_gem.c | 3 ++- include/drm/drmP.h | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index ffe8f4394d50..230c9ffdd5e9 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -164,7 +164,7 @@ EXPORT_SYMBOL(drm_gem_object_alloc); * Removes the mapping from handle to filp for this object. 
*/ static int -drm_gem_handle_delete(struct drm_file *filp, int handle) +drm_gem_handle_delete(struct drm_file *filp, u32 handle) { struct drm_device *dev; struct drm_gem_object *obj; @@ -207,7 +207,7 @@ drm_gem_handle_delete(struct drm_file *filp, int handle) int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, - int *handlep) + u32 *handlep) { int ret; @@ -221,7 +221,7 @@ again: /* do the allocation under our spinlock */ spin_lock(&file_priv->table_lock); - ret = idr_get_new_above(&file_priv->object_idr, obj, 1, handlep); + ret = idr_get_new_above(&file_priv->object_idr, obj, 1, (int *)handlep); spin_unlock(&file_priv->table_lock); if (ret == -EAGAIN) goto again; @@ -237,7 +237,7 @@ EXPORT_SYMBOL(drm_gem_handle_create); /** Returns a reference to the object named by the handle. */ struct drm_gem_object * drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp, - int handle) + u32 handle) { struct drm_gem_object *obj; @@ -344,7 +344,7 @@ drm_gem_open_ioctl(struct drm_device *dev, void *data, struct drm_gem_open *args = data; struct drm_gem_object *obj; int ret; - int handle; + u32 handle; if (!(dev->driver->driver_features & DRIVER_GEM)) return -ENODEV; @@ -539,7 +539,6 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND; vma->vm_ops = obj->dev->driver->gem_vm_ops; vma->vm_private_data = map->handle; - /* FIXME: use pgprot_writecombine when available */ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); /* Take a ref for this mapping of the object, so that the fault diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 140bee142fc2..0e6c9cca897c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -111,7 +111,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_create *args = data; struct drm_gem_object *obj; - int handle, ret; + int ret; + u32 handle; args->size = roundup(args->size, PAGE_SIZE); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index e0f1c1fee58b..eeefb6369e19 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1441,7 +1441,7 @@ drm_gem_object_unreference(struct drm_gem_object *obj) int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, - int *handlep); + u32 *handlep); static inline void drm_gem_object_handle_reference(struct drm_gem_object *obj) @@ -1467,7 +1467,7 @@ drm_gem_object_handle_unreference(struct drm_gem_object *obj) struct drm_gem_object *drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp, - int handle); + u32 handle); int drm_gem_close_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int drm_gem_flink_ioctl(struct drm_device *dev, void *data, -- cgit v1.2.3 From 8e7ee27095aee87b5db1b0061e2ceea5878a1bbd Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 26 Aug 2009 14:29:21 -0700 Subject: flex_array: declare parts member to have incomplete type The `parts' member of struct flex_array should evaluate to an incomplete type so that sizeof() cannot be used and C99 does not require the zero-length specification. 
Signed-off-by: David Rientjes Acked-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/flex_array.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 23c1ec79a31b..603160db7c98 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -21,7 +21,7 @@ struct flex_array { struct { int element_size; int total_nr_elements; - struct flex_array_part *parts[0]; + struct flex_array_part *parts[]; }; /* * This little trick makes sure that -- cgit v1.2.3 From b62e408c05228f40e69bb38a48db8961cac6cd23 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 26 Aug 2009 14:29:22 -0700 Subject: flex_array: convert element_nr formals to unsigned It's problematic to allow signed element_nr's or total's to be passed as part of the flex array API. flex_array_alloc() allows total_nr_elements to be set to a negative quantity, which is obviously erroneous. flex_array_get() and flex_array_put() allows negative array indices in dereferencing an array part, which could address memory mapped before struct flex_array. The fix is to convert all existing element_nr formals to be qualified as unsigned. Existing checks to compare it to total_nr_elements or the max array size based on element_size need not be changed. Signed-off-by: David Rientjes Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/flex_array.h | 10 ++++++---- lib/flex_array.c | 24 +++++++++++++----------- 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 603160db7c98..45ff18491514 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -36,12 +36,14 @@ struct flex_array { .total_nr_elements = (total), \ } } } -struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags); -int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags); +struct flex_array *flex_array_alloc(int element_size, unsigned int total, + gfp_t flags); +int flex_array_prealloc(struct flex_array *fa, unsigned int start, + unsigned int end, gfp_t flags); void flex_array_free(struct flex_array *fa); void flex_array_free_parts(struct flex_array *fa); -int flex_array_put(struct flex_array *fa, int element_nr, void *src, +int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, gfp_t flags); -void *flex_array_get(struct flex_array *fa, int element_nr); +void *flex_array_get(struct flex_array *fa, unsigned int element_nr); #endif /* _FLEX_ARRAY_H */ diff --git a/lib/flex_array.c b/lib/flex_array.c index cf4e372cae90..7baed2fc3bc8 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -99,7 +99,8 @@ static inline int elements_fit_in_base(struct flex_array *fa) * capacity in the base structure. Also note that no effort is made * to efficiently pack objects across page boundaries. 
*/ -struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags) +struct flex_array *flex_array_alloc(int element_size, unsigned int total, + gfp_t flags) { struct flex_array *ret; int max_size = nr_base_part_ptrs() * __elements_per_part(element_size); @@ -115,7 +116,8 @@ struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags) return ret; } -static int fa_element_to_part_nr(struct flex_array *fa, int element_nr) +static int fa_element_to_part_nr(struct flex_array *fa, + unsigned int element_nr) { return element_nr / __elements_per_part(fa->element_size); } @@ -143,14 +145,12 @@ void flex_array_free(struct flex_array *fa) kfree(fa); } -static int fa_index_inside_part(struct flex_array *fa, int element_nr) +static unsigned int index_inside_part(struct flex_array *fa, + unsigned int element_nr) { - return element_nr % __elements_per_part(fa->element_size); -} + unsigned int part_offset; -static int index_inside_part(struct flex_array *fa, int element_nr) -{ - int part_offset = fa_index_inside_part(fa, element_nr); + part_offset = element_nr % __elements_per_part(fa->element_size); return part_offset * fa->element_size; } @@ -185,7 +185,8 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) * * Locking must be provided by the caller. */ -int flex_array_put(struct flex_array *fa, int element_nr, void *src, gfp_t flags) +int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, + gfp_t flags) { int part_nr = fa_element_to_part_nr(fa, element_nr); struct flex_array_part *part; @@ -217,7 +218,8 @@ int flex_array_put(struct flex_array *fa, int element_nr, void *src, gfp_t flags * * Locking must be provided by the caller. */ -int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags) +int flex_array_prealloc(struct flex_array *fa, unsigned int start, + unsigned int end, gfp_t flags) { int start_part; int end_part; @@ -248,7 +250,7 @@ int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags) * * Locking must be provided by the caller. */ -void *flex_array_get(struct flex_array *fa, int element_nr) +void *flex_array_get(struct flex_array *fa, unsigned int element_nr) { int part_nr = fa_element_to_part_nr(fa, element_nr); struct flex_array_part *part; -- cgit v1.2.3 From 2a908002c7b1b666616103e9df2419b38d7c6f1f Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Wed, 26 Aug 2009 14:29:29 -0700 Subject: ACPI processor: force throttling state when BIOS returns incorrect value If the BIOS reports an invalid throttling state (which seems to be fairly common after system boot), a reset is done to state T0. Because of a check in acpi_processor_get_throttling_ptc(), the reset never actually gets executed, which results in the error reoccurring on every access of for example /proc/acpi/processor/CPU0/throttling. Add a 'force' option to acpi_processor_set_throttling() to ensure the reset really takes effect. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=13389 This patch, together with the next one, fixes a regression introduced in 2.6.30, listed on the regression list. They have been available for 2.5 months now in bugzilla, but have not been picked up, despite various reminders and without any reason given. Google shows that numerous people are hitting this issue. The issue is in itself relatively minor, but the bug in the code is clear. 
The patches have been in all my kernels and today testing has shown that throttling works correctly with the patches applied when the system overheats (http://bugzilla.kernel.org/show_bug.cgi?id=13918#c14). Signed-off-by: Frans Pop Acked-by: Zhang Rui Cc: Len Brown Cc: "Rafael J. Wysocki" Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/processor_thermal.c | 6 +++--- drivers/acpi/processor_throttling.c | 26 ++++++++++++++------------ include/acpi/processor.h | 5 +++-- 3 files changed, 20 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 39838c666032..31adda1099e0 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -66,7 +66,7 @@ static int acpi_processor_apply_limit(struct acpi_processor *pr) if (pr->limit.thermal.tx > tx) tx = pr->limit.thermal.tx; - result = acpi_processor_set_throttling(pr, tx); + result = acpi_processor_set_throttling(pr, tx, false); if (result) goto end; } @@ -421,12 +421,12 @@ processor_set_cur_state(struct thermal_cooling_device *cdev, if (state <= max_pstate) { if (pr->flags.throttling && pr->throttling.state) - result = acpi_processor_set_throttling(pr, 0); + result = acpi_processor_set_throttling(pr, 0, false); cpufreq_set_cur_state(pr->id, state); } else { cpufreq_set_cur_state(pr->id, max_pstate); result = acpi_processor_set_throttling(pr, - state - max_pstate); + state - max_pstate, false); } return result; } diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index 227543789ba9..841be4ee2109 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -62,7 +62,8 @@ struct throttling_tstate { #define THROTTLING_POSTCHANGE (2) static int acpi_processor_get_throttling(struct acpi_processor *pr); -int acpi_processor_set_throttling(struct acpi_processor *pr, int state); +int acpi_processor_set_throttling(struct acpi_processor *pr, + int state, bool force); static int acpi_processor_update_tsd_coord(void) { @@ -361,7 +362,7 @@ int acpi_processor_tstate_has_changed(struct acpi_processor *pr) */ target_state = throttling_limit; } - return acpi_processor_set_throttling(pr, target_state); + return acpi_processor_set_throttling(pr, target_state, false); } /* @@ -842,7 +843,7 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) ACPI_WARNING((AE_INFO, "Invalid throttling state, reset")); state = 0; - ret = acpi_processor_set_throttling(pr, state); + ret = acpi_processor_set_throttling(pr, state, true); if (ret) return ret; } @@ -915,7 +916,7 @@ static int acpi_processor_get_fadt_info(struct acpi_processor *pr) } static int acpi_processor_set_throttling_fadt(struct acpi_processor *pr, - int state) + int state, bool force) { u32 value = 0; u32 duty_mask = 0; @@ -930,7 +931,7 @@ static int acpi_processor_set_throttling_fadt(struct acpi_processor *pr, if (!pr->flags.throttling) return -ENODEV; - if (state == pr->throttling.state) + if (!force && (state == pr->throttling.state)) return 0; if (state < pr->throttling_platform_limit) @@ -988,7 +989,7 @@ static int acpi_processor_set_throttling_fadt(struct acpi_processor *pr, } static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, - int state) + int state, bool force) { int ret; acpi_integer value; @@ -1002,7 +1003,7 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, if (!pr->flags.throttling) return -ENODEV; - if (state == 
pr->throttling.state) + if (!force && (state == pr->throttling.state)) return 0; if (state < pr->throttling_platform_limit) @@ -1018,7 +1019,8 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, return 0; } -int acpi_processor_set_throttling(struct acpi_processor *pr, int state) +int acpi_processor_set_throttling(struct acpi_processor *pr, + int state, bool force) { cpumask_var_t saved_mask; int ret = 0; @@ -1070,7 +1072,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) /* FIXME: use work_on_cpu() */ set_cpus_allowed_ptr(current, cpumask_of(pr->id)); ret = p_throttling->acpi_processor_set_throttling(pr, - t_state.target_state); + t_state.target_state, force); } else { /* * When the T-state coordination is SW_ALL or HW_ALL, @@ -1103,7 +1105,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) set_cpus_allowed_ptr(current, cpumask_of(i)); ret = match_pr->throttling. acpi_processor_set_throttling( - match_pr, t_state.target_state); + match_pr, t_state.target_state, force); } } /* @@ -1201,7 +1203,7 @@ int acpi_processor_get_throttling_info(struct acpi_processor *pr) ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Disabling throttling (was T%d)\n", pr->throttling.state)); - result = acpi_processor_set_throttling(pr, 0); + result = acpi_processor_set_throttling(pr, 0, false); if (result) goto end; } @@ -1307,7 +1309,7 @@ static ssize_t acpi_processor_write_throttling(struct file *file, if (strcmp(tmpbuf, charp) != 0) return -EINVAL; - result = acpi_processor_set_throttling(pr, state_val); + result = acpi_processor_set_throttling(pr, state_val, false); if (result) return result; diff --git a/include/acpi/processor.h b/include/acpi/processor.h index baf1e0a9a7ee..740ac3ad8fd0 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -174,7 +174,7 @@ struct acpi_processor_throttling { cpumask_var_t shared_cpu_map; int (*acpi_processor_get_throttling) (struct acpi_processor * pr); int (*acpi_processor_set_throttling) (struct acpi_processor * pr, - int state); + int state, bool force); u32 address; u8 duty_offset; @@ -321,7 +321,8 @@ static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr) /* in processor_throttling.c */ int acpi_processor_tstate_has_changed(struct acpi_processor *pr); int acpi_processor_get_throttling_info(struct acpi_processor *pr); -extern int acpi_processor_set_throttling(struct acpi_processor *pr, int state); +extern int acpi_processor_set_throttling(struct acpi_processor *pr, + int state, bool force); extern const struct file_operations acpi_processor_throttling_fops; extern void acpi_processor_throttling_init(void); /* in processor_idle.c */ -- cgit v1.2.3 From a6186d89c913b176e7339f37a4ec6ccb38b2c5c0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 27 Aug 2009 14:29:16 +0100 Subject: kmemleak: Mark the early log buffer as __initdata This buffer isn't needed after kmemleak was initialised so it can be freed together with the .init.data section. This patch also marks functions conditionally accessing the early log variables with __ref. 
Signed-off-by: Catalin Marinas --- include/linux/kmemleak.h | 18 +++++++++--------- mm/kmemleak.c | 26 ++++++++++++++------------ 2 files changed, 23 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 6a63807f714e..3c7497d46ee9 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -23,18 +23,18 @@ #ifdef CONFIG_DEBUG_KMEMLEAK -extern void kmemleak_init(void); +extern void kmemleak_init(void) __ref; extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, - gfp_t gfp); -extern void kmemleak_free(const void *ptr); -extern void kmemleak_free_part(const void *ptr, size_t size); + gfp_t gfp) __ref; +extern void kmemleak_free(const void *ptr) __ref; +extern void kmemleak_free_part(const void *ptr, size_t size) __ref; extern void kmemleak_padding(const void *ptr, unsigned long offset, - size_t size); -extern void kmemleak_not_leak(const void *ptr); -extern void kmemleak_ignore(const void *ptr); + size_t size) __ref; +extern void kmemleak_not_leak(const void *ptr) __ref; +extern void kmemleak_ignore(const void *ptr) __ref; extern void kmemleak_scan_area(const void *ptr, unsigned long offset, - size_t length, gfp_t gfp); -extern void kmemleak_no_scan(const void *ptr); + size_t length, gfp_t gfp) __ref; +extern void kmemleak_no_scan(const void *ptr) __ref; static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, int min_count, unsigned long flags, diff --git a/mm/kmemleak.c b/mm/kmemleak.c index c977f7a2f0e4..576c0a4cec52 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -232,8 +232,9 @@ struct early_log { }; /* early logging buffer and current position */ -static struct early_log early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE]; -static int crt_early_log; +static struct early_log + early_log[CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE] __initdata; +static int crt_early_log __initdata; static void kmemleak_disable(void); @@ -718,8 +719,8 @@ static void object_no_scan(unsigned long ptr) * Log an early kmemleak_* call to the early_log buffer. These calls will be * processed later once kmemleak is fully initialized. */ -static void log_early(int op_type, const void *ptr, size_t size, - int min_count, unsigned long offset, size_t length) +static void __init log_early(int op_type, const void *ptr, size_t size, + int min_count, unsigned long offset, size_t length) { unsigned long flags; struct early_log *log; @@ -751,7 +752,8 @@ static void log_early(int op_type, const void *ptr, size_t size, * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc, * vmalloc etc.). */ -void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) +void __ref kmemleak_alloc(const void *ptr, size_t size, int min_count, + gfp_t gfp) { pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count); @@ -766,7 +768,7 @@ EXPORT_SYMBOL_GPL(kmemleak_alloc); * Memory freeing function callback. This function is called from the kernel * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.). */ -void kmemleak_free(const void *ptr) +void __ref kmemleak_free(const void *ptr) { pr_debug("%s(0x%p)\n", __func__, ptr); @@ -781,7 +783,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free); * Partial memory freeing function callback. This function is usually called * from bootmem allocator when (part of) a memory block is freed. 
*/ -void kmemleak_free_part(const void *ptr, size_t size) +void __ref kmemleak_free_part(const void *ptr, size_t size) { pr_debug("%s(0x%p)\n", __func__, ptr); @@ -796,7 +798,7 @@ EXPORT_SYMBOL_GPL(kmemleak_free_part); * Mark an already allocated memory block as a false positive. This will cause * the block to no longer be reported as leak and always be scanned. */ -void kmemleak_not_leak(const void *ptr) +void __ref kmemleak_not_leak(const void *ptr) { pr_debug("%s(0x%p)\n", __func__, ptr); @@ -812,7 +814,7 @@ EXPORT_SYMBOL(kmemleak_not_leak); * corresponding block is not a leak and does not contain any references to * other allocated memory blocks. */ -void kmemleak_ignore(const void *ptr) +void __ref kmemleak_ignore(const void *ptr) { pr_debug("%s(0x%p)\n", __func__, ptr); @@ -826,8 +828,8 @@ EXPORT_SYMBOL(kmemleak_ignore); /* * Limit the range to be scanned in an allocated memory block. */ -void kmemleak_scan_area(const void *ptr, unsigned long offset, size_t length, - gfp_t gfp) +void __ref kmemleak_scan_area(const void *ptr, unsigned long offset, + size_t length, gfp_t gfp) { pr_debug("%s(0x%p)\n", __func__, ptr); @@ -841,7 +843,7 @@ EXPORT_SYMBOL(kmemleak_scan_area); /* * Inform kmemleak not to scan the given memory block. */ -void kmemleak_no_scan(const void *ptr) +void __ref kmemleak_no_scan(const void *ptr) { pr_debug("%s(0x%p)\n", __func__, ptr); -- cgit v1.2.3 From f8d80cdf40fe4d2393159012b38ce9f85a488686 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Tue, 2 Jun 2009 13:28:13 +0800 Subject: ACPICA: Remove duplicate extern declarations for public globals Some were defined twice, causes a warning with gcc -Wredundant-decls. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acglobal.h | 33 ++++++++++++++++++--------------- include/acpi/acpixf.h | 1 + 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 3d87362d17ed..0b73b31c1b53 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -58,6 +58,10 @@ #define ACPI_INIT_GLOBAL(a,b) a #endif +#ifdef DEFINE_ACPI_GLOBALS + +/* Public globals, available from outside ACPICA subsystem */ + /***************************************************************************** * * Runtime configuration (static defaults that can be overriden at runtime) @@ -78,7 +82,7 @@ * 5) Allow unresolved references (invalid target name) in package objects * 6) Enable warning messages for behavior that is not ACPI spec compliant */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_interpreter_slack, FALSE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_interpreter_slack, FALSE); /* * Automatically serialize ALL control methods? Default is FALSE, meaning @@ -86,27 +90,36 @@ ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_enable_interpreter_slack, FALSE); * Only change this if the ASL code is poorly written and cannot handle * reentrancy even though methods are marked "NotSerialized". */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_all_methods_serialized, FALSE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_all_methods_serialized, FALSE); /* * Create the predefined _OSI method in the namespace? Default is TRUE * because ACPI CA is fully compatible with other ACPI implementations. * Changing this will revert ACPI CA (and machine ASL) to pre-OSI behavior. */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_create_osi_method, TRUE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_create_osi_method, TRUE); /* * Disable wakeup GPEs during runtime? 
Default is TRUE because WAKE and * RUNTIME GPEs should never be shared, and WAKE GPEs should typically only * be enabled just before going to sleep. */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_leave_wake_gpes_disabled, TRUE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_leave_wake_gpes_disabled, TRUE); /* * Optionally use default values for the ACPI register widths. Set this to * TRUE to use the defaults, if an FADT contains incorrect widths/lengths. */ -ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE); +u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE); + +/* acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */ + +struct acpi_table_fadt acpi_gbl_FADT; +u32 acpi_current_gpe_count; +u32 acpi_gbl_trace_flags; +acpi_name acpi_gbl_trace_method_name; + +#endif /***************************************************************************** * @@ -114,11 +127,6 @@ ACPI_EXTERN u8 ACPI_INIT_GLOBAL(acpi_gbl_use_default_register_widths, TRUE); * ****************************************************************************/ -/* Runtime configuration of debug print levels */ - -extern u32 acpi_dbg_level; -extern u32 acpi_dbg_layer; - /* Procedure nesting level for debug output */ extern u32 acpi_gbl_nesting_level; @@ -127,10 +135,8 @@ extern u32 acpi_gbl_nesting_level; ACPI_EXTERN u32 acpi_gbl_original_dbg_level; ACPI_EXTERN u32 acpi_gbl_original_dbg_layer; -ACPI_EXTERN acpi_name acpi_gbl_trace_method_name; ACPI_EXTERN u32 acpi_gbl_trace_dbg_level; ACPI_EXTERN u32 acpi_gbl_trace_dbg_layer; -ACPI_EXTERN u32 acpi_gbl_trace_flags; /***************************************************************************** * @@ -142,10 +148,8 @@ ACPI_EXTERN u32 acpi_gbl_trace_flags; * acpi_gbl_root_table_list is the master list of ACPI tables found in the * RSDT/XSDT. * - * acpi_gbl_FADT is a local copy of the FADT, converted to a common format. */ ACPI_EXTERN struct acpi_internal_rsdt acpi_gbl_root_table_list; -ACPI_EXTERN struct acpi_table_fadt acpi_gbl_FADT; ACPI_EXTERN struct acpi_table_facs *acpi_gbl_FACS; /* These addresses are calculated from the FADT Event Block addresses */ @@ -340,7 +344,6 @@ ACPI_EXTERN struct acpi_fixed_event_handler ACPI_EXTERN struct acpi_gpe_xrupt_info *acpi_gbl_gpe_xrupt_list_head; ACPI_EXTERN struct acpi_gpe_block_info *acpi_gbl_gpe_fadt_blocks[ACPI_MAX_GPE_BLOCKS]; -ACPI_EXTERN u32 acpi_current_gpe_count; /***************************************************************************** * diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 82ec6a3c0500..2aecaa5cc06c 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -64,6 +64,7 @@ extern u8 acpi_gbl_enable_interpreter_slack; extern u8 acpi_gbl_all_methods_serialized; extern u8 acpi_gbl_create_osi_method; extern u8 acpi_gbl_leave_wake_gpes_disabled; +extern u8 acpi_gbl_use_default_register_widths; extern acpi_name acpi_gbl_trace_method_name; extern u32 acpi_gbl_trace_flags; -- cgit v1.2.3 From c6b5774caafa4c12b6019366e2fdaaff117e95a4 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 24 Jun 2009 09:44:06 +0800 Subject: ACPICA: Add 64-bit support to acpi_read and acpi_write Needed by drivers for new ACPi tables. Internal versions of these functions still use 32-bit max transfers, in order to minimize disruption and stack use for the standard ACPI registers (FADT-based). 
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/achware.h | 8 ++ drivers/acpi/acpica/evgpe.c | 8 +- drivers/acpi/acpica/evgpeblk.c | 4 +- drivers/acpi/acpica/hwgpe.c | 34 +++---- drivers/acpi/acpica/hwregs.c | 206 ++++++++++++++++++++++++++++++++++++++--- drivers/acpi/acpica/hwtimer.c | 2 +- drivers/acpi/acpica/hwxface.c | 166 +++++++++++++++++++-------------- include/acpi/acpixf.h | 4 +- 8 files changed, 327 insertions(+), 105 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/achware.h b/drivers/acpi/acpica/achware.h index 4afa3d8e0efb..36192f142fbb 100644 --- a/drivers/acpi/acpica/achware.h +++ b/drivers/acpi/acpica/achware.h @@ -62,6 +62,14 @@ u32 acpi_hw_get_mode(void); /* * hwregs - ACPI Register I/O */ +acpi_status +acpi_hw_validate_register(struct acpi_generic_address *reg, + u8 max_bit_width, u64 *address); + +acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg); + +acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg); + struct acpi_bit_register_info *acpi_hw_get_bit_register_info(u32 register_id); acpi_status acpi_hw_write_pm1_control(u32 pm1a_control, u32 pm1b_control); diff --git a/drivers/acpi/acpica/evgpe.c b/drivers/acpi/acpica/evgpe.c index b9d8ee69ca6c..afacf4416c73 100644 --- a/drivers/acpi/acpica/evgpe.c +++ b/drivers/acpi/acpica/evgpe.c @@ -424,8 +424,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list) /* Read the Status Register */ status = - acpi_read(&status_reg, - &gpe_register_info->status_address); + acpi_hw_read(&status_reg, + &gpe_register_info->status_address); if (ACPI_FAILURE(status)) { goto unlock_and_exit; } @@ -433,8 +433,8 @@ u32 acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info * gpe_xrupt_list) /* Read the Enable Register */ status = - acpi_read(&enable_reg, - &gpe_register_info->enable_address); + acpi_hw_read(&enable_reg, + &gpe_register_info->enable_address); if (ACPI_FAILURE(status)) { goto unlock_and_exit; } diff --git a/drivers/acpi/acpica/evgpeblk.c b/drivers/acpi/acpica/evgpeblk.c index 7b3463639422..a60aaa7635f3 100644 --- a/drivers/acpi/acpica/evgpeblk.c +++ b/drivers/acpi/acpica/evgpeblk.c @@ -843,14 +843,14 @@ acpi_ev_create_gpe_info_blocks(struct acpi_gpe_block_info *gpe_block) /* Disable all GPEs within this register */ - status = acpi_write(0x00, &this_register->enable_address); + status = acpi_hw_write(0x00, &this_register->enable_address); if (ACPI_FAILURE(status)) { goto error_exit; } /* Clear any pending GPE events within this register */ - status = acpi_write(0xFF, &this_register->status_address); + status = acpi_hw_write(0xFF, &this_register->status_address); if (ACPI_FAILURE(status)) { goto error_exit; } diff --git a/drivers/acpi/acpica/hwgpe.c b/drivers/acpi/acpica/hwgpe.c index d3b7e37c9eed..c28c41b3180b 100644 --- a/drivers/acpi/acpica/hwgpe.c +++ b/drivers/acpi/acpica/hwgpe.c @@ -82,7 +82,7 @@ acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) /* Get current value of the enable register that contains this GPE */ - status = acpi_read(&enable_mask, &gpe_register_info->enable_address); + status = acpi_hw_read(&enable_mask, &gpe_register_info->enable_address); if (ACPI_FAILURE(status)) { return (status); } @@ -95,7 +95,7 @@ acpi_status acpi_hw_low_disable_gpe(struct acpi_gpe_event_info *gpe_event_info) /* Write the updated enable mask */ - status = acpi_write(enable_mask, &gpe_register_info->enable_address); + status = acpi_hw_write(enable_mask, &gpe_register_info->enable_address); 
return (status); } @@ -130,8 +130,8 @@ acpi_hw_write_gpe_enable_reg(struct acpi_gpe_event_info * gpe_event_info) /* Write the entire GPE (runtime) enable register */ - status = acpi_write(gpe_register_info->enable_for_run, - &gpe_register_info->enable_address); + status = acpi_hw_write(gpe_register_info->enable_for_run, + &gpe_register_info->enable_address); return (status); } @@ -163,8 +163,8 @@ acpi_status acpi_hw_clear_gpe(struct acpi_gpe_event_info * gpe_event_info) * Write a one to the appropriate bit in the status register to * clear this GPE. */ - status = acpi_write(register_bit, - &gpe_event_info->register_info->status_address); + status = acpi_hw_write(register_bit, + &gpe_event_info->register_info->status_address); return (status); } @@ -222,7 +222,7 @@ acpi_hw_get_gpe_status(struct acpi_gpe_event_info * gpe_event_info, /* GPE currently active (status bit == 1)? */ - status = acpi_read(&in_byte, &gpe_register_info->status_address); + status = acpi_hw_read(&in_byte, &gpe_register_info->status_address); if (ACPI_FAILURE(status)) { goto unlock_and_exit; } @@ -266,8 +266,8 @@ acpi_hw_disable_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /* Disable all GPEs in this register */ status = - acpi_write(0x00, - &gpe_block->register_info[i].enable_address); + acpi_hw_write(0x00, + &gpe_block->register_info[i].enable_address); if (ACPI_FAILURE(status)) { return (status); } @@ -303,8 +303,8 @@ acpi_hw_clear_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /* Clear status on all GPEs in this register */ status = - acpi_write(0xFF, - &gpe_block->register_info[i].status_address); + acpi_hw_write(0xFF, + &gpe_block->register_info[i].status_address); if (ACPI_FAILURE(status)) { return (status); } @@ -345,9 +345,9 @@ acpi_hw_enable_runtime_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /* Enable all "runtime" GPEs in this register */ - status = acpi_write(gpe_block->register_info[i].enable_for_run, - &gpe_block->register_info[i]. - enable_address); + status = + acpi_hw_write(gpe_block->register_info[i].enable_for_run, + &gpe_block->register_info[i].enable_address); if (ACPI_FAILURE(status)) { return (status); } @@ -387,9 +387,9 @@ acpi_hw_enable_wakeup_gpe_block(struct acpi_gpe_xrupt_info *gpe_xrupt_info, /* Enable all "wake" GPEs in this register */ - status = acpi_write(gpe_block->register_info[i].enable_for_wake, - &gpe_block->register_info[i]. - enable_address); + status = + acpi_hw_write(gpe_block->register_info[i].enable_for_wake, + &gpe_block->register_info[i].enable_address); if (ACPI_FAILURE(status)) { return (status); } diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c index 23d5505cb1f7..15c9ed2be853 100644 --- a/drivers/acpi/acpica/hwregs.c +++ b/drivers/acpi/acpica/hwregs.c @@ -62,6 +62,184 @@ acpi_hw_write_multiple(u32 value, struct acpi_generic_address *register_a, struct acpi_generic_address *register_b); +/****************************************************************************** + * + * FUNCTION: acpi_hw_validate_register + * + * PARAMETERS: Reg - GAS register structure + * max_bit_width - Max bit_width supported (32 or 64) + * Address - Pointer to where the gas->address + * is returned + * + * RETURN: Status + * + * DESCRIPTION: Validate the contents of a GAS register. Checks the GAS + * pointer, Address, space_id, bit_width, and bit_offset. 
+ * + ******************************************************************************/ + +acpi_status +acpi_hw_validate_register(struct acpi_generic_address *reg, + u8 max_bit_width, u64 *address) +{ + + /* Must have a valid pointer to a GAS structure */ + + if (!reg) { + return (AE_BAD_PARAMETER); + } + + /* + * Copy the target address. This handles possible alignment issues. + * Address must not be null. A null address also indicates an optional + * ACPI register that is not supported, so no error message. + */ + ACPI_MOVE_64_TO_64(address, ®->address); + if (!(*address)) { + return (AE_BAD_ADDRESS); + } + + /* Validate the space_iD */ + + if ((reg->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) && + (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO)) { + ACPI_ERROR((AE_INFO, + "Unsupported address space: 0x%X", reg->space_id)); + return (AE_SUPPORT); + } + + /* Validate the bit_width */ + + if ((reg->bit_width != 8) && + (reg->bit_width != 16) && + (reg->bit_width != 32) && (reg->bit_width != max_bit_width)) { + ACPI_ERROR((AE_INFO, + "Unsupported register bit width: 0x%X", + reg->bit_width)); + return (AE_SUPPORT); + } + + /* Validate the bit_offset. Just a warning for now. */ + + if (reg->bit_offset != 0) { + ACPI_WARNING((AE_INFO, + "Unsupported register bit offset: 0x%X", + reg->bit_offset)); + } + + return (AE_OK); +} + +/****************************************************************************** + * + * FUNCTION: acpi_hw_read + * + * PARAMETERS: Value - Where the value is returned + * Reg - GAS register structure + * + * RETURN: Status + * + * DESCRIPTION: Read from either memory or IO space. This is a 32-bit max + * version of acpi_read, used internally since the overhead of + * 64-bit values is not needed. + * + * LIMITATIONS: + * bit_width must be exactly 8, 16, or 32. + * space_iD must be system_memory or system_iO. + * bit_offset and access_width are currently ignored, as there has + * not been a need to implement these. + * + ******************************************************************************/ + +acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg) +{ + u64 address; + acpi_status status; + + ACPI_FUNCTION_NAME(hw_read); + + /* Validate contents of the GAS register */ + + status = acpi_hw_validate_register(reg, 32, &address); + if (ACPI_FAILURE(status)) { + return (status); + } + + /* Initialize entire 32-bit return value to zero */ + + *value = 0; + + /* + * Two address spaces supported: Memory or IO. PCI_Config is + * not supported here because the GAS structure is insufficient + */ + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + status = acpi_os_read_memory((acpi_physical_address) + address, value, reg->bit_width); + } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */ + + status = acpi_hw_read_port((acpi_io_address) + address, value, reg->bit_width); + } + + ACPI_DEBUG_PRINT((ACPI_DB_IO, + "Read: %8.8X width %2d from %8.8X%8.8X (%s)\n", + *value, reg->bit_width, ACPI_FORMAT_UINT64(address), + acpi_ut_get_region_name(reg->space_id))); + + return (status); +} + +/****************************************************************************** + * + * FUNCTION: acpi_hw_write + * + * PARAMETERS: Value - Value to be written + * Reg - GAS register structure + * + * RETURN: Status + * + * DESCRIPTION: Write to either memory or IO space. This is a 32-bit max + * version of acpi_write, used internally since the overhead of + * 64-bit values is not needed. 
+ * + ******************************************************************************/ + +acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg) +{ + u64 address; + acpi_status status; + + ACPI_FUNCTION_NAME(hw_write); + + /* Validate contents of the GAS register */ + + status = acpi_hw_validate_register(reg, 32, &address); + if (ACPI_FAILURE(status)) { + return (status); + } + + /* + * Two address spaces supported: Memory or IO. PCI_Config is + * not supported here because the GAS structure is insufficient + */ + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + status = acpi_os_write_memory((acpi_physical_address) + address, value, reg->bit_width); + } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */ + + status = acpi_hw_write_port((acpi_io_address) + address, value, reg->bit_width); + } + + ACPI_DEBUG_PRINT((ACPI_DB_IO, + "Wrote: %8.8X width %2d to %8.8X%8.8X (%s)\n", + value, reg->bit_width, ACPI_FORMAT_UINT64(address), + acpi_ut_get_region_name(reg->space_id))); + + return (status); +} + /******************************************************************************* * * FUNCTION: acpi_hw_clear_acpi_status @@ -152,15 +330,16 @@ acpi_status acpi_hw_write_pm1_control(u32 pm1a_control, u32 pm1b_control) ACPI_FUNCTION_TRACE(hw_write_pm1_control); - status = acpi_write(pm1a_control, &acpi_gbl_FADT.xpm1a_control_block); + status = + acpi_hw_write(pm1a_control, &acpi_gbl_FADT.xpm1a_control_block); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); } if (acpi_gbl_FADT.xpm1b_control_block.address) { status = - acpi_write(pm1b_control, - &acpi_gbl_FADT.xpm1b_control_block); + acpi_hw_write(pm1b_control, + &acpi_gbl_FADT.xpm1b_control_block); } return_ACPI_STATUS(status); } @@ -218,12 +397,13 @@ acpi_hw_register_read(u32 register_id, u32 * return_value) case ACPI_REGISTER_PM2_CONTROL: /* 8-bit access */ - status = acpi_read(&value, &acpi_gbl_FADT.xpm2_control_block); + status = + acpi_hw_read(&value, &acpi_gbl_FADT.xpm2_control_block); break; case ACPI_REGISTER_PM_TIMER: /* 32-bit access */ - status = acpi_read(&value, &acpi_gbl_FADT.xpm_timer_block); + status = acpi_hw_read(&value, &acpi_gbl_FADT.xpm_timer_block); break; case ACPI_REGISTER_SMI_COMMAND_BLOCK: /* 8-bit access */ @@ -340,7 +520,8 @@ acpi_status acpi_hw_register_write(u32 register_id, u32 value) * as per the ACPI spec. 
*/ status = - acpi_read(&read_value, &acpi_gbl_FADT.xpm2_control_block); + acpi_hw_read(&read_value, + &acpi_gbl_FADT.xpm2_control_block); if (ACPI_FAILURE(status)) { goto exit; } @@ -350,12 +531,13 @@ acpi_status acpi_hw_register_write(u32 register_id, u32 value) ACPI_INSERT_BITS(value, ACPI_PM2_CONTROL_PRESERVED_BITS, read_value); - status = acpi_write(value, &acpi_gbl_FADT.xpm2_control_block); + status = + acpi_hw_write(value, &acpi_gbl_FADT.xpm2_control_block); break; case ACPI_REGISTER_PM_TIMER: /* 32-bit access */ - status = acpi_write(value, &acpi_gbl_FADT.xpm_timer_block); + status = acpi_hw_write(value, &acpi_gbl_FADT.xpm_timer_block); break; case ACPI_REGISTER_SMI_COMMAND_BLOCK: /* 8-bit access */ @@ -401,7 +583,7 @@ acpi_hw_read_multiple(u32 *value, /* The first register is always required */ - status = acpi_read(&value_a, register_a); + status = acpi_hw_read(&value_a, register_a); if (ACPI_FAILURE(status)) { return (status); } @@ -409,7 +591,7 @@ acpi_hw_read_multiple(u32 *value, /* Second register is optional */ if (register_b->address) { - status = acpi_read(&value_b, register_b); + status = acpi_hw_read(&value_b, register_b); if (ACPI_FAILURE(status)) { return (status); } @@ -452,7 +634,7 @@ acpi_hw_write_multiple(u32 value, /* The first register is always required */ - status = acpi_write(value, register_a); + status = acpi_hw_write(value, register_a); if (ACPI_FAILURE(status)) { return (status); } @@ -470,7 +652,7 @@ acpi_hw_write_multiple(u32 value, * and writes have no side effects" */ if (register_b->address) { - status = acpi_write(value, register_b); + status = acpi_hw_write(value, register_b); } return (status); diff --git a/drivers/acpi/acpica/hwtimer.c b/drivers/acpi/acpica/hwtimer.c index b7f522c8f023..6b282e85d039 100644 --- a/drivers/acpi/acpica/hwtimer.c +++ b/drivers/acpi/acpica/hwtimer.c @@ -100,7 +100,7 @@ acpi_status acpi_get_timer(u32 * ticks) } status = - acpi_hw_low_level_read(32, ticks, &acpi_gbl_FADT.xpm_timer_block); + acpi_hw_read(ticks, &acpi_gbl_FADT.xpm_timer_block); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/hwxface.c b/drivers/acpi/acpica/hwxface.c index 9829979f2bdd..4ead85f29215 100644 --- a/drivers/acpi/acpica/hwxface.c +++ b/drivers/acpi/acpica/hwxface.c @@ -80,7 +80,7 @@ acpi_status acpi_reset(void) /* Write the reset value to the reset register */ - status = acpi_write(acpi_gbl_FADT.reset_value, reset_reg); + status = acpi_hw_write(acpi_gbl_FADT.reset_value, reset_reg); return_ACPI_STATUS(status); } @@ -97,67 +97,92 @@ ACPI_EXPORT_SYMBOL(acpi_reset) * * DESCRIPTION: Read from either memory or IO space. * + * LIMITATIONS: + * bit_width must be exactly 8, 16, 32, or 64. + * space_iD must be system_memory or system_iO. + * bit_offset and access_width are currently ignored, as there has + * not been a need to implement these. + * ******************************************************************************/ -acpi_status acpi_read(u32 *value, struct acpi_generic_address *reg) +acpi_status acpi_read(u64 *return_value, struct acpi_generic_address *reg) { + u32 value; u32 width; u64 address; acpi_status status; ACPI_FUNCTION_NAME(acpi_read); - /* - * Must have a valid pointer to a GAS structure, and a non-zero address - * within. - */ - if (!reg) { + if (!return_value) { return (AE_BAD_PARAMETER); } - /* Get a local copy of the address. Handles possible alignment issues */ + /* Validate contents of the GAS register. 
Allow 64-bit transfers */ - ACPI_MOVE_64_TO_64(&address, ®->address); - if (!address) { - return (AE_BAD_ADDRESS); + status = acpi_hw_validate_register(reg, 64, &address); + if (ACPI_FAILURE(status)) { + return (status); } - /* Supported widths are 8/16/32 */ - width = reg->bit_width; - if ((width != 8) && (width != 16) && (width != 32)) { - return (AE_SUPPORT); + if (width == 64) { + width = 32; /* Break into two 32-bit transfers */ } - /* Initialize entire 32-bit return value to zero */ + /* Initialize entire 64-bit return value to zero */ - *value = 0; + *return_value = 0; + value = 0; /* * Two address spaces supported: Memory or IO. PCI_Config is * not supported here because the GAS structure is insufficient */ - switch (reg->space_id) { - case ACPI_ADR_SPACE_SYSTEM_MEMORY: + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + status = acpi_os_read_memory((acpi_physical_address) + address, &value, width); + if (ACPI_FAILURE(status)) { + return (status); + } + *return_value = value; + + if (reg->bit_width == 64) { - status = acpi_os_read_memory((acpi_physical_address) address, - value, width); - break; + /* Read the top 32 bits */ - case ACPI_ADR_SPACE_SYSTEM_IO: + status = acpi_os_read_memory((acpi_physical_address) + (address + 4), &value, 32); + if (ACPI_FAILURE(status)) { + return (status); + } + *return_value |= ((u64)value << 32); + } + } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */ - status = - acpi_hw_read_port((acpi_io_address) address, value, width); - break; + status = acpi_hw_read_port((acpi_io_address) + address, &value, width); + if (ACPI_FAILURE(status)) { + return (status); + } + *return_value = value; - default: - ACPI_ERROR((AE_INFO, - "Unsupported address space: %X", reg->space_id)); - return (AE_BAD_PARAMETER); + if (reg->bit_width == 64) { + + /* Read the top 32 bits */ + + status = acpi_hw_read_port((acpi_io_address) + (address + 4), &value, 32); + if (ACPI_FAILURE(status)) { + return (status); + } + *return_value |= ((u64)value << 32); + } } ACPI_DEBUG_PRINT((ACPI_DB_IO, - "Read: %8.8X width %2d from %8.8X%8.8X (%s)\n", - *value, width, ACPI_FORMAT_UINT64(address), + "Read: %8.8X%8.8X width %2d from %8.8X%8.8X (%s)\n", + ACPI_FORMAT_UINT64(*return_value), reg->bit_width, + ACPI_FORMAT_UINT64(address), acpi_ut_get_region_name(reg->space_id))); return (status); @@ -169,7 +194,7 @@ ACPI_EXPORT_SYMBOL(acpi_read) * * FUNCTION: acpi_write * - * PARAMETERS: Value - To be written + * PARAMETERS: Value - Value to be written * Reg - GAS register structure * * RETURN: Status @@ -177,7 +202,7 @@ ACPI_EXPORT_SYMBOL(acpi_read) * DESCRIPTION: Write to either memory or IO space. * ******************************************************************************/ -acpi_status acpi_write(u32 value, struct acpi_generic_address *reg) +acpi_status acpi_write(u64 value, struct acpi_generic_address *reg) { u32 width; u64 address; @@ -185,54 +210,61 @@ acpi_status acpi_write(u32 value, struct acpi_generic_address *reg) ACPI_FUNCTION_NAME(acpi_write); - /* - * Must have a valid pointer to a GAS structure, and a non-zero address - * within. - */ - if (!reg) { - return (AE_BAD_PARAMETER); - } + /* Validate contents of the GAS register. Allow 64-bit transfers */ - /* Get a local copy of the address. 
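/*
 * Illustration only, not part of this patch: the 64-bit path above is simply
 * two 32-bit transfers -- low dword at 'address', high dword at 'address + 4'.
 * Distilled into a standalone sketch; 'read32' is a hypothetical stand-in for
 * acpi_os_read_memory()/acpi_hw_read_port().
 */
static acpi_status read64_sketch(u64 address, u64 *out,
				 acpi_status (*read32)(u64 address, u32 *value))
{
	u32 lo;
	u32 hi;
	acpi_status status;

	status = read32(address, &lo);		/* Bits 0-31 */
	if (ACPI_FAILURE(status)) {
		return (status);
	}

	status = read32(address + 4, &hi);	/* Bits 32-63 */
	if (ACPI_FAILURE(status)) {
		return (status);
	}

	*out = ((u64)hi << 32) | lo;
	return (AE_OK);
}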
Handles possible alignment issues */ - - ACPI_MOVE_64_TO_64(&address, ®->address); - if (!address) { - return (AE_BAD_ADDRESS); + status = acpi_hw_validate_register(reg, 64, &address); + if (ACPI_FAILURE(status)) { + return (status); } - /* Supported widths are 8/16/32 */ - width = reg->bit_width; - if ((width != 8) && (width != 16) && (width != 32)) { - return (AE_SUPPORT); + if (width == 64) { + width = 32; /* Break into two 32-bit transfers */ } /* - * Two address spaces supported: Memory or IO. - * PCI_Config is not supported here because the GAS struct is insufficient + * Two address spaces supported: Memory or IO. PCI_Config is + * not supported here because the GAS structure is insufficient */ - switch (reg->space_id) { - case ACPI_ADR_SPACE_SYSTEM_MEMORY: - - status = acpi_os_write_memory((acpi_physical_address) address, - value, width); - break; + if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { + status = acpi_os_write_memory((acpi_physical_address) + address, ACPI_LODWORD(value), + width); + if (ACPI_FAILURE(status)) { + return (status); + } - case ACPI_ADR_SPACE_SYSTEM_IO: + if (reg->bit_width == 64) { + status = acpi_os_write_memory((acpi_physical_address) + (address + 4), + ACPI_HIDWORD(value), 32); + if (ACPI_FAILURE(status)) { + return (status); + } + } + } else { /* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */ - status = acpi_hw_write_port((acpi_io_address) address, value, + status = acpi_hw_write_port((acpi_io_address) + address, ACPI_LODWORD(value), width); - break; + if (ACPI_FAILURE(status)) { + return (status); + } - default: - ACPI_ERROR((AE_INFO, - "Unsupported address space: %X", reg->space_id)); - return (AE_BAD_PARAMETER); + if (reg->bit_width == 64) { + status = acpi_hw_write_port((acpi_io_address) + (address + 4), + ACPI_HIDWORD(value), 32); + if (ACPI_FAILURE(status)) { + return (status); + } + } } ACPI_DEBUG_PRINT((ACPI_DB_IO, - "Wrote: %8.8X width %2d to %8.8X%8.8X (%s)\n", - value, width, ACPI_FORMAT_UINT64(address), + "Wrote: %8.8X%8.8X width %2d to %8.8X%8.8X (%s)\n", + ACPI_FORMAT_UINT64(value), reg->bit_width, + ACPI_FORMAT_UINT64(address), acpi_ut_get_region_name(reg->space_id))); return (status); diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 2aecaa5cc06c..b450a195319a 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -360,9 +360,9 @@ acpi_status acpi_set_firmware_waking_vector(u32 physical_address); acpi_status acpi_set_firmware_waking_vector64(u64 physical_address); #endif -acpi_status acpi_read(u32 *value, struct acpi_generic_address *reg); +acpi_status acpi_read(u64 *value, struct acpi_generic_address *reg); -acpi_status acpi_write(u32 value, struct acpi_generic_address *reg); +acpi_status acpi_write(u64 value, struct acpi_generic_address *reg); acpi_status acpi_get_sleep_type_data(u8 sleep_state, u8 * slp_typ_a, u8 * slp_typ_b); -- cgit v1.2.3 From 15b8dd53f5ffaf8e2d9095c423f713423f576c0f Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 29 Jun 2009 13:39:29 +0800 Subject: ACPICA: Major update for acpi_get_object_info external interface Completed a major update for the acpi_get_object_info external interface. Changes include: - Support for variable, unlimited length HID, UID, and CID strings - Support Processor objects the same as Devices (HID,UID,CID,ADR,STA, etc.) - Call the _SxW power methods on behalf of a device object - Determine if a device is a PCI root bridge - Change the ACPI_BUFFER parameter to ACPI_DEVICE_INFO. These changes will require an update to all callers of this interface. 
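Roughly, an updated caller now looks like the sketch below (the helper name has_hid and its 'wanted' argument are hypothetical; the calls mirror the driver conversions in the hunks that follow): acpi_get_object_info() allocates the acpi_device_info itself, optional fields are gated by the valid bitfield, and the caller releases the whole buffer with one kfree() (or ACPI_FREE() inside ACPICA).

static acpi_status has_hid(acpi_handle handle, const char *wanted)
{
	struct acpi_device_info *info;
	acpi_status status;

	status = acpi_get_object_info(handle, &info);	/* callee allocates */
	if (ACPI_FAILURE(status))
		return status;

	/* _HID is optional; test the valid bitfield before using the string */
	if (!(info->valid & ACPI_VALID_HID) ||
	    strcmp(info->hardware_id.string, wanted))
		status = AE_NOT_FOUND;

	kfree(info);					/* caller frees */
	return status;
}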
See the ACPICA Programmer Reference for details. Also, update all invocations of acpi_get_object_info interface Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- arch/ia64/hp/common/sba_iommu.c | 7 +- drivers/acpi/acpi_memhotplug.c | 11 +- drivers/acpi/acpica/Makefile | 2 +- drivers/acpi/acpica/acconfig.h | 5 + drivers/acpi/acpica/acglobal.h | 3 +- drivers/acpi/acpica/acinterp.h | 4 +- drivers/acpi/acpica/acutils.h | 24 ++- drivers/acpi/acpica/evrgnini.c | 45 +---- drivers/acpi/acpica/exutils.c | 53 +++-- drivers/acpi/acpica/nsdumpdv.c | 7 +- drivers/acpi/acpica/nsxfeval.c | 23 ++- drivers/acpi/acpica/nsxfname.c | 237 +++++++++++++++++------ drivers/acpi/acpica/uteval.c | 375 ++++-------------------------------- drivers/acpi/acpica/utglobal.c | 10 +- drivers/acpi/acpica/utids.c | 382 +++++++++++++++++++++++++++++++++++++ drivers/acpi/acpica/utmisc.c | 28 +++ drivers/acpi/container.c | 11 +- drivers/acpi/dock.c | 8 +- drivers/acpi/glue.c | 6 +- drivers/acpi/scan.c | 153 +++++++++------ drivers/char/agp/hp-agp.c | 9 +- drivers/ide/ide-acpi.c | 5 +- drivers/pci/hotplug/acpiphp_ibm.c | 12 +- drivers/platform/x86/sony-laptop.c | 7 +- drivers/pnp/pnpacpi/core.c | 6 +- include/acpi/acpi_bus.h | 8 +- include/acpi/acpixf.h | 3 +- include/acpi/actypes.h | 87 +++++---- 28 files changed, 901 insertions(+), 630 deletions(-) create mode 100644 drivers/acpi/acpica/utids.c (limited to 'include') diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 8cfb001092ab..674a8374c6d9 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2026,24 +2026,21 @@ acpi_sba_ioc_add(struct acpi_device *device) struct ioc *ioc; acpi_status status; u64 hpa, length; - struct acpi_buffer buffer; struct acpi_device_info *dev_info; status = hp_acpi_csr_space(device->handle, &hpa, &length); if (ACPI_FAILURE(status)) return 1; - buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - status = acpi_get_object_info(device->handle, &buffer); + status = acpi_get_object_info(device->handle, &dev_info); if (ACPI_FAILURE(status)) return 1; - dev_info = buffer.pointer; /* * For HWP0001, only SBA appears in ACPI namespace. It encloses the PCI * root bridges, and its CSR space includes the IOC function. 
*/ - if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) { + if (strncmp("HWP0001", dev_info->hardware_id.string, 7) == 0) { hpa += ZX1_IOC_OFFSET; /* zx1 based systems default to kernel page size iommu pages */ if (!iovp_shift) diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 9a62224cc278..80eacbe157e2 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -481,26 +481,23 @@ static acpi_status is_memory_device(acpi_handle handle) { char *hardware_id; acpi_status status; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_device_info *info; - - status = acpi_get_object_info(handle, &buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_FAILURE(status)) return status; - info = buffer.pointer; if (!(info->valid & ACPI_VALID_HID)) { - kfree(buffer.pointer); + kfree(info); return AE_ERROR; } - hardware_id = info->hardware_id.value; + hardware_id = info->hardware_id.string; if ((hardware_id == NULL) || (strcmp(hardware_id, ACPI_MEMORY_DEVICE_HID))) status = AE_ERROR; - kfree(buffer.pointer); + kfree(info); return status; } diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile index 72ac28da14e3..0e7d56185f6d 100644 --- a/drivers/acpi/acpica/Makefile +++ b/drivers/acpi/acpica/Makefile @@ -44,4 +44,4 @@ acpi-y += tbxface.o tbinstal.o tbutils.o tbfind.o tbfadt.o tbxfroot.o acpi-y += utalloc.o utdebug.o uteval.o utinit.o utmisc.o utxface.o \ utcopy.o utdelete.o utglobal.o utmath.o utobject.o \ - utstate.o utmutex.o utobject.o utresrc.o utlock.o + utstate.o utmutex.o utobject.o utresrc.o utlock.o utids.o diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h index e6777fb883d2..6c1fb2d9f4d5 100644 --- a/drivers/acpi/acpica/acconfig.h +++ b/drivers/acpi/acpica/acconfig.h @@ -203,6 +203,11 @@ #define ACPI_SMBUS_BUFFER_SIZE 34 +/* _sx_d and _sx_w control methods */ + +#define ACPI_NUM_sx_d_METHODS 4 +#define ACPI_NUM_sx_w_METHODS 5 + /****************************************************************************** * * ACPI AML Debugger diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 0b73b31c1b53..6389f7c1de59 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -265,7 +265,8 @@ ACPI_EXTERN u8 acpi_gbl_osi_data; extern u8 acpi_gbl_shutdown; extern u32 acpi_gbl_startup_flags; extern const char *acpi_gbl_sleep_state_names[ACPI_S_STATE_COUNT]; -extern const char *acpi_gbl_highest_dstate_names[4]; +extern const char *acpi_gbl_lowest_dstate_names[ACPI_NUM_sx_w_METHODS]; +extern const char *acpi_gbl_highest_dstate_names[ACPI_NUM_sx_d_METHODS]; extern const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES]; extern const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS]; diff --git a/drivers/acpi/acpica/acinterp.h b/drivers/acpi/acpica/acinterp.h index e8db7a3143a5..5db9f2916f7c 100644 --- a/drivers/acpi/acpica/acinterp.h +++ b/drivers/acpi/acpica/acinterp.h @@ -461,9 +461,9 @@ void acpi_ex_acquire_global_lock(u32 rule); void acpi_ex_release_global_lock(u32 rule); -void acpi_ex_eisa_id_to_string(u32 numeric_id, char *out_string); +void acpi_ex_eisa_id_to_string(char *dest, acpi_integer compressed_id); -void acpi_ex_unsigned_integer_to_string(acpi_integer value, char *out_string); +void acpi_ex_integer_to_string(char *dest, acpi_integer value); /* * exregion - default op_region handlers diff --git a/drivers/acpi/acpica/acutils.h b/drivers/acpi/acpica/acutils.h index 
897810ba0ccc..b0add85de308 100644 --- a/drivers/acpi/acpica/acutils.h +++ b/drivers/acpi/acpica/acutils.h @@ -324,26 +324,30 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node, acpi_status acpi_ut_evaluate_numeric_object(char *object_name, struct acpi_namespace_node *device_node, - acpi_integer * address); + acpi_integer *value); acpi_status -acpi_ut_execute_HID(struct acpi_namespace_node *device_node, - struct acpica_device_id *hid); +acpi_ut_execute_STA(struct acpi_namespace_node *device_node, u32 *status_flags); acpi_status -acpi_ut_execute_CID(struct acpi_namespace_node *device_node, - struct acpi_compatible_id_list **return_cid_list); +acpi_ut_execute_power_methods(struct acpi_namespace_node *device_node, + const char **method_names, + u8 method_count, u8 *out_values); +/* + * utids - device ID support + */ acpi_status -acpi_ut_execute_STA(struct acpi_namespace_node *device_node, - u32 * status_flags); +acpi_ut_execute_HID(struct acpi_namespace_node *device_node, + struct acpica_device_id **return_id); acpi_status acpi_ut_execute_UID(struct acpi_namespace_node *device_node, - struct acpica_device_id *uid); + struct acpica_device_id **return_id); acpi_status -acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest); +acpi_ut_execute_CID(struct acpi_namespace_node *device_node, + struct acpica_device_id_list **return_cid_list); /* * utlock - reader/writer locks @@ -445,6 +449,8 @@ acpi_ut_short_divide(acpi_integer in_dividend, */ const char *acpi_ut_validate_exception(acpi_status status); +u8 acpi_ut_is_pci_root_bridge(char *id); + u8 acpi_ut_is_aml_table(struct acpi_table_header *table); acpi_status acpi_ut_allocate_owner_id(acpi_owner_id * owner_id); diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c index 284a7becbe96..cf29c4953028 100644 --- a/drivers/acpi/acpica/evrgnini.c +++ b/drivers/acpi/acpica/evrgnini.c @@ -50,8 +50,6 @@ ACPI_MODULE_NAME("evrgnini") /* Local prototypes */ -static u8 acpi_ev_match_pci_root_bridge(char *id); - static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node); /******************************************************************************* @@ -330,37 +328,6 @@ acpi_ev_pci_config_region_setup(acpi_handle handle, return_ACPI_STATUS(AE_OK); } -/******************************************************************************* - * - * FUNCTION: acpi_ev_match_pci_root_bridge - * - * PARAMETERS: Id - The HID/CID in string format - * - * RETURN: TRUE if the Id is a match for a PCI/PCI-Express Root Bridge - * - * DESCRIPTION: Determine if the input ID is a PCI Root Bridge ID. - * - ******************************************************************************/ - -static u8 acpi_ev_match_pci_root_bridge(char *id) -{ - - /* - * Check if this is a PCI root. - * ACPI 3.0+: check for a PCI Express root also. 
- */ - if (!(ACPI_STRNCMP(id, - PCI_ROOT_HID_STRING, - sizeof(PCI_ROOT_HID_STRING))) || - !(ACPI_STRNCMP(id, - PCI_EXPRESS_ROOT_HID_STRING, - sizeof(PCI_EXPRESS_ROOT_HID_STRING)))) { - return (TRUE); - } - - return (FALSE); -} - /******************************************************************************* * * FUNCTION: acpi_ev_is_pci_root_bridge @@ -377,9 +344,10 @@ static u8 acpi_ev_match_pci_root_bridge(char *id) static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) { acpi_status status; - struct acpica_device_id hid; - struct acpi_compatible_id_list *cid; + struct acpica_device_id *hid; + struct acpica_device_id_list *cid; u32 i; + u8 match; /* Get the _HID and check for a PCI Root Bridge */ @@ -388,7 +356,10 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) return (FALSE); } - if (acpi_ev_match_pci_root_bridge(hid.value)) { + match = acpi_ut_is_pci_root_bridge(hid->string); + ACPI_FREE(hid); + + if (match) { return (TRUE); } @@ -402,7 +373,7 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) /* Check all _CIDs in the returned list */ for (i = 0; i < cid->count; i++) { - if (acpi_ev_match_pci_root_bridge(cid->id[i].value)) { + if (acpi_ut_is_pci_root_bridge(cid->ids[i].string)) { ACPI_FREE(cid); return (TRUE); } diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c index 87730e944132..7d41f99f7052 100644 --- a/drivers/acpi/acpica/exutils.c +++ b/drivers/acpi/acpica/exutils.c @@ -358,50 +358,67 @@ static u32 acpi_ex_digits_needed(acpi_integer value, u32 base) * * FUNCTION: acpi_ex_eisa_id_to_string * - * PARAMETERS: numeric_id - EISA ID to be converted + * PARAMETERS: compressed_id - EISAID to be converted * out_string - Where to put the converted string (8 bytes) * * RETURN: None * - * DESCRIPTION: Convert a numeric EISA ID to string representation + * DESCRIPTION: Convert a numeric EISAID to string representation. Return + * buffer must be large enough to hold the string. The string + * returned is always exactly of length ACPI_EISAID_STRING_SIZE + * (includes null terminator). The EISAID is always 32 bits. * ******************************************************************************/ -void acpi_ex_eisa_id_to_string(u32 numeric_id, char *out_string) +void acpi_ex_eisa_id_to_string(char *out_string, acpi_integer compressed_id) { - u32 eisa_id; + u32 swapped_id; ACPI_FUNCTION_ENTRY(); + /* The EISAID should be a 32-bit integer */ + + if (compressed_id > ACPI_UINT32_MAX) { + ACPI_WARNING((AE_INFO, + "Expected EISAID is larger than 32 bits: 0x%8.8X%8.8X, truncating", + ACPI_FORMAT_UINT64(compressed_id))); + } + /* Swap ID to big-endian to get contiguous bits */ - eisa_id = acpi_ut_dword_byte_swap(numeric_id); + swapped_id = acpi_ut_dword_byte_swap((u32)compressed_id); - out_string[0] = (char)('@' + (((unsigned long)eisa_id >> 26) & 0x1f)); - out_string[1] = (char)('@' + ((eisa_id >> 21) & 0x1f)); - out_string[2] = (char)('@' + ((eisa_id >> 16) & 0x1f)); - out_string[3] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 12); - out_string[4] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 8); - out_string[5] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 4); - out_string[6] = acpi_ut_hex_to_ascii_char((acpi_integer) eisa_id, 0); + /* First 3 bytes are uppercase letters. 
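/*
 * Worked example, not from the patch: the battery HID "PNP0C0A" is stored in
 * AML as the compressed EISAID 0x0A0CD041.  Byte-swapping gives 0x41D00C0A;
 * the top three 5-bit fields are 0x10, 0x0E, 0x10, which become 'P', 'N', 'P'
 * after adding 0x40, and the low 16 bits 0x0C0A print as the hex digits
 * "0C0A" -- yielding the 7-character string "PNP0C0A" plus its null
 * terminator (ACPI_EISAID_STRING_SIZE bytes in total).
 */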
Next 4 bytes are hexadecimal */ + + out_string[0] = + (char)(0x40 + (((unsigned long)swapped_id >> 26) & 0x1F)); + out_string[1] = (char)(0x40 + ((swapped_id >> 21) & 0x1F)); + out_string[2] = (char)(0x40 + ((swapped_id >> 16) & 0x1F)); + out_string[3] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 12); + out_string[4] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 8); + out_string[5] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 4); + out_string[6] = acpi_ut_hex_to_ascii_char((acpi_integer)swapped_id, 0); out_string[7] = 0; } /******************************************************************************* * - * FUNCTION: acpi_ex_unsigned_integer_to_string + * FUNCTION: acpi_ex_integer_to_string * - * PARAMETERS: Value - Value to be converted - * out_string - Where to put the converted string (8 bytes) + * PARAMETERS: out_string - Where to put the converted string. At least + * 21 bytes are needed to hold the largest + * possible 64-bit integer. + * Value - Value to be converted * * RETURN: None, string * - * DESCRIPTION: Convert a number to string representation. Assumes string - * buffer is large enough to hold the string. + * DESCRIPTION: Convert a 64-bit integer to decimal string representation. + * Assumes string buffer is large enough to hold the string. The + * largest string is (ACPI_MAX64_DECIMAL_DIGITS + 1). * ******************************************************************************/ -void acpi_ex_unsigned_integer_to_string(acpi_integer value, char *out_string) +void acpi_ex_integer_to_string(char *out_string, acpi_integer value) { u32 count; u32 digits_needed; diff --git a/drivers/acpi/acpica/nsdumpdv.c b/drivers/acpi/acpica/nsdumpdv.c index 41994fe7fbb8..0fe87f1aef16 100644 --- a/drivers/acpi/acpica/nsdumpdv.c +++ b/drivers/acpi/acpica/nsdumpdv.c @@ -70,7 +70,6 @@ static acpi_status acpi_ns_dump_one_device(acpi_handle obj_handle, u32 level, void *context, void **return_value) { - struct acpi_buffer buffer; struct acpi_device_info *info; acpi_status status; u32 i; @@ -80,17 +79,15 @@ acpi_ns_dump_one_device(acpi_handle obj_handle, status = acpi_ns_dump_one_object(obj_handle, level, context, return_value); - buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - status = acpi_get_object_info(obj_handle, &buffer); + status = acpi_get_object_info(obj_handle, &info); if (ACPI_SUCCESS(status)) { - info = buffer.pointer; for (i = 0; i < level; i++) { ACPI_DEBUG_PRINT_RAW((ACPI_DB_TABLES, " ")); } ACPI_DEBUG_PRINT_RAW((ACPI_DB_TABLES, " HID: %s, ADR: %8.8X%8.8X, Status: %X\n", - info->hardware_id.value, + info->hardware_id.string, ACPI_FORMAT_UINT64(info->address), info->current_status)); ACPI_FREE(info); diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c index daf4ad37896d..4929dbdbc8f0 100644 --- a/drivers/acpi/acpica/nsxfeval.c +++ b/drivers/acpi/acpica/nsxfeval.c @@ -535,10 +535,11 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, acpi_status status; struct acpi_namespace_node *node; u32 flags; - struct acpica_device_id hid; - struct acpi_compatible_id_list *cid; + struct acpica_device_id *hid; + struct acpica_device_id_list *cid; u32 i; - int found; + u8 found; + int no_match; status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); if (ACPI_FAILURE(status)) { @@ -582,10 +583,14 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, return (AE_CTRL_DEPTH); } - if (ACPI_STRNCMP(hid.value, info->hid, sizeof(hid.value)) != 0) { - - /* Get the list of Compatible IDs */ + no_match = ACPI_STRCMP(hid->string, info->hid); + ACPI_FREE(hid); + if 
(no_match) { + /* + * HID does not match, attempt match within the + * list of Compatible IDs (CIDs) + */ status = acpi_ut_execute_CID(node, &cid); if (status == AE_NOT_FOUND) { return (AE_OK); @@ -597,10 +602,8 @@ acpi_ns_get_device_callback(acpi_handle obj_handle, found = 0; for (i = 0; i < cid->count; i++) { - if (ACPI_STRNCMP(cid->id[i].value, info->hid, - sizeof(struct - acpi_compatible_id)) == - 0) { + if (ACPI_STRCMP(cid->ids[i].string, info->hid) + == 0) { found = 1; break; } diff --git a/drivers/acpi/acpica/nsxfname.c b/drivers/acpi/acpica/nsxfname.c index f23593d6add4..ddc84af6336e 100644 --- a/drivers/acpi/acpica/nsxfname.c +++ b/drivers/acpi/acpica/nsxfname.c @@ -51,6 +51,11 @@ #define _COMPONENT ACPI_NAMESPACE ACPI_MODULE_NAME("nsxfname") +/* Local prototypes */ +static char *acpi_ns_copy_device_id(struct acpica_device_id *dest, + struct acpica_device_id *source, + char *string_area); + /****************************************************************************** * * FUNCTION: acpi_get_handle @@ -68,6 +73,7 @@ ACPI_MODULE_NAME("nsxfname") * namespace handle. * ******************************************************************************/ + acpi_status acpi_get_handle(acpi_handle parent, acpi_string pathname, acpi_handle * ret_handle) @@ -208,12 +214,40 @@ acpi_get_name(acpi_handle handle, u32 name_type, struct acpi_buffer * buffer) ACPI_EXPORT_SYMBOL(acpi_get_name) +/****************************************************************************** + * + * FUNCTION: acpi_ns_copy_device_id + * + * PARAMETERS: Dest - Pointer to the destination DEVICE_ID + * Source - Pointer to the source DEVICE_ID + * string_area - Pointer to where to copy the dest string + * + * RETURN: Pointer to the next string area + * + * DESCRIPTION: Copy a single DEVICE_ID, including the string data. + * + ******************************************************************************/ +static char *acpi_ns_copy_device_id(struct acpica_device_id *dest, + struct acpica_device_id *source, + char *string_area) +{ + /* Create the destination DEVICE_ID */ + + dest->string = string_area; + dest->length = source->length; + + /* Copy actual string and return a pointer to the next string area */ + + ACPI_MEMCPY(string_area, source->string, source->length); + return (string_area + source->length); +} + /****************************************************************************** * * FUNCTION: acpi_get_object_info * - * PARAMETERS: Handle - Object Handle - * Buffer - Where the info is returned + * PARAMETERS: Handle - Object Handle + * return_buffer - Where the info is returned * * RETURN: Status * @@ -221,33 +255,37 @@ ACPI_EXPORT_SYMBOL(acpi_get_name) * namespace node and possibly by running several standard * control methods (Such as in the case of a device.) * + * For Device and Processor objects, run the Device _HID, _UID, _CID, _STA, + * _ADR, _sx_w, and _sx_d methods. + * + * Note: Allocates the return buffer, must be freed by the caller. 
+ * ******************************************************************************/ + acpi_status -acpi_get_object_info(acpi_handle handle, struct acpi_buffer * buffer) +acpi_get_object_info(acpi_handle handle, + struct acpi_device_info **return_buffer) { - acpi_status status; struct acpi_namespace_node *node; struct acpi_device_info *info; - struct acpi_device_info *return_info; - struct acpi_compatible_id_list *cid_list = NULL; - acpi_size size; + struct acpica_device_id_list *cid_list = NULL; + struct acpica_device_id *hid = NULL; + struct acpica_device_id *uid = NULL; + char *next_id_string; + acpi_object_type type; + acpi_name name; + u8 param_count = 0; + u8 valid = 0; + u32 info_size; + u32 i; + acpi_status status; /* Parameter validation */ - if (!handle || !buffer) { + if (!handle || !return_buffer) { return (AE_BAD_PARAMETER); } - status = acpi_ut_validate_buffer(buffer); - if (ACPI_FAILURE(status)) { - return (status); - } - - info = ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_device_info)); - if (!info) { - return (AE_NO_MEMORY); - } - status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); if (ACPI_FAILURE(status)) { goto cleanup; @@ -256,66 +294,91 @@ acpi_get_object_info(acpi_handle handle, struct acpi_buffer * buffer) node = acpi_ns_map_handle_to_node(handle); if (!node) { (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); - status = AE_BAD_PARAMETER; - goto cleanup; + return (AE_BAD_PARAMETER); } - /* Init return structure */ - - size = sizeof(struct acpi_device_info); + /* Get the namespace node data while the namespace is locked */ - info->type = node->type; - info->name = node->name.integer; - info->valid = 0; + info_size = sizeof(struct acpi_device_info); + type = node->type; + name = node->name.integer; if (node->type == ACPI_TYPE_METHOD) { - info->param_count = node->object->method.param_count; + param_count = node->object->method.param_count; } status = acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); if (ACPI_FAILURE(status)) { - goto cleanup; + return (status); } - /* If not a device, we are all done */ - - if (info->type == ACPI_TYPE_DEVICE) { + if ((type == ACPI_TYPE_DEVICE) || (type == ACPI_TYPE_PROCESSOR)) { /* - * Get extra info for ACPI Devices objects only: - * Run the Device _HID, _UID, _CID, _STA, _ADR and _sx_d methods. + * Get extra info for ACPI Device/Processor objects only: + * Run the Device _HID, _UID, and _CID methods. * * Note: none of these methods are required, so they may or may - * not be present for this device. The Info->Valid bitfield is used - * to indicate which methods were found and ran successfully. + * not be present for this device. The Info->Valid bitfield is used + * to indicate which methods were found and run successfully. 
*/ /* Execute the Device._HID method */ - status = acpi_ut_execute_HID(node, &info->hardware_id); + status = acpi_ut_execute_HID(node, &hid); if (ACPI_SUCCESS(status)) { - info->valid |= ACPI_VALID_HID; + info_size += hid->length; + valid |= ACPI_VALID_HID; } /* Execute the Device._UID method */ - status = acpi_ut_execute_UID(node, &info->unique_id); + status = acpi_ut_execute_UID(node, &uid); if (ACPI_SUCCESS(status)) { - info->valid |= ACPI_VALID_UID; + info_size += uid->length; + valid |= ACPI_VALID_UID; } /* Execute the Device._CID method */ status = acpi_ut_execute_CID(node, &cid_list); if (ACPI_SUCCESS(status)) { - size += cid_list->size; - info->valid |= ACPI_VALID_CID; + + /* Add size of CID strings and CID pointer array */ + + info_size += + (cid_list->list_size - + sizeof(struct acpica_device_id_list)); + valid |= ACPI_VALID_CID; } + } + + /* + * Now that we have the variable-length data, we can allocate the + * return buffer + */ + info = ACPI_ALLOCATE_ZEROED(info_size); + if (!info) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Get the fixed-length data */ + + if ((type == ACPI_TYPE_DEVICE) || (type == ACPI_TYPE_PROCESSOR)) { + /* + * Get extra info for ACPI Device/Processor objects only: + * Run the _STA, _ADR and, sx_w, and _sx_d methods. + * + * Note: none of these methods are required, so they may or may + * not be present for this device. The Info->Valid bitfield is used + * to indicate which methods were found and run successfully. + */ /* Execute the Device._STA method */ status = acpi_ut_execute_STA(node, &info->current_status); if (ACPI_SUCCESS(status)) { - info->valid |= ACPI_VALID_STA; + valid |= ACPI_VALID_STA; } /* Execute the Device._ADR method */ @@ -323,36 +386,100 @@ acpi_get_object_info(acpi_handle handle, struct acpi_buffer * buffer) status = acpi_ut_evaluate_numeric_object(METHOD_NAME__ADR, node, &info->address); if (ACPI_SUCCESS(status)) { - info->valid |= ACPI_VALID_ADR; + valid |= ACPI_VALID_ADR; + } + + /* Execute the Device._sx_w methods */ + + status = acpi_ut_execute_power_methods(node, + acpi_gbl_lowest_dstate_names, + ACPI_NUM_sx_w_METHODS, + info->lowest_dstates); + if (ACPI_SUCCESS(status)) { + valid |= ACPI_VALID_SXWS; } /* Execute the Device._sx_d methods */ - status = acpi_ut_execute_sxds(node, info->highest_dstates); + status = acpi_ut_execute_power_methods(node, + acpi_gbl_highest_dstate_names, + ACPI_NUM_sx_d_METHODS, + info->highest_dstates); if (ACPI_SUCCESS(status)) { - info->valid |= ACPI_VALID_SXDS; + valid |= ACPI_VALID_SXDS; } } - /* Validate/Allocate/Clear caller buffer */ + /* + * Create a pointer to the string area of the return buffer. + * Point to the end of the base struct acpi_device_info structure. + */ + next_id_string = ACPI_CAST_PTR(char, info->compatible_id_list.ids); + if (cid_list) { - status = acpi_ut_initialize_buffer(buffer, size); - if (ACPI_FAILURE(status)) { - goto cleanup; + /* Point past the CID DEVICE_ID array */ + + next_id_string += + ((acpi_size) cid_list->count * + sizeof(struct acpica_device_id)); } - /* Populate the return buffer */ + /* + * Copy the HID, UID, and CIDs to the return buffer. The variable-length + * strings are copied to the reserved area at the end of the buffer. + * + * For HID and CID, check if the ID is a PCI Root Bridge. 
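/*
 * Illustrative layout of the single ACPI_ALLOCATE_ZEROED(info_size) block
 * being filled in here (a sketch, not taken verbatim from the patch):
 *
 *   struct acpi_device_info          fixed-length part (type, name, valid, ...)
 *   struct acpica_device_id ids[N]   compatible_id_list.ids[]; the first
 *                                    element lives inside the struct itself
 *   char string_area[]               HID string, then UID string, then the
 *                                    CID strings, copied in that order
 *
 * next_id_string starts just past the ids[] array and advances as
 * acpi_ns_copy_device_id() copies each string, so every acpica_device_id
 * .string pointer points into this trailing area and a single kfree()/
 * ACPI_FREE() of the acpi_device_info releases everything.
 */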
+ */ + if (hid) { + next_id_string = acpi_ns_copy_device_id(&info->hardware_id, + hid, next_id_string); + + if (acpi_ut_is_pci_root_bridge(hid->string)) { + info->flags |= ACPI_PCI_ROOT_BRIDGE; + } + } - return_info = buffer->pointer; - ACPI_MEMCPY(return_info, info, sizeof(struct acpi_device_info)); + if (uid) { + next_id_string = acpi_ns_copy_device_id(&info->unique_id, + uid, next_id_string); + } if (cid_list) { - ACPI_MEMCPY(&return_info->compatibility_id, cid_list, - cid_list->size); + info->compatible_id_list.count = cid_list->count; + info->compatible_id_list.list_size = cid_list->list_size; + + /* Copy each CID */ + + for (i = 0; i < cid_list->count; i++) { + next_id_string = + acpi_ns_copy_device_id(&info->compatible_id_list. + ids[i], &cid_list->ids[i], + next_id_string); + + if (acpi_ut_is_pci_root_bridge(cid_list->ids[i].string)) { + info->flags |= ACPI_PCI_ROOT_BRIDGE; + } + } } + /* Copy the fixed-length data */ + + info->info_size = info_size; + info->type = type; + info->name = name; + info->param_count = param_count; + info->valid = valid; + + *return_buffer = info; + status = AE_OK; + cleanup: - ACPI_FREE(info); + if (hid) { + ACPI_FREE(hid); + } + if (uid) { + ACPI_FREE(uid); + } if (cid_list) { ACPI_FREE(cid_list); } diff --git a/drivers/acpi/acpica/uteval.c b/drivers/acpi/acpica/uteval.c index 006b16c26017..5503307b8bb7 100644 --- a/drivers/acpi/acpica/uteval.c +++ b/drivers/acpi/acpica/uteval.c @@ -44,19 +44,10 @@ #include #include "accommon.h" #include "acnamesp.h" -#include "acinterp.h" #define _COMPONENT ACPI_UTILITIES ACPI_MODULE_NAME("uteval") -/* Local prototypes */ -static void -acpi_ut_copy_id_string(char *destination, char *source, acpi_size max_length); - -static acpi_status -acpi_ut_translate_one_cid(union acpi_operand_object *obj_desc, - struct acpi_compatible_id *one_cid); - /* * Strings supported by the _OSI predefined (internal) method. * @@ -213,7 +204,7 @@ acpi_status acpi_osi_invalidate(char *interface) * RETURN: Status * * DESCRIPTION: Evaluates a namespace object and verifies the type of the - * return object. Common code that simplifies accessing objects + * return object. Common code that simplifies accessing objects * that have required return objects of fixed types. * * NOTE: Internal function, no parameter validation @@ -298,7 +289,7 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node, if ((acpi_gbl_enable_interpreter_slack) && (!expected_return_btypes)) { /* - * We received a return object, but one was not expected. This can + * We received a return object, but one was not expected. This can * happen frequently if the "implicit return" feature is enabled. * Just delete the return object and return AE_OK. */ @@ -340,12 +331,12 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node, * * PARAMETERS: object_name - Object name to be evaluated * device_node - Node for the device - * Address - Where the value is returned + * Value - Where the value is returned * * RETURN: Status * * DESCRIPTION: Evaluates a numeric namespace object for a selected device - * and stores result in *Address. + * and stores result in *Value. 
* * NOTE: Internal function, no parameter validation * @@ -354,7 +345,7 @@ acpi_ut_evaluate_object(struct acpi_namespace_node *prefix_node, acpi_status acpi_ut_evaluate_numeric_object(char *object_name, struct acpi_namespace_node *device_node, - acpi_integer * address) + acpi_integer *value) { union acpi_operand_object *obj_desc; acpi_status status; @@ -369,295 +360,7 @@ acpi_ut_evaluate_numeric_object(char *object_name, /* Get the returned Integer */ - *address = obj_desc->integer.value; - - /* On exit, we must delete the return object */ - - acpi_ut_remove_reference(obj_desc); - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_copy_id_string - * - * PARAMETERS: Destination - Where to copy the string - * Source - Source string - * max_length - Length of the destination buffer - * - * RETURN: None - * - * DESCRIPTION: Copies an ID string for the _HID, _CID, and _UID methods. - * Performs removal of a leading asterisk if present -- workaround - * for a known issue on a bunch of machines. - * - ******************************************************************************/ - -static void -acpi_ut_copy_id_string(char *destination, char *source, acpi_size max_length) -{ - - /* - * Workaround for ID strings that have a leading asterisk. This construct - * is not allowed by the ACPI specification (ID strings must be - * alphanumeric), but enough existing machines have this embedded in their - * ID strings that the following code is useful. - */ - if (*source == '*') { - source++; - } - - /* Do the actual copy */ - - ACPI_STRNCPY(destination, source, max_length); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_execute_HID - * - * PARAMETERS: device_node - Node for the device - * Hid - Where the HID is returned - * - * RETURN: Status - * - * DESCRIPTION: Executes the _HID control method that returns the hardware - * ID of the device. - * - * NOTE: Internal function, no parameter validation - * - ******************************************************************************/ - -acpi_status -acpi_ut_execute_HID(struct acpi_namespace_node *device_node, - struct acpica_device_id *hid) -{ - union acpi_operand_object *obj_desc; - acpi_status status; - - ACPI_FUNCTION_TRACE(ut_execute_HID); - - status = acpi_ut_evaluate_object(device_node, METHOD_NAME__HID, - ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING, - &obj_desc); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - if (obj_desc->common.type == ACPI_TYPE_INTEGER) { - - /* Convert the Numeric HID to string */ - - acpi_ex_eisa_id_to_string((u32) obj_desc->integer.value, - hid->value); - } else { - /* Copy the String HID from the returned object */ - - acpi_ut_copy_id_string(hid->value, obj_desc->string.pointer, - sizeof(hid->value)); - } - - /* On exit, we must delete the return object */ - - acpi_ut_remove_reference(obj_desc); - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_translate_one_cid - * - * PARAMETERS: obj_desc - _CID object, must be integer or string - * one_cid - Where the CID string is returned - * - * RETURN: Status - * - * DESCRIPTION: Return a numeric or string _CID value as a string. - * (Compatible ID) - * - * NOTE: Assumes a maximum _CID string length of - * ACPI_MAX_CID_LENGTH. 
- * - ******************************************************************************/ - -static acpi_status -acpi_ut_translate_one_cid(union acpi_operand_object *obj_desc, - struct acpi_compatible_id *one_cid) -{ - - switch (obj_desc->common.type) { - case ACPI_TYPE_INTEGER: - - /* Convert the Numeric CID to string */ - - acpi_ex_eisa_id_to_string((u32) obj_desc->integer.value, - one_cid->value); - return (AE_OK); - - case ACPI_TYPE_STRING: - - if (obj_desc->string.length > ACPI_MAX_CID_LENGTH) { - return (AE_AML_STRING_LIMIT); - } - - /* Copy the String CID from the returned object */ - - acpi_ut_copy_id_string(one_cid->value, obj_desc->string.pointer, - ACPI_MAX_CID_LENGTH); - return (AE_OK); - - default: - - return (AE_TYPE); - } -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_execute_CID - * - * PARAMETERS: device_node - Node for the device - * return_cid_list - Where the CID list is returned - * - * RETURN: Status - * - * DESCRIPTION: Executes the _CID control method that returns one or more - * compatible hardware IDs for the device. - * - * NOTE: Internal function, no parameter validation - * - ******************************************************************************/ - -acpi_status -acpi_ut_execute_CID(struct acpi_namespace_node * device_node, - struct acpi_compatible_id_list ** return_cid_list) -{ - union acpi_operand_object *obj_desc; - acpi_status status; - u32 count; - u32 size; - struct acpi_compatible_id_list *cid_list; - u32 i; - - ACPI_FUNCTION_TRACE(ut_execute_CID); - - /* Evaluate the _CID method for this device */ - - status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CID, - ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING - | ACPI_BTYPE_PACKAGE, &obj_desc); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - /* Get the number of _CIDs returned */ - - count = 1; - if (obj_desc->common.type == ACPI_TYPE_PACKAGE) { - count = obj_desc->package.count; - } - - /* Allocate a worst-case buffer for the _CIDs */ - - size = (((count - 1) * sizeof(struct acpi_compatible_id)) + - sizeof(struct acpi_compatible_id_list)); - - cid_list = ACPI_ALLOCATE_ZEROED((acpi_size) size); - if (!cid_list) { - return_ACPI_STATUS(AE_NO_MEMORY); - } - - /* Init CID list */ - - cid_list->count = count; - cid_list->size = size; - - /* - * A _CID can return either a single compatible ID or a package of - * compatible IDs. Each compatible ID can be one of the following: - * 1) Integer (32 bit compressed EISA ID) or - * 2) String (PCI ID format, e.g. "PCI\VEN_vvvv&DEV_dddd&SUBSYS_ssssssss") - */ - - /* The _CID object can be either a single CID or a package (list) of CIDs */ - - if (obj_desc->common.type == ACPI_TYPE_PACKAGE) { - - /* Translate each package element */ - - for (i = 0; i < count; i++) { - status = - acpi_ut_translate_one_cid(obj_desc->package. 
- elements[i], - &cid_list->id[i]); - if (ACPI_FAILURE(status)) { - break; - } - } - } else { - /* Only one CID, translate to a string */ - - status = acpi_ut_translate_one_cid(obj_desc, cid_list->id); - } - - /* Cleanup on error */ - - if (ACPI_FAILURE(status)) { - ACPI_FREE(cid_list); - } else { - *return_cid_list = cid_list; - } - - /* On exit, we must delete the _CID return object */ - - acpi_ut_remove_reference(obj_desc); - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ut_execute_UID - * - * PARAMETERS: device_node - Node for the device - * Uid - Where the UID is returned - * - * RETURN: Status - * - * DESCRIPTION: Executes the _UID control method that returns the hardware - * ID of the device. - * - * NOTE: Internal function, no parameter validation - * - ******************************************************************************/ - -acpi_status -acpi_ut_execute_UID(struct acpi_namespace_node *device_node, - struct acpica_device_id *uid) -{ - union acpi_operand_object *obj_desc; - acpi_status status; - - ACPI_FUNCTION_TRACE(ut_execute_UID); - - status = acpi_ut_evaluate_object(device_node, METHOD_NAME__UID, - ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING, - &obj_desc); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - if (obj_desc->common.type == ACPI_TYPE_INTEGER) { - - /* Convert the Numeric UID to string */ - - acpi_ex_unsigned_integer_to_string(obj_desc->integer.value, - uid->value); - } else { - /* Copy the String UID from the returned object */ - - acpi_ut_copy_id_string(uid->value, obj_desc->string.pointer, - sizeof(uid->value)); - } + *value = obj_desc->integer.value; /* On exit, we must delete the return object */ @@ -716,60 +419,64 @@ acpi_ut_execute_STA(struct acpi_namespace_node *device_node, u32 * flags) /******************************************************************************* * - * FUNCTION: acpi_ut_execute_Sxds + * FUNCTION: acpi_ut_execute_power_methods * * PARAMETERS: device_node - Node for the device - * Flags - Where the status flags are returned + * method_names - Array of power method names + * method_count - Number of methods to execute + * out_values - Where the power method values are returned * - * RETURN: Status + * RETURN: Status, out_values * - * DESCRIPTION: Executes _STA for selected device and stores results in - * *Flags. + * DESCRIPTION: Executes the specified power methods for the device and returns + * the result(s). * * NOTE: Internal function, no parameter validation * - ******************************************************************************/ +******************************************************************************/ acpi_status -acpi_ut_execute_sxds(struct acpi_namespace_node *device_node, u8 * highest) +acpi_ut_execute_power_methods(struct acpi_namespace_node *device_node, + const char **method_names, + u8 method_count, u8 *out_values) { union acpi_operand_object *obj_desc; acpi_status status; + acpi_status final_status = AE_NOT_FOUND; u32 i; - ACPI_FUNCTION_TRACE(ut_execute_sxds); + ACPI_FUNCTION_TRACE(ut_execute_power_methods); - for (i = 0; i < 4; i++) { - highest[i] = 0xFF; + for (i = 0; i < method_count; i++) { + /* + * Execute the power method (_sx_d or _sx_w). The only allowable + * return type is an Integer. 
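/*
 * Caller-side sketch (hypothetical helper name, not part of the patch) of how
 * the out_values filled by acpi_ut_execute_power_methods() surface through
 * acpi_get_object_info():
 */
static void show_sx_w(struct acpi_device_info *info)
{
	u32 i;

	if (!(info->valid & ACPI_VALID_SXWS)) {
		return;			/* No _SxW method returned a value */
	}

	for (i = 0; i < ACPI_NUM_sx_w_METHODS; i++) {
		if (info->lowest_dstates[i] == ACPI_UINT8_MAX) {
			continue;	/* This particular _SxW was absent */
		}
		/* lowest_dstates[i] is the Integer returned by _S0W.._S4W */
	}
}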
+ */ status = acpi_ut_evaluate_object(device_node, ACPI_CAST_PTR(char, - acpi_gbl_highest_dstate_names - [i]), + method_names[i]), ACPI_BTYPE_INTEGER, &obj_desc); - if (ACPI_FAILURE(status)) { - if (status != AE_NOT_FOUND) { - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "%s on Device %4.4s, %s\n", - ACPI_CAST_PTR(char, - acpi_gbl_highest_dstate_names - [i]), - acpi_ut_get_node_name - (device_node), - acpi_format_exception - (status))); - - return_ACPI_STATUS(status); - } - } else { - /* Extract the Dstate value */ - - highest[i] = (u8) obj_desc->integer.value; + if (ACPI_SUCCESS(status)) { + out_values[i] = (u8)obj_desc->integer.value; /* Delete the return object */ acpi_ut_remove_reference(obj_desc); + final_status = AE_OK; /* At least one value is valid */ + continue; } + + out_values[i] = ACPI_UINT8_MAX; + if (status == AE_NOT_FOUND) { + continue; /* Ignore if not found */ + } + + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Failed %s on Device %4.4s, %s\n", + ACPI_CAST_PTR(char, method_names[i]), + acpi_ut_get_node_name(device_node), + acpi_format_exception(status))); } - return_ACPI_STATUS(AE_OK); + return_ACPI_STATUS(final_status); } diff --git a/drivers/acpi/acpica/utglobal.c b/drivers/acpi/acpica/utglobal.c index 59e46f257c02..ed7a33c67fbe 100644 --- a/drivers/acpi/acpica/utglobal.c +++ b/drivers/acpi/acpica/utglobal.c @@ -90,7 +90,15 @@ const char *acpi_gbl_sleep_state_names[ACPI_S_STATE_COUNT] = { "\\_S5_" }; -const char *acpi_gbl_highest_dstate_names[4] = { +const char *acpi_gbl_lowest_dstate_names[ACPI_NUM_sx_w_METHODS] = { + "_S0W", + "_S1W", + "_S2W", + "_S3W", + "_S4W" +}; + +const char *acpi_gbl_highest_dstate_names[ACPI_NUM_sx_d_METHODS] = { "_S1D", "_S2D", "_S3D", diff --git a/drivers/acpi/acpica/utids.c b/drivers/acpi/acpica/utids.c new file mode 100644 index 000000000000..52eaae404554 --- /dev/null +++ b/drivers/acpi/acpica/utids.c @@ -0,0 +1,382 @@ +/****************************************************************************** + * + * Module Name: utids - support for device IDs - HID, UID, CID + * + *****************************************************************************/ + +/* + * Copyright (C) 2000 - 2009, Intel Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + */ + +#include +#include "accommon.h" +#include "acinterp.h" + +#define _COMPONENT ACPI_UTILITIES +ACPI_MODULE_NAME("utids") + +/* Local prototypes */ +static void acpi_ut_copy_id_string(char *destination, char *source); + +/******************************************************************************* + * + * FUNCTION: acpi_ut_copy_id_string + * + * PARAMETERS: Destination - Where to copy the string + * Source - Source string + * + * RETURN: None + * + * DESCRIPTION: Copies an ID string for the _HID, _CID, and _UID methods. + * Performs removal of a leading asterisk if present -- workaround + * for a known issue on a bunch of machines. + * + ******************************************************************************/ + +static void acpi_ut_copy_id_string(char *destination, char *source) +{ + + /* + * Workaround for ID strings that have a leading asterisk. This construct + * is not allowed by the ACPI specification (ID strings must be + * alphanumeric), but enough existing machines have this embedded in their + * ID strings that the following code is useful. + */ + if (*source == '*') { + source++; + } + + /* Do the actual copy */ + + ACPI_STRCPY(destination, source); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ut_execute_HID + * + * PARAMETERS: device_node - Node for the device + * return_id - Where the string HID is returned + * + * RETURN: Status + * + * DESCRIPTION: Executes the _HID control method that returns the hardware + * ID of the device. The HID is either an 32-bit encoded EISAID + * Integer or a String. A string is always returned. An EISAID + * is converted to a string. 
+ * + * NOTE: Internal function, no parameter validation + * + ******************************************************************************/ + +acpi_status +acpi_ut_execute_HID(struct acpi_namespace_node *device_node, + struct acpica_device_id **return_id) +{ + union acpi_operand_object *obj_desc; + struct acpica_device_id *hid; + u32 length; + acpi_status status; + + ACPI_FUNCTION_TRACE(ut_execute_HID); + + status = acpi_ut_evaluate_object(device_node, METHOD_NAME__HID, + ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING, + &obj_desc); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Get the size of the String to be returned, includes null terminator */ + + if (obj_desc->common.type == ACPI_TYPE_INTEGER) { + length = ACPI_EISAID_STRING_SIZE; + } else { + length = obj_desc->string.length + 1; + } + + /* Allocate a buffer for the HID */ + + hid = + ACPI_ALLOCATE_ZEROED(sizeof(struct acpica_device_id) + + (acpi_size) length); + if (!hid) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Area for the string starts after DEVICE_ID struct */ + + hid->string = ACPI_ADD_PTR(char, hid, sizeof(struct acpica_device_id)); + + /* Convert EISAID to a string or simply copy existing string */ + + if (obj_desc->common.type == ACPI_TYPE_INTEGER) { + acpi_ex_eisa_id_to_string(hid->string, obj_desc->integer.value); + } else { + acpi_ut_copy_id_string(hid->string, obj_desc->string.pointer); + } + + hid->length = length; + *return_id = hid; + +cleanup: + + /* On exit, we must delete the return object */ + + acpi_ut_remove_reference(obj_desc); + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ut_execute_UID + * + * PARAMETERS: device_node - Node for the device + * return_id - Where the string UID is returned + * + * RETURN: Status + * + * DESCRIPTION: Executes the _UID control method that returns the unique + * ID of the device. The UID is either a 64-bit Integer (NOT an + * EISAID) or a string. Always returns a string. A 64-bit integer + * is converted to a decimal string. 
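/*
 * Sizing note, illustration only: the largest 64-bit integer,
 * 18446744073709551615, has 20 decimal digits, so ACPI_MAX64_DECIMAL_DIGITS
 * is 20 and the "+ 1" above reserves the null terminator -- the same 21 bytes
 * quoted for acpi_ex_integer_to_string() in the exutils.c hunk earlier in
 * this patch.
 */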
+ * + * NOTE: Internal function, no parameter validation + * + ******************************************************************************/ + +acpi_status +acpi_ut_execute_UID(struct acpi_namespace_node *device_node, + struct acpica_device_id **return_id) +{ + union acpi_operand_object *obj_desc; + struct acpica_device_id *uid; + u32 length; + acpi_status status; + + ACPI_FUNCTION_TRACE(ut_execute_UID); + + status = acpi_ut_evaluate_object(device_node, METHOD_NAME__UID, + ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING, + &obj_desc); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Get the size of the String to be returned, includes null terminator */ + + if (obj_desc->common.type == ACPI_TYPE_INTEGER) { + length = ACPI_MAX64_DECIMAL_DIGITS + 1; + } else { + length = obj_desc->string.length + 1; + } + + /* Allocate a buffer for the UID */ + + uid = + ACPI_ALLOCATE_ZEROED(sizeof(struct acpica_device_id) + + (acpi_size) length); + if (!uid) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Area for the string starts after DEVICE_ID struct */ + + uid->string = ACPI_ADD_PTR(char, uid, sizeof(struct acpica_device_id)); + + /* Convert an Integer to string, or just copy an existing string */ + + if (obj_desc->common.type == ACPI_TYPE_INTEGER) { + acpi_ex_integer_to_string(uid->string, obj_desc->integer.value); + } else { + acpi_ut_copy_id_string(uid->string, obj_desc->string.pointer); + } + + uid->length = length; + *return_id = uid; + +cleanup: + + /* On exit, we must delete the return object */ + + acpi_ut_remove_reference(obj_desc); + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ut_execute_CID + * + * PARAMETERS: device_node - Node for the device + * return_cid_list - Where the CID list is returned + * + * RETURN: Status, list of CID strings + * + * DESCRIPTION: Executes the _CID control method that returns one or more + * compatible hardware IDs for the device. + * + * NOTE: Internal function, no parameter validation + * + * A _CID method can return either a single compatible ID or a package of + * compatible IDs. Each compatible ID can be one of the following: + * 1) Integer (32 bit compressed EISA ID) or + * 2) String (PCI ID format, e.g. "PCI\VEN_vvvv&DEV_dddd&SUBSYS_ssssssss") + * + * The Integer CIDs are converted to string format by this function. + * + ******************************************************************************/ + +acpi_status +acpi_ut_execute_CID(struct acpi_namespace_node *device_node, + struct acpica_device_id_list **return_cid_list) +{ + union acpi_operand_object **cid_objects; + union acpi_operand_object *obj_desc; + struct acpica_device_id_list *cid_list; + char *next_id_string; + u32 string_area_size; + u32 length; + u32 cid_list_size; + acpi_status status; + u32 count; + u32 i; + + ACPI_FUNCTION_TRACE(ut_execute_CID); + + /* Evaluate the _CID method for this device */ + + status = acpi_ut_evaluate_object(device_node, METHOD_NAME__CID, + ACPI_BTYPE_INTEGER | ACPI_BTYPE_STRING + | ACPI_BTYPE_PACKAGE, &obj_desc); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* + * Get the count and size of the returned _CIDs. _CID can return either + * a Package of Integers/Strings or a single Integer or String. + * Note: This section also validates that all CID elements are of the + * correct type (Integer or String). 
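/*
 * Quick size check of the formula computed just below, illustration only:
 * for a _CID package of two Integer EISAIDs, each converted string needs
 * ACPI_EISAID_STRING_SIZE bytes (7 characters plus the null), so
 *
 *   string_area_size = 2 * ACPI_EISAID_STRING_SIZE
 *   cid_list_size    = sizeof(struct acpica_device_id_list)   (ids[1] built in)
 *                    + (2 - 1) * sizeof(struct acpica_device_id)
 *                    + string_area_size
 */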
+ */ + if (obj_desc->common.type == ACPI_TYPE_PACKAGE) { + count = obj_desc->package.count; + cid_objects = obj_desc->package.elements; + } else { /* Single Integer or String CID */ + + count = 1; + cid_objects = &obj_desc; + } + + string_area_size = 0; + for (i = 0; i < count; i++) { + + /* String lengths include null terminator */ + + switch (cid_objects[i]->common.type) { + case ACPI_TYPE_INTEGER: + string_area_size += ACPI_EISAID_STRING_SIZE; + break; + + case ACPI_TYPE_STRING: + string_area_size += cid_objects[i]->string.length + 1; + break; + + default: + status = AE_TYPE; + goto cleanup; + } + } + + /* + * Now that we know the length of the CIDs, allocate return buffer: + * 1) Size of the base structure + + * 2) Size of the CID DEVICE_ID array + + * 3) Size of the actual CID strings + */ + cid_list_size = sizeof(struct acpica_device_id_list) + + ((count - 1) * sizeof(struct acpica_device_id)) + string_area_size; + + cid_list = ACPI_ALLOCATE_ZEROED(cid_list_size); + if (!cid_list) { + status = AE_NO_MEMORY; + goto cleanup; + } + + /* Area for CID strings starts after the CID DEVICE_ID array */ + + next_id_string = ACPI_CAST_PTR(char, cid_list->ids) + + ((acpi_size) count * sizeof(struct acpica_device_id)); + + /* Copy/convert the CIDs to the return buffer */ + + for (i = 0; i < count; i++) { + if (cid_objects[i]->common.type == ACPI_TYPE_INTEGER) { + + /* Convert the Integer (EISAID) CID to a string */ + + acpi_ex_eisa_id_to_string(next_id_string, + cid_objects[i]->integer. + value); + length = ACPI_EISAID_STRING_SIZE; + } else { /* ACPI_TYPE_STRING */ + + /* Copy the String CID from the returned object */ + + acpi_ut_copy_id_string(next_id_string, + cid_objects[i]->string.pointer); + length = cid_objects[i]->string.length + 1; + } + + cid_list->ids[i].string = next_id_string; + cid_list->ids[i].length = length; + next_id_string += length; + } + + /* Finish the CID list */ + + cid_list->count = count; + cid_list->list_size = cid_list_size; + *return_cid_list = cid_list; + +cleanup: + + /* On exit, we must delete the _CID return object */ + + acpi_ut_remove_reference(obj_desc); + return_ACPI_STATUS(status); +} diff --git a/drivers/acpi/acpica/utmisc.c b/drivers/acpi/acpica/utmisc.c index fbe782348b0b..9cd65334ca75 100644 --- a/drivers/acpi/acpica/utmisc.c +++ b/drivers/acpi/acpica/utmisc.c @@ -118,6 +118,34 @@ const char *acpi_ut_validate_exception(acpi_status status) return (ACPI_CAST_PTR(const char, exception)); } +/******************************************************************************* + * + * FUNCTION: acpi_ut_is_pci_root_bridge + * + * PARAMETERS: Id - The HID/CID in string format + * + * RETURN: TRUE if the Id is a match for a PCI/PCI-Express Root Bridge + * + * DESCRIPTION: Determine if the input ID is a PCI Root Bridge ID. + * + ******************************************************************************/ + +u8 acpi_ut_is_pci_root_bridge(char *id) +{ + + /* + * Check if this is a PCI root bridge. + * ACPI 3.0+: check for a PCI Express root also. 
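/*
 * For reference, illustration only: in ACPICA's headers PCI_ROOT_HID_STRING
 * is "PNP0A03" and PCI_EXPRESS_ROOT_HID_STRING is "PNP0A08", so for example
 *
 *   acpi_ut_is_pci_root_bridge("PNP0A08")   returns TRUE
 *   acpi_ut_is_pci_root_bridge("PNP0C0A")   returns FALSE
 *
 * and the exact ACPI_STRCMP below is sufficient where the old code used
 * ACPI_STRNCMP against sizeof() of the same macros.
 */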
+ */ + if (!(ACPI_STRCMP(id, + PCI_ROOT_HID_STRING)) || + !(ACPI_STRCMP(id, PCI_EXPRESS_ROOT_HID_STRING))) { + return (TRUE); + } + + return (FALSE); +} + /******************************************************************************* * * FUNCTION: acpi_ut_is_aml_table diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c index fe0cdf83641a..2aee8c24dc56 100644 --- a/drivers/acpi/container.c +++ b/drivers/acpi/container.c @@ -200,20 +200,17 @@ container_walk_namespace_cb(acpi_handle handle, u32 lvl, void *context, void **rv) { char *hid = NULL; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_device_info *info; acpi_status status; int *action = context; - - status = acpi_get_object_info(handle, &buffer); - if (ACPI_FAILURE(status) || !buffer.pointer) { + status = acpi_get_object_info(handle, &info); + if (ACPI_FAILURE(status)) { return AE_OK; } - info = buffer.pointer; if (info->valid & ACPI_VALID_HID) - hid = info->hardware_id.value; + hid = info->hardware_id.string; if (hid == NULL) { goto end; @@ -240,7 +237,7 @@ container_walk_namespace_cb(acpi_handle handle, } end: - kfree(buffer.pointer); + kfree(info); return AE_OK; } diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index efb959d6c8a9..39536b80bce7 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -231,18 +231,16 @@ static int is_ata(acpi_handle handle) static int is_battery(acpi_handle handle) { struct acpi_device_info *info; - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; int ret = 1; - if (!ACPI_SUCCESS(acpi_get_object_info(handle, &buffer))) + if (!ACPI_SUCCESS(acpi_get_object_info(handle, &info))) return 0; - info = buffer.pointer; if (!(info->valid & ACPI_VALID_HID)) ret = 0; else - ret = !strcmp("PNP0C0A", info->hardware_id.value); + ret = !strcmp("PNP0C0A", info->hardware_id.string); - kfree(buffer.pointer); + kfree(info); return ret; } diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index a8a5c29958c8..27a7072347ea 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -93,15 +93,13 @@ do_acpi_find_child(acpi_handle handle, u32 lvl, void *context, void **rv) { acpi_status status; struct acpi_device_info *info; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct acpi_find_child *find = context; - status = acpi_get_object_info(handle, &buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_SUCCESS(status)) { - info = buffer.pointer; if (info->address == find->address) find->handle = handle; - kfree(buffer.pointer); + kfree(info); } return AE_OK; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 781435d7e369..0ab526de7c55 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -60,13 +60,13 @@ static int create_modalias(struct acpi_device *acpi_dev, char *modalias, } if (acpi_dev->flags.compatible_ids) { - struct acpi_compatible_id_list *cid_list; + struct acpica_device_id_list *cid_list; int i; cid_list = acpi_dev->pnp.cid_list; for (i = 0; i < cid_list->count; i++) { count = snprintf(&modalias[len], size, "%s:", - cid_list->id[i].value); + cid_list->ids[i].string); if (count < 0 || count >= size) { printk(KERN_ERR PREFIX "%s cid[%i] exceeds event buffer size", acpi_dev->pnp.device_name, i); @@ -287,14 +287,14 @@ int acpi_match_device_ids(struct acpi_device *device, } if (device->flags.compatible_ids) { - struct acpi_compatible_id_list *cid_list = device->pnp.cid_list; + struct acpica_device_id_list *cid_list = device->pnp.cid_list; int i; for (id = ids; id->id[0]; id++) { /* compare multiple _CID 
entries against driver ids */ for (i = 0; i < cid_list->count; i++) { if (!strcmp((char*)id->id, - cid_list->id[i].value)) + cid_list->ids[i].string)) return 0; } } @@ -999,33 +999,89 @@ static int acpi_dock_match(struct acpi_device *device) return acpi_get_handle(device->handle, "_DCK", &tmp); } +static struct acpica_device_id_list* +acpi_add_cid( + struct acpi_device_info *info, + struct acpica_device_id *new_cid) +{ + struct acpica_device_id_list *cid; + char *next_id_string; + acpi_size cid_length; + acpi_size new_cid_length; + u32 i; + + + /* Allocate new CID list with room for the new CID */ + + if (!new_cid) + new_cid_length = info->compatible_id_list.list_size; + else if (info->compatible_id_list.list_size) + new_cid_length = info->compatible_id_list.list_size + + new_cid->length + sizeof(struct acpica_device_id); + else + new_cid_length = sizeof(struct acpica_device_id_list) + new_cid->length; + + cid = ACPI_ALLOCATE_ZEROED(new_cid_length); + if (!cid) { + return NULL; + } + + cid->list_size = new_cid_length; + cid->count = info->compatible_id_list.count; + if (new_cid) + cid->count++; + next_id_string = (char *) cid->ids + (cid->count * sizeof(struct acpica_device_id)); + + /* Copy all existing CIDs */ + + for (i = 0; i < info->compatible_id_list.count; i++) { + cid_length = info->compatible_id_list.ids[i].length; + cid->ids[i].string = next_id_string; + cid->ids[i].length = cid_length; + + ACPI_MEMCPY(next_id_string, info->compatible_id_list.ids[i].string, + cid_length); + + next_id_string += cid_length; + } + + /* Append the new CID */ + + if (new_cid) { + cid->ids[i].string = next_id_string; + cid->ids[i].length = new_cid->length; + + ACPI_MEMCPY(next_id_string, new_cid->string, new_cid->length); + } + + return cid; +} + static void acpi_device_set_id(struct acpi_device *device, struct acpi_device *parent, acpi_handle handle, int type) { - struct acpi_device_info *info; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_device_info *info = NULL; char *hid = NULL; char *uid = NULL; - struct acpi_compatible_id_list *cid_list = NULL; - const char *cid_add = NULL; + struct acpica_device_id_list *cid_list = NULL; + char *cid_add = NULL; acpi_status status; switch (type) { case ACPI_BUS_TYPE_DEVICE: - status = acpi_get_object_info(handle, &buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_FAILURE(status)) { printk(KERN_ERR PREFIX "%s: Error reading device info\n", __func__); return; } - info = buffer.pointer; if (info->valid & ACPI_VALID_HID) - hid = info->hardware_id.value; + hid = info->hardware_id.string; if (info->valid & ACPI_VALID_UID) - uid = info->unique_id.value; + uid = info->unique_id.string; if (info->valid & ACPI_VALID_CID) - cid_list = &info->compatibility_id; + cid_list = &info->compatible_id_list; if (info->valid & ACPI_VALID_ADR) { device->pnp.bus_address = info->address; device->flags.bus_address = 1; @@ -1076,55 +1132,44 @@ static void acpi_device_set_id(struct acpi_device *device, } if (hid) { - strcpy(device->pnp.hardware_id, hid); - device->flags.hardware_id = 1; - } + device->pnp.hardware_id = ACPI_ALLOCATE_ZEROED(strlen (hid) + 1); + if (device->pnp.hardware_id) { + strcpy(device->pnp.hardware_id, hid); + device->flags.hardware_id = 1; + } + } else + device->pnp.hardware_id = NULL; + if (uid) { - strcpy(device->pnp.unique_id, uid); - device->flags.unique_id = 1; - } + device->pnp.unique_id = ACPI_ALLOCATE_ZEROED(strlen (uid) + 1); + if (device->pnp.unique_id) { + strcpy(device->pnp.unique_id, uid); + 
device->flags.unique_id = 1; + } + } else + device->pnp.unique_id = NULL; + if (cid_list || cid_add) { - struct acpi_compatible_id_list *list; - int size = 0; - int count = 0; - - if (cid_list) { - size = cid_list->size; - } else if (cid_add) { - size = sizeof(struct acpi_compatible_id_list); - cid_list = ACPI_ALLOCATE_ZEROED((acpi_size) size); - if (!cid_list) { - printk(KERN_ERR "Memory allocation error\n"); - kfree(buffer.pointer); - return; - } else { - cid_list->count = 0; - cid_list->size = size; - } + struct acpica_device_id_list *list; + + if (cid_add) { + struct acpica_device_id cid; + cid.length = strlen (cid_add) + 1; + cid.string = cid_add; + + list = acpi_add_cid(info, &cid); + } else { + list = acpi_add_cid(info, NULL); } - if (cid_add) - size += sizeof(struct acpi_compatible_id); - list = kmalloc(size, GFP_KERNEL); if (list) { - if (cid_list) { - memcpy(list, cid_list, cid_list->size); - count = cid_list->count; - } - if (cid_add) { - strncpy(list->id[count].value, cid_add, - ACPI_MAX_CID_LENGTH); - count++; - device->flags.compatible_ids = 1; - } - list->size = size; - list->count = count; device->pnp.cid_list = list; - } else - printk(KERN_ERR PREFIX "Memory allocation error\n"); + if (cid_add) + device->flags.compatible_ids = 1; + } } - kfree(buffer.pointer); + kfree(info); } static int acpi_device_set_context(struct acpi_device *device, int type) diff --git a/drivers/char/agp/hp-agp.c b/drivers/char/agp/hp-agp.c index 8f3d4c184914..7bead4c816ca 100644 --- a/drivers/char/agp/hp-agp.c +++ b/drivers/char/agp/hp-agp.c @@ -478,7 +478,6 @@ zx1_gart_probe (acpi_handle obj, u32 depth, void *context, void **ret) { acpi_handle handle, parent; acpi_status status; - struct acpi_buffer buffer; struct acpi_device_info *info; u64 lba_hpa, sba_hpa, length; int match; @@ -490,13 +489,11 @@ zx1_gart_probe (acpi_handle obj, u32 depth, void *context, void **ret) /* Look for an enclosing IOC scope and find its CSR space */ handle = obj; do { - buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER; - status = acpi_get_object_info(handle, &buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_SUCCESS(status)) { /* TBD check _CID also */ - info = buffer.pointer; - info->hardware_id.value[sizeof(info->hardware_id)-1] = '\0'; - match = (strcmp(info->hardware_id.value, "HWP0001") == 0); + info->hardware_id.string[sizeof(info->hardware_id.length)-1] = '\0'; + match = (strcmp(info->hardware_id.string, "HWP0001") == 0); kfree(info); if (match) { status = hp_acpi_csr_space(handle, &sba_hpa, &length); diff --git a/drivers/ide/ide-acpi.c b/drivers/ide/ide-acpi.c index c509c9916464..c0cf45a11b93 100644 --- a/drivers/ide/ide-acpi.c +++ b/drivers/ide/ide-acpi.c @@ -114,8 +114,6 @@ static int ide_get_dev_handle(struct device *dev, acpi_handle *handle, unsigned int bus, devnum, func; acpi_integer addr; acpi_handle dev_handle; - struct acpi_buffer buffer = {.length = ACPI_ALLOCATE_BUFFER, - .pointer = NULL}; acpi_status status; struct acpi_device_info *dinfo = NULL; int ret = -ENODEV; @@ -134,12 +132,11 @@ static int ide_get_dev_handle(struct device *dev, acpi_handle *handle, goto err; } - status = acpi_get_object_info(dev_handle, &buffer); + status = acpi_get_object_info(dev_handle, &dinfo); if (ACPI_FAILURE(status)) { DEBPRINT("get_object_info for device failed\n"); goto err; } - dinfo = buffer.pointer; if (dinfo && (dinfo->valid & ACPI_VALID_ADR) && dinfo->address == addr) { *pcidevfn = addr; diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index 
5befa7e379b7..a9d926b7d805 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -398,23 +398,21 @@ static acpi_status __init ibm_find_acpi_device(acpi_handle handle, acpi_handle *phandle = (acpi_handle *)context; acpi_status status; struct acpi_device_info *info; - struct acpi_buffer info_buffer = { ACPI_ALLOCATE_BUFFER, NULL }; int retval = 0; - status = acpi_get_object_info(handle, &info_buffer); + status = acpi_get_object_info(handle, &info); if (ACPI_FAILURE(status)) { err("%s: Failed to get device information status=0x%x\n", __func__, status); return retval; } - info = info_buffer.pointer; - info->hardware_id.value[sizeof(info->hardware_id.value) - 1] = '\0'; + info->hardware_id.string[sizeof(info->hardware_id.length) - 1] = '\0'; if (info->current_status && (info->valid & ACPI_VALID_HID) && - (!strcmp(info->hardware_id.value, IBM_HARDWARE_ID1) || - !strcmp(info->hardware_id.value, IBM_HARDWARE_ID2))) { + (!strcmp(info->hardware_id.string, IBM_HARDWARE_ID1) || + !strcmp(info->hardware_id.string, IBM_HARDWARE_ID2))) { dbg("found hardware: %s, handle: %p\n", - info->hardware_id.value, handle); + info->hardware_id.string, handle); *phandle = handle; /* returning non-zero causes the search to stop * and returns this value to the caller of diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index dafaa4a92df5..f9f68e0e7344 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -976,15 +976,12 @@ static acpi_status sony_walk_callback(acpi_handle handle, u32 level, void *context, void **return_value) { struct acpi_device_info *info; - struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; - - if (ACPI_SUCCESS(acpi_get_object_info(handle, &buffer))) { - info = buffer.pointer; + if (ACPI_SUCCESS(acpi_get_object_info(handle, &info))) { printk(KERN_WARNING DRV_PFX "method: name: %4.4s, args %X\n", (char *)&info->name, info->param_count); - kfree(buffer.pointer); + kfree(info); } return AE_OK; diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index 9496494f340e..c07fdb94d665 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -194,13 +194,13 @@ static int __init pnpacpi_add_device(struct acpi_device *device) pnpacpi_parse_resource_option_data(dev); if (device->flags.compatible_ids) { - struct acpi_compatible_id_list *cid_list = device->pnp.cid_list; + struct acpica_device_id_list *cid_list = device->pnp.cid_list; int i; for (i = 0; i < cid_list->count; i++) { - if (!ispnpidacpi(cid_list->id[i].value)) + if (!ispnpidacpi(cid_list->ids[i].string)) continue; - pnp_add_id(dev, cid_list->id[i].value); + pnp_add_id(dev, cid_list->ids[i].string); } } diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c65e4ce6c3af..b91420b52c6f 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -173,17 +173,15 @@ struct acpi_device_dir { typedef char acpi_bus_id[8]; typedef unsigned long acpi_bus_address; -typedef char acpi_hardware_id[15]; -typedef char acpi_unique_id[9]; typedef char acpi_device_name[40]; typedef char acpi_device_class[20]; struct acpi_device_pnp { acpi_bus_id bus_id; /* Object name */ acpi_bus_address bus_address; /* _ADR */ - acpi_hardware_id hardware_id; /* _HID */ - struct acpi_compatible_id_list *cid_list; /* _CIDs */ - acpi_unique_id unique_id; /* _UID */ + char *hardware_id; /* _HID */ + struct acpica_device_id_list *cid_list; /* _CIDs */ + char *unique_id; /* _UID */ acpi_device_name device_name; /* 
Driver-determined */ acpi_device_class device_class; /* " */ }; diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index b450a195319a..04904c7f1aa1 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -200,7 +200,8 @@ acpi_evaluate_object_typed(acpi_handle object, acpi_object_type return_type); acpi_status -acpi_get_object_info(acpi_handle handle, struct acpi_buffer *return_buffer); +acpi_get_object_info(acpi_handle handle, + struct acpi_device_info **return_buffer); acpi_status acpi_install_method(u8 *buffer); diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 37ba576d06e8..7a4ff79e238c 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -338,7 +338,7 @@ typedef u32 acpi_physical_address; /* PM Timer ticks per second (HZ) */ -#define PM_TIMER_FREQUENCY 3579545 +#define PM_TIMER_FREQUENCY 3579545 /******************************************************************************* * @@ -969,38 +969,60 @@ acpi_status(*acpi_walk_callback) (acpi_handle obj_handle, #define ACPI_INTERRUPT_NOT_HANDLED 0x00 #define ACPI_INTERRUPT_HANDLED 0x01 -/* Length of _HID, _UID, _CID, and UUID values */ +/* Length of 32-bit EISAID values when converted back to a string */ + +#define ACPI_EISAID_STRING_SIZE 8 /* Includes null terminator */ + +/* Length of UUID (string) values */ -#define ACPI_DEVICE_ID_LENGTH 0x09 -#define ACPI_MAX_CID_LENGTH 48 #define ACPI_UUID_LENGTH 16 -/* Common string version of device HIDs and UIDs */ +/* Structures used for device/processor HID, UID, CID */ struct acpica_device_id { - char value[ACPI_DEVICE_ID_LENGTH]; + u32 length; /* Length of string + null */ + char *string; }; -/* Common string version of device CIDs */ - -struct acpi_compatible_id { - char value[ACPI_MAX_CID_LENGTH]; +struct acpica_device_id_list { + u32 count; /* Number of IDs in Ids array */ + u32 list_size; /* Size of list, including ID strings */ + struct acpica_device_id ids[1]; /* ID array */ }; -struct acpi_compatible_id_list { - u32 count; - u32 size; - struct acpi_compatible_id id[1]; +/* + * Structure returned from acpi_get_object_info. 
+ * Optimized for both 32- and 64-bit builds + */ +struct acpi_device_info { + u32 info_size; /* Size of info, including ID strings */ + u32 name; /* ACPI object Name */ + acpi_object_type type; /* ACPI object Type */ + u8 param_count; /* If a method, required parameter count */ + u8 valid; /* Indicates which optional fields are valid */ + u8 flags; /* Miscellaneous info */ + u8 highest_dstates[4]; /* _sx_d values: 0xFF indicates not valid */ + u8 lowest_dstates[5]; /* _sx_w values: 0xFF indicates not valid */ + u32 current_status; /* _STA value */ + acpi_integer address; /* _ADR value */ + struct acpica_device_id hardware_id; /* _HID value */ + struct acpica_device_id unique_id; /* _UID value */ + struct acpica_device_id_list compatible_id_list; /* _CID list */ }; -/* Structure and flags for acpi_get_object_info */ +/* Values for Flags field above (acpi_get_object_info) */ + +#define ACPI_PCI_ROOT_BRIDGE 0x01 -#define ACPI_VALID_STA 0x0001 -#define ACPI_VALID_ADR 0x0002 -#define ACPI_VALID_HID 0x0004 -#define ACPI_VALID_UID 0x0008 -#define ACPI_VALID_CID 0x0010 -#define ACPI_VALID_SXDS 0x0020 +/* Flags for Valid field above (acpi_get_object_info) */ + +#define ACPI_VALID_STA 0x01 +#define ACPI_VALID_ADR 0x02 +#define ACPI_VALID_HID 0x04 +#define ACPI_VALID_UID 0x08 +#define ACPI_VALID_CID 0x10 +#define ACPI_VALID_SXDS 0x20 +#define ACPI_VALID_SXWS 0x40 /* Flags for _STA method */ @@ -1011,29 +1033,6 @@ struct acpi_compatible_id_list { #define ACPI_STA_DEVICE_OK 0x08 /* Synonym */ #define ACPI_STA_BATTERY_PRESENT 0x10 -#define ACPI_COMMON_OBJ_INFO \ - acpi_object_type type; /* ACPI object type */ \ - acpi_name name /* ACPI object Name */ - -struct acpi_obj_info_header { - ACPI_COMMON_OBJ_INFO; -}; - -/* Structure returned from Get Object Info */ - -struct acpi_device_info { - ACPI_COMMON_OBJ_INFO; - - u32 param_count; /* If a method, required parameter count */ - u32 valid; /* Indicates which fields below are valid */ - u32 current_status; /* _STA value */ - acpi_integer address; /* _ADR value if any */ - struct acpica_device_id hardware_id; /* _HID value if any */ - struct acpica_device_id unique_id; /* _UID value if any */ - u8 highest_dstates[4]; /* _sx_d values: 0xFF indicates not valid */ - struct acpi_compatible_id_list compatibility_id; /* List of _CIDs if any */ -}; - /* Context structs for address space handlers */ struct acpi_pci_id { -- cgit v1.2.3 From 6557a49a443a347d24aed58076365432ded30edc Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 24 Jun 2009 11:32:04 +0800 Subject: ACPICA: ACPI 4.0: Interpreter support for IPMI. Adds support for IPMI which is similar to SMBus and uses a bi-directional data buffer. ACPICA BZ 773. 
http://acpica.org/bugzilla/show_bug.cgi?id=773 Signed-off-by: Lin Ming Signed-off-by: Bob Moore Signed-off-by: Len Brown --- drivers/acpi/acpica/acconfig.h | 3 +- drivers/acpi/acpica/exfield.c | 82 ++++++++++++++++++++++++++++-------------- drivers/acpi/acpica/exfldio.c | 7 ++-- include/acpi/actypes.h | 3 +- 4 files changed, 63 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h index 9123d5a11627..8e679ef5b231 100644 --- a/drivers/acpi/acpica/acconfig.h +++ b/drivers/acpi/acpica/acconfig.h @@ -199,9 +199,10 @@ #define ACPI_RSDP_CHECKSUM_LENGTH 20 #define ACPI_RSDP_XCHECKSUM_LENGTH 36 -/* SMBus bidirectional buffer size */ +/* SMBus and IPMI bidirectional buffer size */ #define ACPI_SMBUS_BUFFER_SIZE 34 +#define ACPI_IPMI_BUFFER_SIZE 66 /* _sx_d and _sx_w control methods */ diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c index 546dcdd86785..0b33d6c887b9 100644 --- a/drivers/acpi/acpica/exfield.c +++ b/drivers/acpi/acpica/exfield.c @@ -72,6 +72,7 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state, union acpi_operand_object *buffer_desc; acpi_size length; void *buffer; + u32 function; ACPI_FUNCTION_TRACE_PTR(ex_read_data_from_field, obj_desc); @@ -97,13 +98,27 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state, } } else if ((obj_desc->common.type == ACPI_TYPE_LOCAL_REGION_FIELD) && (obj_desc->field.region_obj->region.space_id == - ACPI_ADR_SPACE_SMBUS)) { + ACPI_ADR_SPACE_SMBUS + || obj_desc->field.region_obj->region.space_id == + ACPI_ADR_SPACE_IPMI)) { /* - * This is an SMBus read. We must create a buffer to hold the data - * and directly access the region handler. + * This is an SMBus or IPMI read. We must create a buffer to hold + * the data and then directly access the region handler. + * + * Note: Smbus protocol value is passed in upper 16-bits of Function */ - buffer_desc = - acpi_ut_create_buffer_object(ACPI_SMBUS_BUFFER_SIZE); + if (obj_desc->field.region_obj->region.space_id == + ACPI_ADR_SPACE_SMBUS) { + length = ACPI_SMBUS_BUFFER_SIZE; + function = + ACPI_READ | (obj_desc->field.attribute << 16); + } else { /* IPMI */ + + length = ACPI_IPMI_BUFFER_SIZE; + function = ACPI_READ; + } + + buffer_desc = acpi_ut_create_buffer_object(length); if (!buffer_desc) { return_ACPI_STATUS(AE_NO_MEMORY); } @@ -112,16 +127,13 @@ acpi_ex_read_data_from_field(struct acpi_walk_state *walk_state, acpi_ex_acquire_global_lock(obj_desc->common_field.field_flags); - /* - * Perform the read. - * Note: Smbus protocol value is passed in upper 16-bits of Function - */ + /* Call the region handler for the read */ + status = acpi_ex_access_region(obj_desc, 0, ACPI_CAST_PTR(acpi_integer, buffer_desc-> buffer.pointer), - ACPI_READ | (obj_desc->field. - attribute << 16)); + function); acpi_ex_release_global_lock(obj_desc->common_field.field_flags); goto exit; } @@ -212,6 +224,7 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, u32 length; void *buffer; union acpi_operand_object *buffer_desc; + u32 function; ACPI_FUNCTION_TRACE_PTR(ex_write_data_to_field, obj_desc); @@ -234,39 +247,56 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, } } else if ((obj_desc->common.type == ACPI_TYPE_LOCAL_REGION_FIELD) && (obj_desc->field.region_obj->region.space_id == - ACPI_ADR_SPACE_SMBUS)) { + ACPI_ADR_SPACE_SMBUS + || obj_desc->field.region_obj->region.space_id == + ACPI_ADR_SPACE_IPMI)) { /* - * This is an SMBus write. 
We will bypass the entire field mechanism - * and handoff the buffer directly to the handler. + * This is an SMBus or IPMI write. We will bypass the entire field + * mechanism and handoff the buffer directly to the handler. For + * these address spaces, the buffer is bi-directional; on a write, + * return data is returned in the same buffer. + * + * Source must be a buffer of sufficient size: + * ACPI_SMBUS_BUFFER_SIZE or ACPI_IPMI_BUFFER_SIZE. * - * Source must be a buffer of sufficient size (ACPI_SMBUS_BUFFER_SIZE). + * Note: SMBus protocol type is passed in upper 16-bits of Function */ if (source_desc->common.type != ACPI_TYPE_BUFFER) { ACPI_ERROR((AE_INFO, - "SMBus write requires Buffer, found type %s", + "SMBus or IPMI write requires Buffer, found type %s", acpi_ut_get_object_type_name(source_desc))); return_ACPI_STATUS(AE_AML_OPERAND_TYPE); } - if (source_desc->buffer.length < ACPI_SMBUS_BUFFER_SIZE) { + if (obj_desc->field.region_obj->region.space_id == + ACPI_ADR_SPACE_SMBUS) { + length = ACPI_SMBUS_BUFFER_SIZE; + function = + ACPI_WRITE | (obj_desc->field.attribute << 16); + } else { /* IPMI */ + + length = ACPI_IPMI_BUFFER_SIZE; + function = ACPI_WRITE; + } + + if (source_desc->buffer.length < length) { ACPI_ERROR((AE_INFO, - "SMBus write requires Buffer of length %X, found length %X", - ACPI_SMBUS_BUFFER_SIZE, - source_desc->buffer.length)); + "SMBus or IPMI write requires Buffer of length %X, found length %X", + length, source_desc->buffer.length)); return_ACPI_STATUS(AE_AML_BUFFER_LIMIT); } - buffer_desc = - acpi_ut_create_buffer_object(ACPI_SMBUS_BUFFER_SIZE); + /* Create the bi-directional buffer */ + + buffer_desc = acpi_ut_create_buffer_object(length); if (!buffer_desc) { return_ACPI_STATUS(AE_NO_MEMORY); } buffer = buffer_desc->buffer.pointer; - ACPI_MEMCPY(buffer, source_desc->buffer.pointer, - ACPI_SMBUS_BUFFER_SIZE); + ACPI_MEMCPY(buffer, source_desc->buffer.pointer, length); /* Lock entire transaction if requested */ @@ -275,12 +305,10 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, /* * Perform the write (returns status and perhaps data in the * same buffer) - * Note: SMBus protocol type is passed in upper 16-bits of Function. */ status = acpi_ex_access_region(obj_desc, 0, (acpi_integer *) buffer, - ACPI_WRITE | (obj_desc->field. 
- attribute << 16)); + function); acpi_ex_release_global_lock(obj_desc->common_field.field_flags); *result_desc = buffer_desc; diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c index 6687be167f5f..d7b3b418fb45 100644 --- a/drivers/acpi/acpica/exfldio.c +++ b/drivers/acpi/acpica/exfldio.c @@ -120,12 +120,13 @@ acpi_ex_setup_region(union acpi_operand_object *obj_desc, } /* - * Exit now for SMBus address space, it has a non-linear address space + * Exit now for SMBus or IPMI address space, it has a non-linear address space * and the request cannot be directly validated */ - if (rgn_desc->region.space_id == ACPI_ADR_SPACE_SMBUS) { + if (rgn_desc->region.space_id == ACPI_ADR_SPACE_SMBUS || + rgn_desc->region.space_id == ACPI_ADR_SPACE_IPMI) { - /* SMBus has a non-linear address space */ + /* SMBus or IPMI has a non-linear address space */ return_ACPI_STATUS(AE_OK); } diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 7a4ff79e238c..4371805d2def 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -732,7 +732,8 @@ typedef u8 acpi_adr_space_type; #define ACPI_ADR_SPACE_SMBUS (acpi_adr_space_type) 4 #define ACPI_ADR_SPACE_CMOS (acpi_adr_space_type) 5 #define ACPI_ADR_SPACE_PCI_BAR_TARGET (acpi_adr_space_type) 6 -#define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 7 +#define ACPI_ADR_SPACE_IPMI (acpi_adr_space_type) 7 +#define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 8 #define ACPI_ADR_SPACE_FIXED_HARDWARE (acpi_adr_space_type) 127 /* -- cgit v1.2.3 From 8e4319c425077c4cc540696a5bb6c4d12f017dcd Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 29 Jun 2009 13:43:27 +0800 Subject: ACPICA: Fix several acpi_attach_data problems Handler was never invoked. Now invoked if/when host node is deleted. Data object was not automatically deleted when host node was deleted. Interface to handler had an unused parameter, removed it. ACPICA BZ 778. http://acpica.org/bugzilla/show_bug.cgi?id=778 Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acnamesp.h | 2 + drivers/acpi/acpica/nsalloc.c | 88 +++++++++++++++++++++++++++++------------- drivers/acpi/acpica/nsload.c | 3 +- drivers/acpi/bus.c | 2 +- drivers/acpi/glue.c | 2 +- drivers/acpi/scan.c | 2 +- include/acpi/acpi_bus.h | 4 +- include/acpi/actypes.h | 2 +- 8 files changed, 70 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index 94cdc2b8cb93..a78e02f62d5e 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -144,6 +144,8 @@ struct acpi_namespace_node *acpi_ns_create_node(u32 name); void acpi_ns_delete_node(struct acpi_namespace_node *node); +void acpi_ns_remove_node(struct acpi_namespace_node *node); + void acpi_ns_delete_namespace_subtree(struct acpi_namespace_node *parent_handle); diff --git a/drivers/acpi/acpica/nsalloc.c b/drivers/acpi/acpica/nsalloc.c index efc971ab7d65..8a58a1b85aa0 100644 --- a/drivers/acpi/acpica/nsalloc.c +++ b/drivers/acpi/acpica/nsalloc.c @@ -96,17 +96,68 @@ struct acpi_namespace_node *acpi_ns_create_node(u32 name) * * RETURN: None * - * DESCRIPTION: Delete a namespace node + * DESCRIPTION: Delete a namespace node. All node deletions must come through + * here. Detaches any attached objects, including any attached + * data. If a handler is associated with attached data, it is + * invoked before the node is deleted. 
* ******************************************************************************/ void acpi_ns_delete_node(struct acpi_namespace_node *node) +{ + union acpi_operand_object *obj_desc; + + ACPI_FUNCTION_NAME(ns_delete_node); + + /* Detach an object if there is one */ + + acpi_ns_detach_object(node); + + /* + * Delete an attached data object if present (an object that was created + * and attached via acpi_attach_data). Note: After any normal object is + * detached above, the only possible remaining object is a data object. + */ + obj_desc = node->object; + if (obj_desc && (obj_desc->common.type == ACPI_TYPE_LOCAL_DATA)) { + + /* Invoke the attached data deletion handler if present */ + + if (obj_desc->data.handler) { + obj_desc->data.handler(node, obj_desc->data.pointer); + } + + acpi_ut_remove_reference(obj_desc); + } + + /* Now we can delete the node */ + + (void)acpi_os_release_object(acpi_gbl_namespace_cache, node); + + ACPI_MEM_TRACKING(acpi_gbl_ns_node_list->total_freed++); + ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS, "Node %p, Remaining %X\n", + node, acpi_gbl_current_node_count)); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ns_remove_node + * + * PARAMETERS: Node - Node to be removed/deleted + * + * RETURN: None + * + * DESCRIPTION: Remove (unlink) and delete a namespace node + * + ******************************************************************************/ + +void acpi_ns_remove_node(struct acpi_namespace_node *node) { struct acpi_namespace_node *parent_node; struct acpi_namespace_node *prev_node; struct acpi_namespace_node *next_node; - ACPI_FUNCTION_TRACE_PTR(ns_delete_node, node); + ACPI_FUNCTION_TRACE_PTR(ns_remove_node, node); parent_node = acpi_ns_get_parent_node(node); @@ -142,12 +193,9 @@ void acpi_ns_delete_node(struct acpi_namespace_node *node) } } - ACPI_MEM_TRACKING(acpi_gbl_ns_node_list->total_freed++); - - /* Detach an object if there is one, then delete the node */ + /* Delete the node and any attached objects */ - acpi_ns_detach_object(node); - (void)acpi_os_release_object(acpi_gbl_namespace_cache, node); + acpi_ns_delete_node(node); return_VOID; } @@ -273,25 +321,11 @@ void acpi_ns_delete_children(struct acpi_namespace_node *parent_node) parent_node, child_node)); } - /* Now we can free this child object */ - - ACPI_MEM_TRACKING(acpi_gbl_ns_node_list->total_freed++); - - ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS, - "Object %p, Remaining %X\n", child_node, - acpi_gbl_current_node_count)); - - /* Detach an object if there is one, then free the child node */ - - acpi_ns_detach_object(child_node); - - /* Now we can delete the node */ - - (void)acpi_os_release_object(acpi_gbl_namespace_cache, - child_node); - - /* And move on to the next child in the list */ - + /* + * Delete this child node and move on to the next child in the list. + * No need to unlink the node since we are deleting the entire branch. 
+ */ + acpi_ns_delete_node(child_node); child_node = next_node; } while (!(flags & ANOBJ_END_OF_PEER_LIST)); @@ -433,7 +467,7 @@ void acpi_ns_delete_namespace_by_owner(acpi_owner_id owner_id) if (deletion_node) { acpi_ns_delete_children(deletion_node); - acpi_ns_delete_node(deletion_node); + acpi_ns_remove_node(deletion_node); deletion_node = NULL; } diff --git a/drivers/acpi/acpica/nsload.c b/drivers/acpi/acpica/nsload.c index dcd7a6adbbbc..a7234e60e985 100644 --- a/drivers/acpi/acpica/nsload.c +++ b/drivers/acpi/acpica/nsload.c @@ -270,8 +270,7 @@ static acpi_status acpi_ns_delete_subtree(acpi_handle start_handle) /* Now delete the starting object, and we are done */ - acpi_ns_delete_node(child_handle); - + acpi_ns_remove_node(child_handle); return_ACPI_STATUS(AE_OK); } diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 2876fc70c3a9..620183f13e5e 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -141,7 +141,7 @@ int acpi_bus_get_status(struct acpi_device *device) EXPORT_SYMBOL(acpi_bus_get_status); void acpi_bus_private_data_handler(acpi_handle handle, - u32 function, void *context) + void *context) { return; } diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 27a7072347ea..9a4ce33f137e 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -119,7 +119,7 @@ EXPORT_SYMBOL(acpi_get_child); /* Link ACPI devices with physical devices */ static void acpi_glue_data_handler(acpi_handle handle, - u32 function, void *context) + void *context) { /* we provide an empty handler */ } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 0ab526de7c55..9606af13d3b8 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -687,7 +687,7 @@ acpi_bus_get_ejd(acpi_handle handle, acpi_handle *ejd) } EXPORT_SYMBOL_GPL(acpi_bus_get_ejd); -void acpi_bus_data_handler(acpi_handle handle, u32 function, void *context) +void acpi_bus_data_handler(acpi_handle handle, void *context) { /* TBD */ diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index b91420b52c6f..6e83a68fbd7b 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -312,7 +312,7 @@ struct acpi_bus_event { extern struct kobject *acpi_kobj; extern int acpi_bus_generate_netlink_event(const char*, const char*, u8, int); -void acpi_bus_private_data_handler(acpi_handle, u32, void *); +void acpi_bus_private_data_handler(acpi_handle, void *); int acpi_bus_get_private_data(acpi_handle, void **); extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32); extern int register_acpi_notifier(struct notifier_block *); @@ -325,7 +325,7 @@ extern void unregister_acpi_bus_notifier(struct notifier_block *nb); */ int acpi_bus_get_device(acpi_handle handle, struct acpi_device **device); -void acpi_bus_data_handler(acpi_handle handle, u32 function, void *context); +void acpi_bus_data_handler(acpi_handle handle, void *context); int acpi_bus_get_status(struct acpi_device *device); int acpi_bus_get_power(acpi_handle handle, int *state); int acpi_bus_set_power(acpi_handle handle, int state); diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 4371805d2def..ef4601149f49 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -922,7 +922,7 @@ typedef void (*acpi_notify_handler) (acpi_handle device, u32 value, void *context); typedef -void (*acpi_object_handler) (acpi_handle object, u32 function, void *data); +void (*acpi_object_handler) (acpi_handle object, void *data); typedef acpi_status(*acpi_init_handler) (acpi_handle object, u32 function); -- cgit v1.2.3 From 
eb2289ba1ba994de25af0d94b5e80ba93d2c1c3c Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Wed, 24 Jun 2009 13:42:00 +0800 Subject: ACPICA: ACPI 4.0: Changes for existing ACPI tables. FACS: new flag and new OspmFlags field. SRAT: x2APIC - add ClockDomain field to descriptor #2 Includes header and disassembler support. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl.h | 19 +++++++++++++------ include/acpi/actbl1.h | 8 +++++--- 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 222733d01f36..0649a5670026 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -161,17 +161,24 @@ struct acpi_table_facs { u32 flags; u64 xfirmware_waking_vector; /* 64-bit version of the Firmware Waking Vector (ACPI 2.0+) */ u8 version; /* Version of this table (ACPI 2.0+) */ - u8 reserved[31]; /* Reserved, must be zero */ + u8 reserved[3]; /* Reserved, must be zero */ + u32 ospm_flags; /* Flags to be set by OSPM (ACPI 4.0) */ + u8 reserved1[24]; /* Reserved, must be zero */ }; -/* Flag macros */ +/* global_lock flags */ + +#define ACPI_GLOCK_PENDING (1) /* 00: Pending global lock ownership */ +#define ACPI_GLOCK_OWNED (1<<1) /* 01: Global lock is owned */ + +/* Flags */ -#define ACPI_FACS_S4_BIOS_PRESENT (1) /* 00: S4BIOS support is present */ +#define ACPI_FACS_S4_BIOS_PRESENT (1) /* 00: S4BIOS support is present */ +#define ACPI_FACS_64BIT_WAKE (1<<1) /* 01: 64-bit wake vector supported (ACPI 4.0) */ -/* Global lock flags */ +/* ospm_flags */ -#define ACPI_GLOCK_PENDING 0x01 /* 00: Pending global lock ownership */ -#define ACPI_GLOCK_OWNED 0x02 /* 01: Global lock is owned */ +#define ACPI_FACS_64BIT_ENVIRONMENT (1) /* 00: 64-bit wake environment is required (ACPI 4.0) */ /******************************************************************************* * diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 59ade0752473..ec36693f868c 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -1011,7 +1011,7 @@ struct acpi_madt_interrupt_source { #define ACPI_MADT_CPEI_OVERRIDE (1) -/* 9: Processor Local X2_APIC (07/2008) */ +/* 9: Processor Local X2APIC (ACPI 4.0) */ struct acpi_madt_local_x2apic { struct acpi_subtable_header header; @@ -1021,7 +1021,7 @@ struct acpi_madt_local_x2apic { u32 uid; /* ACPI processor UID */ }; -/* 10: Local X2APIC NMI (07/2008) */ +/* 10: Local X2APIC NMI (ACPI 4.0) */ struct acpi_madt_local_x2apic_nmi { struct acpi_subtable_header header; @@ -1211,7 +1211,7 @@ struct acpi_srat_mem_affinity { #define ACPI_SRAT_MEM_HOT_PLUGGABLE (1<<1) /* 01: Memory region is hot pluggable */ #define ACPI_SRAT_MEM_NON_VOLATILE (1<<2) /* 02: Memory region is non-volatile */ -/* 2: Processor Local X2_APIC Affinity (07/2008) */ +/* 2: Processor Local X2_APIC Affinity (ACPI 4.0) */ struct acpi_srat_x2apic_cpu_affinity { struct acpi_subtable_header header; @@ -1219,6 +1219,8 @@ struct acpi_srat_x2apic_cpu_affinity { u32 proximity_domain; u32 apic_id; u32 flags; + u32 clock_domain; + u32 reserved2; }; /* Flags for struct acpi_srat_cpu_affinity and struct acpi_srat_x2apic_cpu_affinity */ -- cgit v1.2.3 From 3ce804ed83827a7fd27190836f9421b29ac64512 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 25 Jun 2009 10:31:32 -0700 Subject: ACPICA: Update version to 20090625 Update version number. 
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 04904c7f1aa1..063e577e791e 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20090521 +#define ACPI_CA_VERSION 0x20090625 #include "actypes.h" #include "actbl.h" -- cgit v1.2.3 From 36ce99c1dcab2978fc1900f19b431adedd8f99f6 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 27 Aug 2009 16:45:07 +0200 Subject: ALSA: Add debug module option Add debug module option to snd core. This controls the debug print level. When CONFIG_SND_DEBUG_VERBOSE is set, you can suppress the debug messages by giving or changing this parameter to a lower value. debug=0 means no debug messsages. As default, it's set to the verbose level 2. Since this option can be changed dynamically via sysfs file, you can suppress the verbose debug messages on the fly, which wasn't possible before. Signed-off-by: Takashi Iwai --- Documentation/sound/alsa/ALSA-Configuration.txt | 6 +++ include/sound/core.h | 38 ++++++-------- sound/core/misc.c | 67 ++++++++++++++----------- 3 files changed, 59 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt index 4252697a95d6..8e5b3488b370 100644 --- a/Documentation/sound/alsa/ALSA-Configuration.txt +++ b/Documentation/sound/alsa/ALSA-Configuration.txt @@ -60,6 +60,12 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed. slots - Reserve the slot index for the given driver. This option takes multiple strings. See "Module Autoloading Support" section for details. + debug - Specifies the debug message level + (0 = disable debug prints, 1 = normal debug messages, + 2 = verbose debug messages) + This option appears only when CONFIG_SND_DEBUG=y. + This option can be dynamically changed via sysfs + /sys/modules/snd/parameters/debug file. Module snd-pcm-oss ------------------ diff --git a/include/sound/core.h b/include/sound/core.h index 309cb9659a05..a89728db5584 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -340,18 +340,17 @@ unsigned int snd_dma_pointer(unsigned long dma, unsigned int size); struct resource; void release_and_free_resource(struct resource *res); -#ifdef CONFIG_SND_VERBOSE_PRINTK -void snd_verbose_printk(const char *file, int line, const char *format, ...) - __attribute__ ((format (printf, 3, 4))); -#endif -#if defined(CONFIG_SND_DEBUG) && defined(CONFIG_SND_VERBOSE_PRINTK) -void snd_verbose_printd(const char *file, int line, const char *format, ...) - __attribute__ ((format (printf, 3, 4))); -#endif - /* --- */ -#ifdef CONFIG_SND_VERBOSE_PRINTK +#if defined(CONFIG_SND_DEBUG) || defined(CONFIG_SND_VERBOSE_PRINTK) +void __snd_printk(unsigned int level, const char *file, int line, + const char *format, ...) + __attribute__ ((format (printf, 4, 5))); +#else +#define __snd_printk(level, file, line, format, args...) \ + prinkt(format, ##args) +#endif + /** * snd_printk - printk wrapper * @fmt: format string @@ -360,15 +359,9 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...) * when configured with CONFIG_SND_VERBOSE_PRINTK. */ #define snd_printk(fmt, args...) \ - snd_verbose_printk(__FILE__, __LINE__, fmt ,##args) -#else -#define snd_printk(fmt, args...) 
\ - printk(fmt ,##args) -#endif + __snd_printk(0, __FILE__, __LINE__, fmt, ##args) #ifdef CONFIG_SND_DEBUG - -#ifdef CONFIG_SND_VERBOSE_PRINTK /** * snd_printd - debug printk * @fmt: format string @@ -377,11 +370,7 @@ void snd_verbose_printd(const char *file, int line, const char *format, ...) * Ignored when CONFIG_SND_DEBUG is not set. */ #define snd_printd(fmt, args...) \ - snd_verbose_printd(__FILE__, __LINE__, fmt ,##args) -#else -#define snd_printd(fmt, args...) \ - printk(fmt ,##args) -#endif + __snd_printk(1, __FILE__, __LINE__, fmt, ##args) /** * snd_BUG - give a BUG warning message and stack trace @@ -428,9 +417,10 @@ static inline int __snd_bug_on(int cond) * Works like snd_printk() for debugging purposes. * Ignored when CONFIG_SND_DEBUG_VERBOSE is not set. */ -#define snd_printdd(format, args...) snd_printk(format, ##args) +#define snd_printdd(format, args...) \ + __snd_printk(2, __FILE__, __LINE__, format, ##args) #else -#define snd_printdd(format, args...) /* nothing */ +#define snd_printdd(format, args...) do { } while (0) #endif diff --git a/sound/core/misc.c b/sound/core/misc.c index 1d29e678369e..23a032c6d487 100644 --- a/sound/core/misc.c +++ b/sound/core/misc.c @@ -24,6 +24,20 @@ #include #include +#ifdef CONFIG_SND_DEBUG + +#ifdef CONFIG_SND_DEBUG_VERBOSE +#define DEFAULT_DEBUG_LEVEL 2 +#else +#define DEFAULT_DEBUG_LEVEL 1 +#endif + +static int debug = DEFAULT_DEBUG_LEVEL; +module_param(debug, int, 0644); +MODULE_PARM_DESC(debug, "Debug level (0 = disable)"); + +#endif /* CONFIG_SND_DEBUG */ + void release_and_free_resource(struct resource *res) { if (res) { @@ -35,6 +49,7 @@ void release_and_free_resource(struct resource *res) EXPORT_SYMBOL(release_and_free_resource); #ifdef CONFIG_SND_VERBOSE_PRINTK +/* strip the leading path if the given path is absolute */ static const char *sanity_file_name(const char *path) { if (*path == '/') @@ -43,48 +58,44 @@ static const char *sanity_file_name(const char *path) return path; } -void snd_verbose_printk(const char *path, int line, const char *format, ...) +/* print file and line with a certain printk prefix */ +static int print_snd_pfx(unsigned int level, const char *path, int line, + const char *format) { const char *file = sanity_file_name(path); - va_list args; - - if (format[0] == '<' && format[1] >= '0' && format[1] <= '7' && format[2] == '>') { - char tmp[] = "<0>"; + char tmp[] = "<0>"; + const char *pfx = level ? KERN_DEBUG : KERN_DEFAULT; + int ret = 0; + + if (format[0] == '<' && format[2] == '>') { tmp[1] = format[1]; - printk("%sALSA %s:%d: ", tmp, file, line); - format += 3; - } else { - printk("ALSA %s:%d: ", file, line); + pfx = tmp; + ret = 1; } - va_start(args, format); - vprintk(format, args); - va_end(args); + printk("%sALSA %s:%d: ", pfx, file, line); + return ret; } - -EXPORT_SYMBOL(snd_verbose_printk); +#else +#define print_snd_pfx(level, path, line, format) 0 #endif -#if defined(CONFIG_SND_DEBUG) && defined(CONFIG_SND_VERBOSE_PRINTK) -void snd_verbose_printd(const char *path, int line, const char *format, ...) +#if defined(CONFIG_SND_DEBUG) || defined(CONFIG_SND_VERBOSE_PRINTK) +void __snd_printk(unsigned int level, const char *path, int line, + const char *format, ...) 
{ - const char *file = sanity_file_name(path); va_list args; - if (format[0] == '<' && format[1] >= '0' && format[1] <= '7' && format[2] == '>') { - char tmp[] = "<0>"; - tmp[1] = format[1]; - printk("%sALSA %s:%d: ", tmp, file, line); - format += 3; - } else { - printk(KERN_DEBUG "ALSA %s:%d: ", file, line); - } +#ifdef CONFIG_SND_DEBUG + if (debug < level) + return; +#endif va_start(args, format); + if (print_snd_pfx(level, path, line, format)) + format += 3; /* skip the printk level-prefix */ vprintk(format, args); va_end(args); - } - -EXPORT_SYMBOL(snd_verbose_printd); +EXPORT_SYMBOL_GPL(__snd_printk); #endif #ifdef CONFIG_PCI -- cgit v1.2.3 From 6c347d43eea29221a8ebab9ff9cbe7a00cddac98 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 27 Aug 2009 18:17:34 +0200 Subject: tracing: Undef TRACE_EVENT_FN between trace events headers inclusion The recent commit: tracing/events: fix the include file dependencies fixed a file dependency problem while including more than one trace event header file. This fix undefined TRACE_EVENT after an event header macro preprocessing in order to make tracepoint.h able to correctly declare the tracepoints necessary for the next event header file. But now we also need to undefine TRACE_EVENT_FN at the end of an event header file preprocessing for the same reason. This fixes the following build error: In file included from include/trace/events/napi.h:5, from net/core/net-traces.c:28: include/linux/tracepoint.h:285:1: warning: "TRACE_EVENT_FN" redefined In file included from include/trace/define_trace.h:61, from include/trace/events/skb.h:40, from net/core/net-traces.c:27: include/trace/ftrace.h:50:1: warning: this is the location of the previous definition In file included from include/trace/events/napi.h:5, from net/core/net-traces.c:28: include/linux/tracepoint.h:285:1: warning: "TRACE_EVENT_FN" redefined In file included from include/trace/define_trace.h:61, from include/trace/events/skb.h:40, from net/core/net-traces.c:27: include/trace/ftrace.h:50:1: warning: this is the location of the previous definition Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: Masami Hiramatsu Cc: Xiao Guangrong Cc: Steven Rostedt Cc: Li Zefan LKML-Reference: <20090827161732.GA7618@nowhere> Signed-off-by: Ingo Molnar --- include/trace/define_trace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h index a89ed590597a..2a4b3bf74033 100644 --- a/include/trace/define_trace.h +++ b/include/trace/define_trace.h @@ -62,6 +62,7 @@ #endif #undef TRACE_EVENT +#undef TRACE_EVENT_FN #undef TRACE_HEADER_MULTI_READ /* Only undef what we defined in this file */ -- cgit v1.2.3 From a5fe1a03f7720b8da8364a1737e1e5a357904e99 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 13 Aug 2009 10:43:27 +0800 Subject: ACPICA: fix leak of acpi_os_validate_address http://bugzilla.kernel.org/show_bug.cgi?id=13620 If the dynamic region is created and added to resource list over and over again, it has the potential to be a memory leak by growing the list every time. This patch fixes the memory leak, as below 1) add a new field "count" to struct acpi_res_list. When inserting, if the region(addr, len) is already in the resource list, we just increase "count", otherwise, the region is inserted with count=1. When deleting, the "count" is decreased, if it's decreased to 0, the region is deleted from the resource list. 
With "count", the region with same address and length can only be inserted to the resource list once, so prevent potential memory leak. 2) add a new function acpi_os_invalidate_address, which is called when region is deleted. Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/utdelete.c | 6 +++ drivers/acpi/osl.c | 94 ++++++++++++++++++++++++++++++++++++++++-- include/acpi/acpiosxf.h | 3 ++ 3 files changed, 100 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c index bc1710315088..96e26e70c63d 100644 --- a/drivers/acpi/acpica/utdelete.c +++ b/drivers/acpi/acpica/utdelete.c @@ -215,6 +215,12 @@ static void acpi_ut_delete_internal_obj(union acpi_operand_object *object) ACPI_DEBUG_PRINT((ACPI_DB_ALLOCATIONS, "***** Region %p\n", object)); + /* Invalidate the region address/length via the host OS */ + + acpi_os_invalidate_address(object->region.space_id, + object->region.address, + (acpi_size) object->region.length); + second_desc = acpi_ns_get_secondary_object(object); if (second_desc) { /* diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 5691f165a952..c5b4f1ed9b71 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -88,6 +88,7 @@ struct acpi_res_list { char name[5]; /* only can have a length of 4 chars, make use of this one instead of res->name, no need to kalloc then */ struct list_head resource_list; + int count; }; static LIST_HEAD(resource_list_head); @@ -1358,6 +1359,89 @@ acpi_os_validate_interface (char *interface) return AE_SUPPORT; } +static inline int acpi_res_list_add(struct acpi_res_list *res) +{ + struct acpi_res_list *res_list_elem; + + list_for_each_entry(res_list_elem, &resource_list_head, + resource_list) { + + if (res->resource_type == res_list_elem->resource_type && + res->start == res_list_elem->start && + res->end == res_list_elem->end) { + + /* + * The Region(addr,len) already exist in the list, + * just increase the count + */ + + res_list_elem->count++; + return 0; + } + } + + res->count = 1; + list_add(&res->resource_list, &resource_list_head); + return 1; +} + +static inline void acpi_res_list_del(struct acpi_res_list *res) +{ + struct acpi_res_list *res_list_elem; + + list_for_each_entry(res_list_elem, &resource_list_head, + resource_list) { + + if (res->resource_type == res_list_elem->resource_type && + res->start == res_list_elem->start && + res->end == res_list_elem->end) { + + /* + * If the res count is decreased to 0, + * remove and free it + */ + + if (--res_list_elem->count == 0) { + list_del(&res_list_elem->resource_list); + kfree(res_list_elem); + } + return; + } + } +} + +acpi_status +acpi_os_invalidate_address( + u8 space_id, + acpi_physical_address address, + acpi_size length) +{ + struct acpi_res_list res; + + switch (space_id) { + case ACPI_ADR_SPACE_SYSTEM_IO: + case ACPI_ADR_SPACE_SYSTEM_MEMORY: + /* Only interference checks against SystemIO and SytemMemory + are needed */ + res.start = address; + res.end = address + length - 1; + res.resource_type = space_id; + spin_lock(&acpi_res_lock); + acpi_res_list_del(&res); + spin_unlock(&acpi_res_lock); + break; + case ACPI_ADR_SPACE_PCI_CONFIG: + case ACPI_ADR_SPACE_EC: + case ACPI_ADR_SPACE_SMBUS: + case ACPI_ADR_SPACE_CMOS: + case ACPI_ADR_SPACE_PCI_BAR_TARGET: + case ACPI_ADR_SPACE_DATA_TABLE: + case ACPI_ADR_SPACE_FIXED_HARDWARE: + break; + } + return AE_OK; +} + /****************************************************************************** * * FUNCTION: acpi_os_validate_address @@ 
-1382,6 +1466,7 @@ acpi_os_validate_address ( char *name) { struct acpi_res_list *res; + int added; if (acpi_enforce_resources == ENFORCE_RESOURCES_NO) return AE_OK; @@ -1399,14 +1484,17 @@ acpi_os_validate_address ( res->end = address + length - 1; res->resource_type = space_id; spin_lock(&acpi_res_lock); - list_add(&res->resource_list, &resource_list_head); + added = acpi_res_list_add(res); spin_unlock(&acpi_res_lock); - pr_debug("Added %s resource: start: 0x%llx, end: 0x%llx, " - "name: %s\n", (space_id == ACPI_ADR_SPACE_SYSTEM_IO) + pr_debug("%s %s resource: start: 0x%llx, end: 0x%llx, " + "name: %s\n", added ? "Added" : "Already exist", + (space_id == ACPI_ADR_SPACE_SYSTEM_IO) ? "SystemIO" : "System Memory", (unsigned long long)res->start, (unsigned long long)res->end, res->name); + if (!added) + kfree(res); break; case ACPI_ADR_SPACE_PCI_CONFIG: case ACPI_ADR_SPACE_EC: diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index ab0b85cf21f3..eb0e7189075f 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -245,6 +245,9 @@ acpi_status acpi_osi_invalidate(char* interface); acpi_status acpi_os_validate_address(u8 space_id, acpi_physical_address address, acpi_size length, char *name); +acpi_status +acpi_os_invalidate_address(u8 space_id, acpi_physical_address address, + acpi_size length); u64 acpi_os_get_timer(void); -- cgit v1.2.3 From 5a53a7640a7af7acf904ed805c6fd1bf9fea829c Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 27 Aug 2009 21:04:12 +0200 Subject: ALSA: pcm - Increase protocol version Increase the PCM protocol version to indicate the drain ioctl behavior change. Signed-off-by: Takashi Iwai --- include/sound/asound.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/asound.h b/include/sound/asound.h index 82aed3f47534..1f57bb92eb5a 100644 --- a/include/sound/asound.h +++ b/include/sound/asound.h @@ -138,7 +138,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 9) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 10) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; -- cgit v1.2.3 From f726f30e32305a34a203ff975e60885aa7556c6a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Aug 2009 19:08:24 +0000 Subject: dma: Add set_dma_mask hook to struct dma_map_ops POWERPC needs this hook. SPARC could use it too. 
Signed-off-by: FUJITA Tomonori Acked-by: Becky Bruce Signed-off-by: Benjamin Herrenschmidt --- include/linux/dma-mapping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index c0f6c3cd788c..91b761846061 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -58,6 +58,7 @@ struct dma_map_ops { enum dma_data_direction dir); int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); + int (*set_dma_mask)(struct device *dev, u64 mask); int is_phys; }; -- cgit v1.2.3 From 77a53fd21870c726b670c0d8179294ac1ea33468 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 25 Aug 2009 19:24:13 -0700 Subject: Input: matrix-keypad - add function to build device keymap Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/matrix_keypad.c | 15 +++------------ drivers/input/keyboard/w90p910_keypad.c | 16 ++-------------- include/linux/input/matrix_keypad.h | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index 541b981ff075..91cfe5170265 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -319,7 +319,6 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) struct input_dev *input_dev; unsigned short *keycodes; unsigned int row_shift; - int i; int err; pdata = pdev->dev.platform_data; @@ -363,18 +362,10 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) input_dev->keycode = keycodes; input_dev->keycodesize = sizeof(*keycodes); - input_dev->keycodemax = pdata->num_row_gpios << keypad->row_shift; - - for (i = 0; i < keymap_data->keymap_size; i++) { - unsigned int key = keymap_data->keymap[i]; - unsigned int row = KEY_ROW(key); - unsigned int col = KEY_COL(key); - unsigned short code = KEY_VAL(key); + input_dev->keycodemax = pdata->num_row_gpios << row_shift; - keycodes[MATRIX_SCAN_CODE(row, col, row_shift)] = code; - __set_bit(code, input_dev->keybit); - } - __clear_bit(KEY_RESERVED, input_dev->keybit); + matrix_keypad_build_keymap(keymap_data, row_shift, + input_dev->keycode, input_dev->keybit); input_set_capability(input_dev, EV_MSC, MSC_SCAN); input_set_drvdata(input_dev, keypad); diff --git a/drivers/input/keyboard/w90p910_keypad.c b/drivers/input/keyboard/w90p910_keypad.c index b8598ae124ee..2d03dd0f9e07 100644 --- a/drivers/input/keyboard/w90p910_keypad.c +++ b/drivers/input/keyboard/w90p910_keypad.c @@ -126,7 +126,6 @@ static int __devinit w90p910_keypad_probe(struct platform_device *pdev) struct resource *res; int irq; int error; - int i; if (!pdata) { dev_err(&pdev->dev, "no platform data defined\n"); @@ -197,19 +196,8 @@ static int __devinit w90p910_keypad_probe(struct platform_device *pdev) input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP); input_set_capability(input_dev, EV_MSC, MSC_SCAN); - for (i = 0; i < keymap_data->keymap_size; i++) { - unsigned int key = keymap_data->keymap[i]; - unsigned int row = KEY_ROW(key); - unsigned int col = KEY_COL(key); - unsigned short keycode = KEY_VAL(key); - unsigned int scancode = MATRIX_SCAN_CODE(row, col, - W90P910_ROW_SHIFT); - - keypad->keymap[scancode] = keycode; - __set_bit(keycode, input_dev->keybit); - } - __clear_bit(KEY_RESERVED, input_dev->keybit); - + matrix_keypad_build_keymap(keymap_data, W90P910_ROW_SHIFT, + input_dev->keycode, 
input_dev->keybit); error = request_irq(keypad->irq, w90p910_keypad_irq_handler, IRQF_DISABLED, pdev->name, keypad); diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index 15d5903af2dd..b3cd42d50e16 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -63,4 +63,36 @@ struct matrix_keypad_platform_data { bool wakeup; }; +/** + * matrix_keypad_build_keymap - convert platform keymap into matrix keymap + * @keymap_data: keymap supplied by the platform code + * @row_shift: number of bits to shift row value by to advance to the next + * line in the keymap + * @keymap: expanded version of keymap that is suitable for use by + * matrix keyboad driver + * @keybit: pointer to bitmap of keys supported by input device + * + * This function converts platform keymap (encoded with KEY() macro) into + * an array of keycodes that is suitable for using in a standard matrix + * keyboard driver that uses row and col as indices. + */ +static inline void +matrix_keypad_build_keymap(const struct matrix_keymap_data *keymap_data, + unsigned int row_shift, + unsigned short *keymap, unsigned long *keybit) +{ + int i; + + for (i = 0; i < keymap_data->keymap_size; i++) { + unsigned int key = keymap_data->keymap[i]; + unsigned int row = KEY_ROW(key); + unsigned int col = KEY_COL(key); + unsigned short code = KEY_VAL(key); + + keymap[MATRIX_SCAN_CODE(row, col, row_shift)] = code; + __set_bit(code, keybit); + } + __clear_bit(KEY_RESERVED, keybit); +} + #endif /* _MATRIX_KEYPAD_H */ -- cgit v1.2.3 From 9d8340687c524ce61e3c9c76758c4c81303acfc0 Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 25 Aug 2009 19:24:14 -0700 Subject: Input: add twl4030_keypad driver Add a driver for the keypad controller on TWL4030 family chips. These support up to an 8x8 key matrix. The TWL4030 multifunction chips are mostly used on OMAP3 (or OMAP 2430) based boards. [dtor@mail.ru: switch to matrix-keypad framework, fix changing keymap from userspace] Reviewed-by: Trilok Soni Signed-off-by: David Brownell Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/Kconfig | 11 + drivers/input/keyboard/Makefile | 1 + drivers/input/keyboard/twl4030_keypad.c | 480 ++++++++++++++++++++++++++++++++ include/linux/i2c/twl4030.h | 19 +- 4 files changed, 505 insertions(+), 6 deletions(-) create mode 100644 drivers/input/keyboard/twl4030_keypad.c (limited to 'include') diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 50e407de8a78..3525c19be428 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -330,6 +330,17 @@ config KEYBOARD_OMAP To compile this driver as a module, choose M here: the module will be called omap-keypad. +config KEYBOARD_TWL4030 + tristate "TI TWL4030/TWL5030/TPS659x0 keypad support" + depends on TWL4030_CORE + help + Say Y here if your board use the keypad controller on + TWL4030 family chips. It's safe to say enable this + even on boards that don't use the keypad controller. + + To compile this driver as a module, choose M here: the + module will be called twl4030_keypad. 
+ config KEYBOARD_TOSA tristate "Tosa keyboard" depends on MACH_TOSA diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile index 152303029203..8a7a22b30266 100644 --- a/drivers/input/keyboard/Makefile +++ b/drivers/input/keyboard/Makefile @@ -30,5 +30,6 @@ obj-$(CONFIG_KEYBOARD_SPITZ) += spitzkbd.o obj-$(CONFIG_KEYBOARD_STOWAWAY) += stowaway.o obj-$(CONFIG_KEYBOARD_SUNKBD) += sunkbd.o obj-$(CONFIG_KEYBOARD_TOSA) += tosakbd.o +obj-$(CONFIG_KEYBOARD_TWL4030) += twl4030_keypad.o obj-$(CONFIG_KEYBOARD_XTKBD) += xtkbd.o obj-$(CONFIG_KEYBOARD_W90P910) += w90p910_keypad.o diff --git a/drivers/input/keyboard/twl4030_keypad.c b/drivers/input/keyboard/twl4030_keypad.c new file mode 100644 index 000000000000..9a2977c21696 --- /dev/null +++ b/drivers/input/keyboard/twl4030_keypad.c @@ -0,0 +1,480 @@ +/* + * twl4030_keypad.c - driver for 8x8 keypad controller in twl4030 chips + * + * Copyright (C) 2007 Texas Instruments, Inc. + * Copyright (C) 2008 Nokia Corporation + * + * Code re-written for 2430SDP by: + * Syed Mohammed Khasim + * + * Initial Code: + * Manjunatha G K + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include + + +/* + * The TWL4030 family chips include a keypad controller that supports + * up to an 8x8 switch matrix. The controller can issue system wakeup + * events, since it uses only the always-on 32KiHz oscillator, and has + * an internal state machine that decodes pressed keys, including + * multi-key combinations. + * + * This driver lets boards define what keycodes they wish to report for + * which scancodes, as part of the "struct twl4030_keypad_data" used in + * the probe() routine. + * + * See the TPS65950 documentation; that's the general availability + * version of the TWL5030 second generation part. + */ +#define TWL4030_MAX_ROWS 8 /* TWL4030 hard limit */ +#define TWL4030_MAX_COLS 8 +#define TWL4030_ROW_SHIFT 3 +#define TWL4030_KEYMAP_SIZE (TWL4030_MAX_ROWS * TWL4030_MAX_COLS) + +struct twl4030_keypad { + unsigned short keymap[TWL4030_KEYMAP_SIZE]; + u16 kp_state[TWL4030_MAX_ROWS]; + unsigned n_rows; + unsigned n_cols; + unsigned irq; + + struct device *dbg_dev; + struct input_dev *input; +}; + +/*----------------------------------------------------------------------*/ + +/* arbitrary prescaler value 0..7 */ +#define PTV_PRESCALER 4 + +/* Register Offsets */ +#define KEYP_CTRL 0x00 +#define KEYP_DEB 0x01 +#define KEYP_LONG_KEY 0x02 +#define KEYP_LK_PTV 0x03 +#define KEYP_TIMEOUT_L 0x04 +#define KEYP_TIMEOUT_H 0x05 +#define KEYP_KBC 0x06 +#define KEYP_KBR 0x07 +#define KEYP_SMS 0x08 +#define KEYP_FULL_CODE_7_0 0x09 /* row 0 column status */ +#define KEYP_FULL_CODE_15_8 0x0a /* ... row 1 ... 
*/ +#define KEYP_FULL_CODE_23_16 0x0b +#define KEYP_FULL_CODE_31_24 0x0c +#define KEYP_FULL_CODE_39_32 0x0d +#define KEYP_FULL_CODE_47_40 0x0e +#define KEYP_FULL_CODE_55_48 0x0f +#define KEYP_FULL_CODE_63_56 0x10 +#define KEYP_ISR1 0x11 +#define KEYP_IMR1 0x12 +#define KEYP_ISR2 0x13 +#define KEYP_IMR2 0x14 +#define KEYP_SIR 0x15 +#define KEYP_EDR 0x16 /* edge triggers */ +#define KEYP_SIH_CTRL 0x17 + +/* KEYP_CTRL_REG Fields */ +#define KEYP_CTRL_SOFT_NRST BIT(0) +#define KEYP_CTRL_SOFTMODEN BIT(1) +#define KEYP_CTRL_LK_EN BIT(2) +#define KEYP_CTRL_TOE_EN BIT(3) +#define KEYP_CTRL_TOLE_EN BIT(4) +#define KEYP_CTRL_RP_EN BIT(5) +#define KEYP_CTRL_KBD_ON BIT(6) + +/* KEYP_DEB, KEYP_LONG_KEY, KEYP_TIMEOUT_x*/ +#define KEYP_PERIOD_US(t, prescale) ((t) / (31 << (prescale + 1)) - 1) + +/* KEYP_LK_PTV_REG Fields */ +#define KEYP_LK_PTV_PTV_SHIFT 5 + +/* KEYP_{IMR,ISR,SIR} Fields */ +#define KEYP_IMR1_MIS BIT(3) +#define KEYP_IMR1_TO BIT(2) +#define KEYP_IMR1_LK BIT(1) +#define KEYP_IMR1_KP BIT(0) + +/* KEYP_EDR Fields */ +#define KEYP_EDR_KP_FALLING 0x01 +#define KEYP_EDR_KP_RISING 0x02 +#define KEYP_EDR_KP_BOTH 0x03 +#define KEYP_EDR_LK_FALLING 0x04 +#define KEYP_EDR_LK_RISING 0x08 +#define KEYP_EDR_TO_FALLING 0x10 +#define KEYP_EDR_TO_RISING 0x20 +#define KEYP_EDR_MIS_FALLING 0x40 +#define KEYP_EDR_MIS_RISING 0x80 + + +/*----------------------------------------------------------------------*/ + +static int twl4030_kpread(struct twl4030_keypad *kp, + u8 *data, u32 reg, u8 num_bytes) +{ + int ret = twl4030_i2c_read(TWL4030_MODULE_KEYPAD, data, reg, num_bytes); + + if (ret < 0) + dev_warn(kp->dbg_dev, + "Couldn't read TWL4030: %X - ret %d[%x]\n", + reg, ret, ret); + + return ret; +} + +static int twl4030_kpwrite_u8(struct twl4030_keypad *kp, u8 data, u32 reg) +{ + int ret = twl4030_i2c_write_u8(TWL4030_MODULE_KEYPAD, data, reg); + + if (ret < 0) + dev_warn(kp->dbg_dev, + "Could not write TWL4030: %X - ret %d[%x]\n", + reg, ret, ret); + + return ret; +} + +static inline u16 twl4030_col_xlate(struct twl4030_keypad *kp, u8 col) +{ + /* If all bits in a row are active for all coloumns then + * we have that row line connected to gnd. Mark this + * key on as if it was on matrix position n_cols (ie + * one higher than the size of the matrix). + */ + if (col == 0xFF) + return 1 << kp->n_cols; + else + return col & ((1 << kp->n_cols) - 1); +} + +static int twl4030_read_kp_matrix_state(struct twl4030_keypad *kp, u16 *state) +{ + u8 new_state[TWL4030_MAX_ROWS]; + int row; + int ret = twl4030_kpread(kp, new_state, + KEYP_FULL_CODE_7_0, kp->n_rows); + if (ret >= 0) + for (row = 0; row < kp->n_rows; row++) + state[row] = twl4030_col_xlate(kp, new_state[row]); + + return ret; +} + +static int twl4030_is_in_ghost_state(struct twl4030_keypad *kp, u16 *key_state) +{ + int i; + u16 check = 0; + + for (i = 0; i < kp->n_rows; i++) { + u16 col = key_state[i]; + + if ((col & check) && hweight16(col) > 1) + return 1; + + check |= col; + } + + return 0; +} + +static void twl4030_kp_scan(struct twl4030_keypad *kp, bool release_all) +{ + struct input_dev *input = kp->input; + u16 new_state[TWL4030_MAX_ROWS]; + int col, row; + + if (release_all) + memset(new_state, 0, sizeof(new_state)); + else { + /* check for any changes */ + int ret = twl4030_read_kp_matrix_state(kp, new_state); + + if (ret < 0) /* panic ... 
*/ + return; + + if (twl4030_is_in_ghost_state(kp, new_state)) + return; + } + + /* check for changes and print those */ + for (row = 0; row < kp->n_rows; row++) { + int changed = new_state[row] ^ kp->kp_state[row]; + + if (!changed) + continue; + + for (col = 0; col < kp->n_cols; col++) { + int code; + + if (!(changed & (1 << col))) + continue; + + dev_dbg(kp->dbg_dev, "key [%d:%d] %s\n", row, col, + (new_state[row] & (1 << col)) ? + "press" : "release"); + + code = MATRIX_SCAN_CODE(row, col, TWL4030_ROW_SHIFT); + input_event(input, EV_MSC, MSC_SCAN, code); + input_report_key(input, kp->keymap[code], + new_state[row] & (1 << col)); + } + kp->kp_state[row] = new_state[row]; + } + input_sync(input); +} + +/* + * Keypad interrupt handler + */ +static irqreturn_t do_kp_irq(int irq, void *_kp) +{ + struct twl4030_keypad *kp = _kp; + u8 reg; + int ret; + +#ifdef CONFIG_LOCKDEP + /* WORKAROUND for lockdep forcing IRQF_DISABLED on us, which + * we don't want and can't tolerate. Although it might be + * friendlier not to borrow this thread context... + */ + local_irq_enable(); +#endif + + /* Read & Clear TWL4030 pending interrupt */ + ret = twl4030_kpread(kp, ®, KEYP_ISR1, 1); + + /* Release all keys if I2C has gone bad or + * the KEYP has gone to idle state */ + if (ret >= 0 && (reg & KEYP_IMR1_KP)) + twl4030_kp_scan(kp, false); + else + twl4030_kp_scan(kp, true); + + return IRQ_HANDLED; +} + +static int __devinit twl4030_kp_program(struct twl4030_keypad *kp) +{ + u8 reg; + int i; + + /* Enable controller, with hardware decoding but not autorepeat */ + reg = KEYP_CTRL_SOFT_NRST | KEYP_CTRL_SOFTMODEN + | KEYP_CTRL_TOE_EN | KEYP_CTRL_KBD_ON; + if (twl4030_kpwrite_u8(kp, reg, KEYP_CTRL) < 0) + return -EIO; + + /* NOTE: we could use sih_setup() here to package keypad + * event sources as four different IRQs ... but we don't. 
+ */ + + /* Enable TO rising and KP rising and falling edge detection */ + reg = KEYP_EDR_KP_BOTH | KEYP_EDR_TO_RISING; + if (twl4030_kpwrite_u8(kp, reg, KEYP_EDR) < 0) + return -EIO; + + /* Set PTV prescaler Field */ + reg = (PTV_PRESCALER << KEYP_LK_PTV_PTV_SHIFT); + if (twl4030_kpwrite_u8(kp, reg, KEYP_LK_PTV) < 0) + return -EIO; + + /* Set key debounce time to 20 ms */ + i = KEYP_PERIOD_US(20000, PTV_PRESCALER); + if (twl4030_kpwrite_u8(kp, i, KEYP_DEB) < 0) + return -EIO; + + /* Set timeout period to 100 ms */ + i = KEYP_PERIOD_US(200000, PTV_PRESCALER); + if (twl4030_kpwrite_u8(kp, (i & 0xFF), KEYP_TIMEOUT_L) < 0) + return -EIO; + + if (twl4030_kpwrite_u8(kp, (i >> 8), KEYP_TIMEOUT_H) < 0) + return -EIO; + + /* + * Enable Clear-on-Read; disable remembering events that fire + * after the IRQ but before our handler acks (reads) them, + */ + reg = TWL4030_SIH_CTRL_COR_MASK | TWL4030_SIH_CTRL_PENDDIS_MASK; + if (twl4030_kpwrite_u8(kp, reg, KEYP_SIH_CTRL) < 0) + return -EIO; + + /* initialize key state; irqs update it from here on */ + if (twl4030_read_kp_matrix_state(kp, kp->kp_state) < 0) + return -EIO; + + return 0; +} + +/* + * Registers keypad device with input subsystem + * and configures TWL4030 keypad registers + */ +static int __devinit twl4030_kp_probe(struct platform_device *pdev) +{ + struct twl4030_keypad_data *pdata = pdev->dev.platform_data; + const struct matrix_keymap_data *keymap_data = pdata->keymap_data; + struct twl4030_keypad *kp; + struct input_dev *input; + u8 reg; + int error; + + if (!pdata || !pdata->rows || !pdata->cols || + pdata->rows > TWL4030_MAX_ROWS || pdata->cols > TWL4030_MAX_COLS) { + dev_err(&pdev->dev, "Invalid platform_data\n"); + return -EINVAL; + } + + kp = kzalloc(sizeof(*kp), GFP_KERNEL); + input = input_allocate_device(); + if (!kp || !input) { + error = -ENOMEM; + goto err1; + } + + /* Get the debug Device */ + kp->dbg_dev = &pdev->dev; + kp->input = input; + + kp->n_rows = pdata->rows; + kp->n_cols = pdata->cols; + kp->irq = platform_get_irq(pdev, 0); + + /* setup input device */ + __set_bit(EV_KEY, input->evbit); + + /* Enable auto repeat feature of Linux input subsystem */ + if (pdata->rep) + __set_bit(EV_REP, input->evbit); + + input_set_capability(input, EV_MSC, MSC_SCAN); + + input->name = "TWL4030 Keypad"; + input->phys = "twl4030_keypad/input0"; + input->dev.parent = &pdev->dev; + + input->id.bustype = BUS_HOST; + input->id.vendor = 0x0001; + input->id.product = 0x0001; + input->id.version = 0x0003; + + input->keycode = kp->keymap; + input->keycodesize = sizeof(kp->keymap[0]); + input->keycodemax = ARRAY_SIZE(kp->keymap); + + matrix_keypad_build_keymap(keymap_data, TWL4030_ROW_SHIFT, + input->keycode, input->keybit); + + error = input_register_device(input); + if (error) { + dev_err(kp->dbg_dev, + "Unable to register twl4030 keypad device\n"); + goto err1; + } + + error = twl4030_kp_program(kp); + if (error) + goto err2; + + /* + * This ISR will always execute in kernel thread context because of + * the need to access the TWL4030 over the I2C bus. + * + * NOTE: we assume this host is wired to TWL4040 INT1, not INT2 ... + */ + error = request_irq(kp->irq, do_kp_irq, 0, pdev->name, kp); + if (error) { + dev_info(kp->dbg_dev, "request_irq failed for irq no=%d\n", + kp->irq); + goto err3; + } + + /* Enable KP and TO interrupts now. 
*/ + reg = (u8) ~(KEYP_IMR1_KP | KEYP_IMR1_TO); + if (twl4030_kpwrite_u8(kp, reg, KEYP_IMR1)) { + error = -EIO; + goto err4; + } + + platform_set_drvdata(pdev, kp); + return 0; + +err4: + /* mask all events - we don't care about the result */ + (void) twl4030_kpwrite_u8(kp, 0xff, KEYP_IMR1); +err3: + free_irq(kp->irq, NULL); +err2: + input_unregister_device(input); + input = NULL; +err1: + input_free_device(input); + kfree(kp); + return error; +} + +static int __devexit twl4030_kp_remove(struct platform_device *pdev) +{ + struct twl4030_keypad *kp = platform_get_drvdata(pdev); + + free_irq(kp->irq, kp); + input_unregister_device(kp->input); + platform_set_drvdata(pdev, NULL); + kfree(kp); + + return 0; +} + +/* + * NOTE: twl4030 are multi-function devices connected via I2C. + * So this device is a child of an I2C parent, thus it needs to + * support unplug/replug (which most platform devices don't). + */ + +static struct platform_driver twl4030_kp_driver = { + .probe = twl4030_kp_probe, + .remove = __devexit_p(twl4030_kp_remove), + .driver = { + .name = "twl4030_keypad", + .owner = THIS_MODULE, + }, +}; + +static int __init twl4030_kp_init(void) +{ + return platform_driver_register(&twl4030_kp_driver); +} +module_init(twl4030_kp_init); + +static void __exit twl4030_kp_exit(void) +{ + platform_driver_unregister(&twl4030_kp_driver); +} +module_exit(twl4030_kp_exit); + +MODULE_AUTHOR("Texas Instruments"); +MODULE_DESCRIPTION("TWL4030 Keypad Driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:twl4030_keypad"); + diff --git a/include/linux/i2c/twl4030.h b/include/linux/i2c/twl4030.h index 0dc80ef24975..3fd21d7cb6bf 100644 --- a/include/linux/i2c/twl4030.h +++ b/include/linux/i2c/twl4030.h @@ -25,6 +25,9 @@ #ifndef __TWL4030_H_ #define __TWL4030_H_ +#include +#include + /* * Using the twl4030 core we address registers using a pair * { module id, relative register offset } @@ -302,13 +305,17 @@ struct twl4030_madc_platform_data { int irq_line; }; +/* Boards have uniqe mappings of {col, row} --> keycode. + * Column and row are 4 bits, but range only from 0..7. + * a PERSISTENT_KEY is "always on" and never reported. + */ +#define PERSISTENT_KEY(c, r) KEY((c), (r), KEY_RESERVED) + struct twl4030_keypad_data { - int rows; - int cols; - int *keymap; - int irq; - unsigned int keymapsize; - unsigned int rep:1; + const struct matrix_keymap_data *keymap_data; + unsigned rows; + unsigned cols; + bool rep; }; enum twl4030_usb_mode { -- cgit v1.2.3 From cf0baf16c3a3b3dd67ea3df346479032ab10e988 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 28 Aug 2009 07:22:05 +0200 Subject: ALSA: Fixed a typo of printk() Fixed a silly typo of printk() included in the previous patch... Signed-off-by: Takashi Iwai --- include/sound/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index a89728db5584..1ec992b3f1d0 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -348,7 +348,7 @@ void __snd_printk(unsigned int level, const char *file, int line, __attribute__ ((format (printf, 4, 5))); #else #define __snd_printk(level, file, line, format, args...) 
\ - prinkt(format, ##args) + printk(format, ##args) #endif /** -- cgit v1.2.3 From 0dd7b74787eaf7858c6c573353a83c3e2766e674 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 28 Aug 2009 00:50:06 +0200 Subject: tracing: Fix double CPP substitution in TRACE_EVENT_FN TRACE_EVENT_FN relays on TRACE_EVENT by reprocessing its parameters into the ftrace events CPP macro. This leads to a double substitution in some cases. For example, a bad consequence is a format always prefixed by "%s, %s\n" for every TRACE_EVENT_FN based events. Eg: cat /debug/tracing/events/syscalls/sys_enter/format [...] print fmt: "%s, %s\n", "\"NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)\"",\ "REC->id, REC->args[0], REC->args[1], REC->args[2], REC->args[3],\ REC->args[4], REC->args[5]" This creates a failure in post-processing tools such as perf trace or trace-cmd. Then drop this double substitution and replace it by a new __cpparg() macro that relays CPP arguments containing commas. Signed-off-by: Frederic Weisbecker Cc: Josh Stone Cc: Li Zefan Cc: Steven Rostedt Cc: Jason Baron LKML-Reference: <1251413406-6704-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 360a77ad79e1..57c56a998ee6 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -45,14 +45,15 @@ }; \ static struct ftrace_event_call event_##name +#undef __cpparg +#define __cpparg(arg...) arg + /* Callbacks are meaningless to ftrace. */ #undef TRACE_EVENT_FN -#define TRACE_EVENT_FN(name, proto, args, tstruct, \ - assign, print, reg, unreg) \ - TRACE_EVENT(name, TP_PROTO(proto), TP_ARGS(args), \ - TP_STRUCT__entry(tstruct), \ - TP_fast_assign(assign), \ - TP_printk(print)) +#define TRACE_EVENT_FN(name, proto, args, tstruct, \ + assign, print, reg, unreg) \ + TRACE_EVENT(name, __cpparg(proto), __cpparg(args), \ + __cpparg(tstruct), __cpparg(assign), __cpparg(print)) \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -- cgit v1.2.3 From 468de9e54a900559b55aa939a4daeaea1915e572 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Thu, 27 Aug 2009 12:07:40 -0400 Subject: nfsd41: expand solo sequence check Compounds consisting of only a sequence operation don't need any additional caching beyond the sequence information we store in the slot entry. Fix nfsd4_is_solo_sequence to identify this case correctly. The additional check for a failed sequence in nfsd4_store_cache_entry() is redundant, since the nfsd4_is_solo_sequence call lower down catches this case. The final ce_cachethis set in nfsd4_sequence is also redundant. Signed-off-by: Andy Adamson Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 9 --------- include/linux/nfsd/xdr4.h | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 5f634d24861c..b44a2cfde6f1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -991,16 +991,10 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) { struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; struct svc_rqst *rqstp = resp->rqstp; - struct nfsd4_compoundargs *args = rqstp->rq_argp; - struct nfsd4_op *op = &args->ops[resp->opcnt]; struct kvec *resv = &rqstp->rq_res.head[0]; dprintk("--> %s entry %p\n", __func__, entry); - /* Don't cache a failed OP_SEQUENCE. 
*/ - if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) - return; - nfsd4_release_respages(entry->ce_respages, entry->ce_resused); entry->ce_opcnt = resp->opcnt; entry->ce_status = resp->cstate.status; @@ -1490,9 +1484,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, slot->sl_inuse = true; slot->sl_seqid = seq->seqid; slot->sl_cache_entry.ce_cachethis = seq->cachethis; - /* Always set the cache entry cachethis for solo sequence */ - if (nfsd4_is_solo_sequence(resp)) - slot->sl_cache_entry.ce_cachethis = 1; cstate->slot = slot; cstate->session = session; diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 5e4beb0deb80..3f716607c86d 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -467,7 +467,7 @@ struct nfsd4_compoundres { static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp) { struct nfsd4_compoundargs *args = resp->rqstp->rq_argp; - return args->opcnt == 1; + return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE; } static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp) -- cgit v1.2.3 From 7285dd7fd375763bfb8ab1ac9cf3f1206f503c16 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 28 Aug 2009 20:25:24 +0200 Subject: clocksource: Resolve cpu hotplug dead lock with TSC unstable Martin Schwidefsky analyzed it: To register a clocksource the clocksource_mutex is acquired and if necessary timekeeping_notify is called to install the clocksource as the timekeeper clock. timekeeping_notify uses stop_machine which needs to take cpu_add_remove_lock mutex. Starting a new cpu is done with the cpu_add_remove_lock mutex held. native_cpu_up checks the tsc of the new cpu and if the tsc is no good clocksource_change_rating is called. Which needs the clocksource_mutex and the deadlock is complete. The solution is to replace the TSC via the clocksource watchdog mechanism. Mark the TSC as unstable and schedule the watchdog work so it gets removed in the watchdog thread context. 
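The fix follows a common pattern for breaking such a lock-order cycle: the hotplug path only takes a spinlock and flags the clocksource, and the mutex-protected update is deferred to work running in thread context. A generic sketch of that pattern, with all names here purely illustrative (the real code is clocksource_mark_unstable() in the diff below):

#include <linux/spinlock.h>
#include <linux/workqueue.h>

static void deferred_update(struct work_struct *work);

static DEFINE_SPINLOCK(flag_lock);	/* cheap lock, no ordering conflict */
static DECLARE_WORK(update_work, deferred_update);
static bool update_pending;

/* Called from the context that already holds the "outer" lock
 * (here: cpu_add_remove_lock); must not take the "inner" mutex. */
void mark_needs_update(void)
{
	unsigned long flags;

	spin_lock_irqsave(&flag_lock, flags);
	if (!update_pending) {
		update_pending = true;
		schedule_work(&update_work);
	}
	spin_unlock_irqrestore(&flag_lock, flags);
}

/* Runs later in worker thread context, where taking the "inner"
 * mutex (here: clocksource_mutex) cannot deadlock with hotplug. */
static void deferred_update(struct work_struct *work)
{
	/* take the mutex and perform the rating change / list update */
}
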
Signed-off-by: Thomas Gleixner LKML-Reference: Cc: Martin Schwidefsky Cc: John Stultz --- arch/x86/kernel/tsc.c | 8 +++++--- include/linux/clocksource.h | 1 + kernel/time/clocksource.c | 33 ++++++++++++++++++++++++++++++--- 3 files changed, 36 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 968425422c46..fc3672a303d6 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -767,12 +767,14 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; - printk("Marking TSC unstable due to %s\n", reason); + printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); /* Change only the rating, when not registered */ if (clocksource_tsc.mult) - clocksource_change_rating(&clocksource_tsc, 0); - else + clocksource_mark_unstable(&clocksource_tsc); + else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; clocksource_tsc.rating = 0; + } } } diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 9ea40ff26f0e..83d2fbd81b93 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -277,6 +277,7 @@ extern struct clocksource* clocksource_get_next(void); extern void clocksource_change_rating(struct clocksource *cs, int rating); extern void clocksource_resume(void); extern struct clocksource * __init __weak clocksource_default_clock(void); +extern void clocksource_mark_unstable(struct clocksource *cs); #ifdef CONFIG_GENERIC_TIME_VSYSCALL extern void update_vsyscall(struct timespec *ts, struct clocksource *c); diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index e0c86ad6e9fb..a0af4ffcb6e5 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -149,15 +149,42 @@ static void clocksource_watchdog_work(struct work_struct *work) kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog"); } -static void clocksource_unstable(struct clocksource *cs, int64_t delta) +static void __clocksource_unstable(struct clocksource *cs) { - printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", - cs->name, delta); cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); cs->flags |= CLOCK_SOURCE_UNSTABLE; schedule_work(&watchdog_work); } +static void clocksource_unstable(struct clocksource *cs, int64_t delta) +{ + printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n", + cs->name, delta); + __clocksource_unstable(cs); +} + +/** + * clocksource_mark_unstable - mark clocksource unstable via watchdog + * @cs: clocksource to be marked unstable + * + * This function is called instead of clocksource_change_rating from + * cpu hotplug code to avoid a deadlock between the clocksource mutex + * and the cpu hotplug mutex. It defers the update of the clocksource + * to the watchdog thread. + */ +void clocksource_mark_unstable(struct clocksource *cs) +{ + unsigned long flags; + + spin_lock_irqsave(&watchdog_lock, flags); + if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) { + if (list_empty(&cs->wd_list)) + list_add(&cs->wd_list, &watchdog_list); + __clocksource_unstable(cs); + } + spin_unlock_irqrestore(&watchdog_lock, flags); +} + static void clocksource_watchdog(unsigned long data) { struct clocksource *cs; -- cgit v1.2.3 From 9e03fdfd05e733e1136d431973625b174029c5e6 Mon Sep 17 00:00:00 2001 From: Javier Cardona Date: Thu, 20 Aug 2009 09:21:45 -0700 Subject: mac80211: Update mesh config IE to 11s draft 3.02 The mesh config information element has changed significantly since draft 1.08 This patch brings it up to date. 
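For reference, the updated Mesh Configuration IE body is 24 bytes; a sketch of its layout, derived from the offsets in the patch below (the struct is only an illustration, since the driver fills the element byte by byte):

#include <linux/types.h>

struct mesh_config_ie_body {	/* illustrative layout only */
	u8 version;		/* offset  0 */
	u8 mesh_pp_id[4];	/* offset  1: path selection protocol */
	u8 mesh_pm_id[4];	/* offset  5: path selection metric */
	u8 mesh_cc_id[4];	/* offset  9: congestion control mode */
	u8 mesh_sp_id[4];	/* offset 13: synchronization protocol */
	u8 mesh_auth_id[4];	/* offset 17: authentication protocol */
	u8 formation_info;	/* offset 21: mesh formation info */
	u8 capability;		/* offset 22: CAPAB_FORWARDING, CAPAB_ACCEPT_PLINKS */
	u8 reserved;		/* offset 23: written as 0x00 */
} __attribute__ ((packed));
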
Thanks to Sam Leffler and Rui Paulo for identifying this. Signed-off-by: Javier Cardona Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 2 +- net/mac80211/ieee80211_i.h | 4 ++++ net/mac80211/mesh.c | 49 ++++++++++++++++++++++++++++++++-------------- 3 files changed, 39 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 21556a2d9e7e..52e15e079c61 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -115,7 +115,7 @@ #define IEEE80211_MAX_SSID_LEN 32 #define IEEE80211_MAX_MESH_ID_LEN 32 -#define IEEE80211_MESH_CONFIG_LEN 19 +#define IEEE80211_MESH_CONFIG_LEN 24 #define IEEE80211_QOS_CTL_LEN 2 #define IEEE80211_QOS_CTL_TID_MASK 0x000F diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 93e618a980d1..455cc7ade9f5 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -367,6 +367,10 @@ struct ieee80211_if_mesh { u8 mesh_pm_id[4]; /* Congestion Control Mode Identifier */ u8 mesh_cc_id[4]; + /* Synchronization Protocol Identifier */ + u8 mesh_sp_id[4]; + /* Authentication Protocol Identifier */ + u8 mesh_auth_id[4]; /* Local mesh Destination Sequence Number */ u32 dsn; /* Last used PREQ ID */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 3185e18c8214..f7364e56f1ee 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -18,8 +18,11 @@ #define PP_OFFSET 1 /* Path Selection Protocol */ #define PM_OFFSET 5 /* Path Selection Metric */ #define CC_OFFSET 9 /* Congestion Control Mode */ -#define CAPAB_OFFSET 17 -#define ACCEPT_PLINKS 0x80 +#define SP_OFFSET 13 /* Synchronization Protocol */ +#define AUTH_OFFSET 17 /* Authentication Protocol */ +#define CAPAB_OFFSET 22 +#define CAPAB_ACCEPT_PLINKS 0x80 +#define CAPAB_FORWARDING 0x10 #define TMR_RUNNING_HK 0 #define TMR_RUNNING_MP 1 @@ -84,7 +87,9 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && memcmp(ifmsh->mesh_pp_id, ie->mesh_config + PP_OFFSET, 4) == 0 && memcmp(ifmsh->mesh_pm_id, ie->mesh_config + PM_OFFSET, 4) == 0 && - memcmp(ifmsh->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0) + memcmp(ifmsh->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0 && + memcmp(ifmsh->mesh_sp_id, ie->mesh_config + SP_OFFSET, 4) == 0 && + memcmp(ifmsh->mesh_auth_id, ie->mesh_config + AUTH_OFFSET, 4) == 0) return true; return false; @@ -97,7 +102,7 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_dat */ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { - return (*(ie->mesh_config + CAPAB_OFFSET) & ACCEPT_PLINKS) != 0; + return (*(ie->mesh_config + CAPAB_OFFSET) & CAPAB_ACCEPT_PLINKS) != 0; } /** @@ -123,11 +128,18 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) void mesh_ids_set_default(struct ieee80211_if_mesh *sta) { - u8 def_id[4] = {0x00, 0x0F, 0xAC, 0xff}; - - memcpy(sta->mesh_pp_id, def_id, 4); - memcpy(sta->mesh_pm_id, def_id, 4); - memcpy(sta->mesh_cc_id, def_id, 4); + u8 oui[3] = {0x00, 0x0F, 0xAC}; + + memcpy(sta->mesh_pp_id, oui, sizeof(oui)); + memcpy(sta->mesh_pm_id, oui, sizeof(oui)); + memcpy(sta->mesh_cc_id, oui, sizeof(oui)); + memcpy(sta->mesh_sp_id, oui, sizeof(oui)); + memcpy(sta->mesh_auth_id, oui, sizeof(oui)); + sta->mesh_pp_id[sizeof(oui)] = 0; + sta->mesh_pm_id[sizeof(oui)] = 0; + sta->mesh_cc_id[sizeof(oui)] = 0xff; + sta->mesh_sp_id[sizeof(oui)] = 0xff; + sta->mesh_auth_id[sizeof(oui)] = 0x0; } int 
mesh_rmc_init(struct ieee80211_sub_if_data *sdata) @@ -245,7 +257,7 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) if (sdata->u.mesh.mesh_id_len) memcpy(pos, sdata->u.mesh.mesh_id, sdata->u.mesh.mesh_id_len); - pos = skb_put(skb, 21); + pos = skb_put(skb, 2 + IEEE80211_MESH_CONFIG_LEN); *pos++ = WLAN_EID_MESH_CONFIG; *pos++ = IEEE80211_MESH_CONFIG_LEN; /* Version */ @@ -263,15 +275,22 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) memcpy(pos, sdata->u.mesh.mesh_cc_id, 4); pos += 4; - /* Channel precedence: - * Not running simple channel unification protocol - */ - memset(pos, 0x00, 4); + /* Synchronization protocol identifier */ + memcpy(pos, sdata->u.mesh.mesh_sp_id, 4); pos += 4; + /* Authentication Protocol identifier */ + memcpy(pos, sdata->u.mesh.mesh_auth_id, 4); + pos += 4; + + /* Mesh Formation Info */ + memset(pos, 0x00, 1); + pos += 1; + /* Mesh capability */ sdata->u.mesh.accepting_plinks = mesh_plink_availables(sdata); - *pos++ = sdata->u.mesh.accepting_plinks ? ACCEPT_PLINKS : 0x00; + *pos = CAPAB_FORWARDING; + *pos++ |= sdata->u.mesh.accepting_plinks ? CAPAB_ACCEPT_PLINKS : 0x00; *pos++ = 0x00; return; -- cgit v1.2.3 From b0a4e7d8a291de63f35b04464de9ab4a83d38a7c Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 20 Aug 2009 14:48:03 -0400 Subject: libipw: switch from ieee80211_* to libipw_* naming policy This eliminates the dual definition of ieee80211_channel (and possibly others), further clarifying who defines what and paving the way for inclusion of cfg80211.h. Signed-off-by: John W. Linville --- drivers/net/wireless/ipw2x00/ieee80211.h | 1087 -------------------------- drivers/net/wireless/ipw2x00/ipw2100.c | 198 ++--- drivers/net/wireless/ipw2x00/ipw2100.h | 14 +- drivers/net/wireless/ipw2x00/ipw2200.c | 902 ++++++++++----------- drivers/net/wireless/ipw2x00/ipw2200.h | 14 +- drivers/net/wireless/ipw2x00/libipw.h | 1087 ++++++++++++++++++++++++++ drivers/net/wireless/ipw2x00/libipw_geo.c | 80 +- drivers/net/wireless/ipw2x00/libipw_module.c | 80 +- drivers/net/wireless/ipw2x00/libipw_rx.c | 403 +++++----- drivers/net/wireless/ipw2x00/libipw_tx.c | 68 +- drivers/net/wireless/ipw2x00/libipw_wx.c | 92 +-- include/net/iw_handler.h | 6 +- 12 files changed, 2016 insertions(+), 2015 deletions(-) delete mode 100644 drivers/net/wireless/ipw2x00/ieee80211.h create mode 100644 drivers/net/wireless/ipw2x00/libipw.h (limited to 'include') diff --git a/drivers/net/wireless/ipw2x00/ieee80211.h b/drivers/net/wireless/ipw2x00/ieee80211.h deleted file mode 100644 index 70755c1336d5..000000000000 --- a/drivers/net/wireless/ipw2x00/ieee80211.h +++ /dev/null @@ -1,1087 +0,0 @@ -/* - * Merged with mainline ieee80211.h in Aug 2004. Original ieee802_11 - * remains copyright by the original authors - * - * Portions of the merged code are based on Host AP (software wireless - * LAN access point) driver for Intersil Prism2/2.5/3. - * - * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen - * - * Copyright (c) 2002-2003, Jouni Malinen - * - * Adaption to a generic IEEE 802.11 stack by James Ketrenos - * - * Copyright (c) 2004-2005, Intel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. See README and COPYING for - * more details. 
- * - * API Version History - * 1.0.x -- Initial version - * 1.1.x -- Added radiotap, QoS, TIM, ieee80211_geo APIs, - * various structure changes, and crypto API init method - */ -#ifndef IEEE80211_H -#define IEEE80211_H -#include /* ETH_ALEN */ -#include /* ARRAY_SIZE */ -#include -#include - -#include - -#define IEEE80211_VERSION "git-1.1.13" - -#define IEEE80211_DATA_LEN 2304 -/* Maximum size for the MA-UNITDATA primitive, 802.11 standard section - 6.2.1.1.2. - - The figure in section 7.1.2 suggests a body size of up to 2312 - bytes is allowed, which is a bit confusing, I suspect this - represents the 2304 bytes of real data, plus a possible 8 bytes of - WEP IV and ICV. (this interpretation suggested by Ramiro Barreiro) */ - -#define IEEE80211_1ADDR_LEN 10 -#define IEEE80211_2ADDR_LEN 16 -#define IEEE80211_3ADDR_LEN 24 -#define IEEE80211_4ADDR_LEN 30 -#define IEEE80211_FCS_LEN 4 -#define IEEE80211_HLEN (IEEE80211_4ADDR_LEN) -#define IEEE80211_FRAME_LEN (IEEE80211_DATA_LEN + IEEE80211_HLEN) - -#define MIN_FRAG_THRESHOLD 256U -#define MAX_FRAG_THRESHOLD 2346U - -/* QOS control */ -#define IEEE80211_QCTL_TID 0x000F - -/* debug macros */ - -#ifdef CONFIG_LIBIPW_DEBUG -extern u32 ieee80211_debug_level; -#define IEEE80211_DEBUG(level, fmt, args...) \ -do { if (ieee80211_debug_level & (level)) \ - printk(KERN_DEBUG "ieee80211: %c %s " fmt, \ - in_interrupt() ? 'I' : 'U', __func__ , ## args); } while (0) -static inline bool ieee80211_ratelimit_debug(u32 level) -{ - return (ieee80211_debug_level & level) && net_ratelimit(); -} -#else -#define IEEE80211_DEBUG(level, fmt, args...) do {} while (0) -static inline bool ieee80211_ratelimit_debug(u32 level) -{ - return false; -} -#endif /* CONFIG_LIBIPW_DEBUG */ - -/* - * To use the debug system: - * - * If you are defining a new debug classification, simply add it to the #define - * list here in the form of: - * - * #define IEEE80211_DL_xxxx VALUE - * - * shifting value to the left one bit from the previous entry. xxxx should be - * the name of the classification (for example, WEP) - * - * You then need to either add a IEEE80211_xxxx_DEBUG() macro definition for your - * classification, or use IEEE80211_DEBUG(IEEE80211_DL_xxxx, ...) whenever you want - * to send output to that classification. - * - * To add your debug level to the list of levels seen when you perform - * - * % cat /proc/net/ieee80211/debug_level - * - * you simply need to add your entry to the ieee80211_debug_level array. - * - * If you do not see debug_level in /proc/net/ieee80211 then you do not have - * CONFIG_LIBIPW_DEBUG defined in your kernel configuration - * - */ - -#define IEEE80211_DL_INFO (1<<0) -#define IEEE80211_DL_WX (1<<1) -#define IEEE80211_DL_SCAN (1<<2) -#define IEEE80211_DL_STATE (1<<3) -#define IEEE80211_DL_MGMT (1<<4) -#define IEEE80211_DL_FRAG (1<<5) -#define IEEE80211_DL_DROP (1<<7) - -#define IEEE80211_DL_TX (1<<8) -#define IEEE80211_DL_RX (1<<9) -#define IEEE80211_DL_QOS (1<<31) - -#define IEEE80211_ERROR(f, a...) printk(KERN_ERR "ieee80211: " f, ## a) -#define IEEE80211_WARNING(f, a...) printk(KERN_WARNING "ieee80211: " f, ## a) -#define IEEE80211_DEBUG_INFO(f, a...) IEEE80211_DEBUG(IEEE80211_DL_INFO, f, ## a) - -#define IEEE80211_DEBUG_WX(f, a...) IEEE80211_DEBUG(IEEE80211_DL_WX, f, ## a) -#define IEEE80211_DEBUG_SCAN(f, a...) IEEE80211_DEBUG(IEEE80211_DL_SCAN, f, ## a) -#define IEEE80211_DEBUG_STATE(f, a...) IEEE80211_DEBUG(IEEE80211_DL_STATE, f, ## a) -#define IEEE80211_DEBUG_MGMT(f, a...) 
IEEE80211_DEBUG(IEEE80211_DL_MGMT, f, ## a) -#define IEEE80211_DEBUG_FRAG(f, a...) IEEE80211_DEBUG(IEEE80211_DL_FRAG, f, ## a) -#define IEEE80211_DEBUG_DROP(f, a...) IEEE80211_DEBUG(IEEE80211_DL_DROP, f, ## a) -#define IEEE80211_DEBUG_TX(f, a...) IEEE80211_DEBUG(IEEE80211_DL_TX, f, ## a) -#define IEEE80211_DEBUG_RX(f, a...) IEEE80211_DEBUG(IEEE80211_DL_RX, f, ## a) -#define IEEE80211_DEBUG_QOS(f, a...) IEEE80211_DEBUG(IEEE80211_DL_QOS, f, ## a) -#include -#include /* ARPHRD_ETHER */ - -#ifndef WIRELESS_SPY -#define WIRELESS_SPY /* enable iwspy support */ -#endif -#include /* new driver API */ - -#define ETH_P_PREAUTH 0x88C7 /* IEEE 802.11i pre-authentication */ - -#ifndef ETH_P_80211_RAW -#define ETH_P_80211_RAW (ETH_P_ECONET + 1) -#endif - -/* IEEE 802.11 defines */ - -#define P80211_OUI_LEN 3 - -struct ieee80211_snap_hdr { - - u8 dsap; /* always 0xAA */ - u8 ssap; /* always 0xAA */ - u8 ctrl; /* always 0x03 */ - u8 oui[P80211_OUI_LEN]; /* organizational universal id */ - -} __attribute__ ((packed)); - -#define SNAP_SIZE sizeof(struct ieee80211_snap_hdr) - -#define WLAN_FC_GET_VERS(fc) ((fc) & IEEE80211_FCTL_VERS) -#define WLAN_FC_GET_TYPE(fc) ((fc) & IEEE80211_FCTL_FTYPE) -#define WLAN_FC_GET_STYPE(fc) ((fc) & IEEE80211_FCTL_STYPE) - -#define WLAN_GET_SEQ_FRAG(seq) ((seq) & IEEE80211_SCTL_FRAG) -#define WLAN_GET_SEQ_SEQ(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) - -#define IEEE80211_STATMASK_SIGNAL (1<<0) -#define IEEE80211_STATMASK_RSSI (1<<1) -#define IEEE80211_STATMASK_NOISE (1<<2) -#define IEEE80211_STATMASK_RATE (1<<3) -#define IEEE80211_STATMASK_WEMASK 0x7 - -#define IEEE80211_CCK_MODULATION (1<<0) -#define IEEE80211_OFDM_MODULATION (1<<1) - -#define IEEE80211_24GHZ_BAND (1<<0) -#define IEEE80211_52GHZ_BAND (1<<1) - -#define IEEE80211_CCK_RATE_1MB 0x02 -#define IEEE80211_CCK_RATE_2MB 0x04 -#define IEEE80211_CCK_RATE_5MB 0x0B -#define IEEE80211_CCK_RATE_11MB 0x16 -#define IEEE80211_OFDM_RATE_6MB 0x0C -#define IEEE80211_OFDM_RATE_9MB 0x12 -#define IEEE80211_OFDM_RATE_12MB 0x18 -#define IEEE80211_OFDM_RATE_18MB 0x24 -#define IEEE80211_OFDM_RATE_24MB 0x30 -#define IEEE80211_OFDM_RATE_36MB 0x48 -#define IEEE80211_OFDM_RATE_48MB 0x60 -#define IEEE80211_OFDM_RATE_54MB 0x6C -#define IEEE80211_BASIC_RATE_MASK 0x80 - -#define IEEE80211_CCK_RATE_1MB_MASK (1<<0) -#define IEEE80211_CCK_RATE_2MB_MASK (1<<1) -#define IEEE80211_CCK_RATE_5MB_MASK (1<<2) -#define IEEE80211_CCK_RATE_11MB_MASK (1<<3) -#define IEEE80211_OFDM_RATE_6MB_MASK (1<<4) -#define IEEE80211_OFDM_RATE_9MB_MASK (1<<5) -#define IEEE80211_OFDM_RATE_12MB_MASK (1<<6) -#define IEEE80211_OFDM_RATE_18MB_MASK (1<<7) -#define IEEE80211_OFDM_RATE_24MB_MASK (1<<8) -#define IEEE80211_OFDM_RATE_36MB_MASK (1<<9) -#define IEEE80211_OFDM_RATE_48MB_MASK (1<<10) -#define IEEE80211_OFDM_RATE_54MB_MASK (1<<11) - -#define IEEE80211_CCK_RATES_MASK 0x0000000F -#define IEEE80211_CCK_BASIC_RATES_MASK (IEEE80211_CCK_RATE_1MB_MASK | \ - IEEE80211_CCK_RATE_2MB_MASK) -#define IEEE80211_CCK_DEFAULT_RATES_MASK (IEEE80211_CCK_BASIC_RATES_MASK | \ - IEEE80211_CCK_RATE_5MB_MASK | \ - IEEE80211_CCK_RATE_11MB_MASK) - -#define IEEE80211_OFDM_RATES_MASK 0x00000FF0 -#define IEEE80211_OFDM_BASIC_RATES_MASK (IEEE80211_OFDM_RATE_6MB_MASK | \ - IEEE80211_OFDM_RATE_12MB_MASK | \ - IEEE80211_OFDM_RATE_24MB_MASK) -#define IEEE80211_OFDM_DEFAULT_RATES_MASK (IEEE80211_OFDM_BASIC_RATES_MASK | \ - IEEE80211_OFDM_RATE_9MB_MASK | \ - IEEE80211_OFDM_RATE_18MB_MASK | \ - IEEE80211_OFDM_RATE_36MB_MASK | \ - IEEE80211_OFDM_RATE_48MB_MASK | \ - IEEE80211_OFDM_RATE_54MB_MASK) -#define 
IEEE80211_DEFAULT_RATES_MASK (IEEE80211_OFDM_DEFAULT_RATES_MASK | \ - IEEE80211_CCK_DEFAULT_RATES_MASK) - -#define IEEE80211_NUM_OFDM_RATES 8 -#define IEEE80211_NUM_CCK_RATES 4 -#define IEEE80211_OFDM_SHIFT_MASK_A 4 - -/* NOTE: This data is for statistical purposes; not all hardware provides this - * information for frames received. - * For ieee80211_rx_mgt, you need to set at least the 'len' parameter. - */ -struct ieee80211_rx_stats { - u32 mac_time; - s8 rssi; - u8 signal; - u8 noise; - u16 rate; /* in 100 kbps */ - u8 received_channel; - u8 control; - u8 mask; - u8 freq; - u16 len; - u64 tsf; - u32 beacon_time; -}; - -/* IEEE 802.11 requires that STA supports concurrent reception of at least - * three fragmented frames. This define can be increased to support more - * concurrent frames, but it should be noted that each entry can consume about - * 2 kB of RAM and increasing cache size will slow down frame reassembly. */ -#define IEEE80211_FRAG_CACHE_LEN 4 - -struct ieee80211_frag_entry { - unsigned long first_frag_time; - unsigned int seq; - unsigned int last_frag; - struct sk_buff *skb; - u8 src_addr[ETH_ALEN]; - u8 dst_addr[ETH_ALEN]; -}; - -struct ieee80211_stats { - unsigned int tx_unicast_frames; - unsigned int tx_multicast_frames; - unsigned int tx_fragments; - unsigned int tx_unicast_octets; - unsigned int tx_multicast_octets; - unsigned int tx_deferred_transmissions; - unsigned int tx_single_retry_frames; - unsigned int tx_multiple_retry_frames; - unsigned int tx_retry_limit_exceeded; - unsigned int tx_discards; - unsigned int rx_unicast_frames; - unsigned int rx_multicast_frames; - unsigned int rx_fragments; - unsigned int rx_unicast_octets; - unsigned int rx_multicast_octets; - unsigned int rx_fcs_errors; - unsigned int rx_discards_no_buffer; - unsigned int tx_discards_wrong_sa; - unsigned int rx_discards_undecryptable; - unsigned int rx_message_in_msg_fragments; - unsigned int rx_message_in_bad_msg_fragments; -}; - -struct ieee80211_device; - -#define SEC_KEY_1 (1<<0) -#define SEC_KEY_2 (1<<1) -#define SEC_KEY_3 (1<<2) -#define SEC_KEY_4 (1<<3) -#define SEC_ACTIVE_KEY (1<<4) -#define SEC_AUTH_MODE (1<<5) -#define SEC_UNICAST_GROUP (1<<6) -#define SEC_LEVEL (1<<7) -#define SEC_ENABLED (1<<8) -#define SEC_ENCRYPT (1<<9) - -#define SEC_LEVEL_0 0 /* None */ -#define SEC_LEVEL_1 1 /* WEP 40 and 104 bit */ -#define SEC_LEVEL_2 2 /* Level 1 + TKIP */ -#define SEC_LEVEL_2_CKIP 3 /* Level 1 + CKIP */ -#define SEC_LEVEL_3 4 /* Level 2 + CCMP */ - -#define SEC_ALG_NONE 0 -#define SEC_ALG_WEP 1 -#define SEC_ALG_TKIP 2 -#define SEC_ALG_CCMP 3 - -#define WEP_KEYS 4 -#define WEP_KEY_LEN 13 -#define SCM_KEY_LEN 32 -#define SCM_TEMPORAL_KEY_LENGTH 16 - -struct ieee80211_security { - u16 active_key:2, enabled:1, unicast_uses_group:1, encrypt:1; - u8 auth_mode; - u8 encode_alg[WEP_KEYS]; - u8 key_sizes[WEP_KEYS]; - u8 keys[WEP_KEYS][SCM_KEY_LEN]; - u8 level; - u16 flags; -} __attribute__ ((packed)); - -/* - - 802.11 data frame from AP - - ,-------------------------------------------------------------------. -Bytes | 2 | 2 | 6 | 6 | 6 | 2 | 0..2312 | 4 | - |------|------|---------|---------|---------|------|---------|------| -Desc. 
| ctrl | dura | DA/RA | TA | SA | Sequ | frame | fcs | - | | tion | (BSSID) | | | ence | data | | - `-------------------------------------------------------------------' - -Total: 28-2340 bytes - -*/ - -#define BEACON_PROBE_SSID_ID_POSITION 12 - -struct ieee80211_hdr_1addr { - __le16 frame_ctl; - __le16 duration_id; - u8 addr1[ETH_ALEN]; - u8 payload[0]; -} __attribute__ ((packed)); - -struct ieee80211_hdr_2addr { - __le16 frame_ctl; - __le16 duration_id; - u8 addr1[ETH_ALEN]; - u8 addr2[ETH_ALEN]; - u8 payload[0]; -} __attribute__ ((packed)); - -struct ieee80211_hdr_3addr { - __le16 frame_ctl; - __le16 duration_id; - u8 addr1[ETH_ALEN]; - u8 addr2[ETH_ALEN]; - u8 addr3[ETH_ALEN]; - __le16 seq_ctl; - u8 payload[0]; -} __attribute__ ((packed)); - -struct ieee80211_hdr_4addr { - __le16 frame_ctl; - __le16 duration_id; - u8 addr1[ETH_ALEN]; - u8 addr2[ETH_ALEN]; - u8 addr3[ETH_ALEN]; - __le16 seq_ctl; - u8 addr4[ETH_ALEN]; - u8 payload[0]; -} __attribute__ ((packed)); - -struct ieee80211_hdr_3addrqos { - __le16 frame_ctl; - __le16 duration_id; - u8 addr1[ETH_ALEN]; - u8 addr2[ETH_ALEN]; - u8 addr3[ETH_ALEN]; - __le16 seq_ctl; - u8 payload[0]; - __le16 qos_ctl; -} __attribute__ ((packed)); - -struct ieee80211_info_element { - u8 id; - u8 len; - u8 data[0]; -} __attribute__ ((packed)); - -/* - * These are the data types that can make up management packets - * - u16 auth_algorithm; - u16 auth_sequence; - u16 beacon_interval; - u16 capability; - u8 current_ap[ETH_ALEN]; - u16 listen_interval; - struct { - u16 association_id:14, reserved:2; - } __attribute__ ((packed)); - u32 time_stamp[2]; - u16 reason; - u16 status; -*/ - -struct ieee80211_auth { - struct ieee80211_hdr_3addr header; - __le16 algorithm; - __le16 transaction; - __le16 status; - /* challenge */ - struct ieee80211_info_element info_element[0]; -} __attribute__ ((packed)); - -struct ieee80211_channel_switch { - u8 id; - u8 len; - u8 mode; - u8 channel; - u8 count; -} __attribute__ ((packed)); - -struct ieee80211_action { - struct ieee80211_hdr_3addr header; - u8 category; - u8 action; - union { - struct ieee80211_action_exchange { - u8 token; - struct ieee80211_info_element info_element[0]; - } exchange; - struct ieee80211_channel_switch channel_switch; - - } format; -} __attribute__ ((packed)); - -struct ieee80211_disassoc { - struct ieee80211_hdr_3addr header; - __le16 reason; -} __attribute__ ((packed)); - -/* Alias deauth for disassoc */ -#define ieee80211_deauth ieee80211_disassoc - -struct ieee80211_probe_request { - struct ieee80211_hdr_3addr header; - /* SSID, supported rates */ - struct ieee80211_info_element info_element[0]; -} __attribute__ ((packed)); - -struct ieee80211_probe_response { - struct ieee80211_hdr_3addr header; - __le32 time_stamp[2]; - __le16 beacon_interval; - __le16 capability; - /* SSID, supported rates, FH params, DS params, - * CF params, IBSS params, TIM (if beacon), RSN */ - struct ieee80211_info_element info_element[0]; -} __attribute__ ((packed)); - -/* Alias beacon for probe_response */ -#define ieee80211_beacon ieee80211_probe_response - -struct ieee80211_assoc_request { - struct ieee80211_hdr_3addr header; - __le16 capability; - __le16 listen_interval; - /* SSID, supported rates, RSN */ - struct ieee80211_info_element info_element[0]; -} __attribute__ ((packed)); - -struct ieee80211_reassoc_request { - struct ieee80211_hdr_3addr header; - __le16 capability; - __le16 listen_interval; - u8 current_ap[ETH_ALEN]; - struct ieee80211_info_element info_element[0]; -} __attribute__ ((packed)); - -struct 
ieee80211_assoc_response { - struct ieee80211_hdr_3addr header; - __le16 capability; - __le16 status; - __le16 aid; - /* supported rates */ - struct ieee80211_info_element info_element[0]; -} __attribute__ ((packed)); - -struct ieee80211_txb { - u8 nr_frags; - u8 encrypted; - u8 rts_included; - u8 reserved; - u16 frag_size; - u16 payload_size; - struct sk_buff *fragments[0]; -}; - -/* SWEEP TABLE ENTRIES NUMBER */ -#define MAX_SWEEP_TAB_ENTRIES 42 -#define MAX_SWEEP_TAB_ENTRIES_PER_PACKET 7 -/* MAX_RATES_LENGTH needs to be 12. The spec says 8, and many APs - * only use 8, and then use extended rates for the remaining supported - * rates. Other APs, however, stick all of their supported rates on the - * main rates information element... */ -#define MAX_RATES_LENGTH ((u8)12) -#define MAX_RATES_EX_LENGTH ((u8)16) -#define MAX_NETWORK_COUNT 128 - -#define CRC_LENGTH 4U - -#define MAX_WPA_IE_LEN 64 - -#define NETWORK_HAS_OFDM (1<<1) -#define NETWORK_HAS_CCK (1<<2) - -/* QoS structure */ -#define NETWORK_HAS_QOS_PARAMETERS (1<<3) -#define NETWORK_HAS_QOS_INFORMATION (1<<4) -#define NETWORK_HAS_QOS_MASK (NETWORK_HAS_QOS_PARAMETERS | \ - NETWORK_HAS_QOS_INFORMATION) - -/* 802.11h */ -#define NETWORK_HAS_POWER_CONSTRAINT (1<<5) -#define NETWORK_HAS_CSA (1<<6) -#define NETWORK_HAS_QUIET (1<<7) -#define NETWORK_HAS_IBSS_DFS (1<<8) -#define NETWORK_HAS_TPC_REPORT (1<<9) - -#define NETWORK_HAS_ERP_VALUE (1<<10) - -#define QOS_QUEUE_NUM 4 -#define QOS_OUI_LEN 3 -#define QOS_OUI_TYPE 2 -#define QOS_ELEMENT_ID 221 -#define QOS_OUI_INFO_SUB_TYPE 0 -#define QOS_OUI_PARAM_SUB_TYPE 1 -#define QOS_VERSION_1 1 -#define QOS_AIFSN_MIN_VALUE 2 - -struct ieee80211_qos_information_element { - u8 elementID; - u8 length; - u8 qui[QOS_OUI_LEN]; - u8 qui_type; - u8 qui_subtype; - u8 version; - u8 ac_info; -} __attribute__ ((packed)); - -struct ieee80211_qos_ac_parameter { - u8 aci_aifsn; - u8 ecw_min_max; - __le16 tx_op_limit; -} __attribute__ ((packed)); - -struct ieee80211_qos_parameter_info { - struct ieee80211_qos_information_element info_element; - u8 reserved; - struct ieee80211_qos_ac_parameter ac_params_record[QOS_QUEUE_NUM]; -} __attribute__ ((packed)); - -struct ieee80211_qos_parameters { - __le16 cw_min[QOS_QUEUE_NUM]; - __le16 cw_max[QOS_QUEUE_NUM]; - u8 aifs[QOS_QUEUE_NUM]; - u8 flag[QOS_QUEUE_NUM]; - __le16 tx_op_limit[QOS_QUEUE_NUM]; -} __attribute__ ((packed)); - -struct ieee80211_qos_data { - struct ieee80211_qos_parameters parameters; - int active; - int supported; - u8 param_count; - u8 old_param_count; -}; - -struct ieee80211_tim_parameters { - u8 tim_count; - u8 tim_period; -} __attribute__ ((packed)); - -/*******************************************************/ - -enum { /* ieee80211_basic_report.map */ - IEEE80211_BASIC_MAP_BSS = (1 << 0), - IEEE80211_BASIC_MAP_OFDM = (1 << 1), - IEEE80211_BASIC_MAP_UNIDENTIFIED = (1 << 2), - IEEE80211_BASIC_MAP_RADAR = (1 << 3), - IEEE80211_BASIC_MAP_UNMEASURED = (1 << 4), - /* Bits 5-7 are reserved */ - -}; -struct ieee80211_basic_report { - u8 channel; - __le64 start_time; - __le16 duration; - u8 map; -} __attribute__ ((packed)); - -enum { /* ieee80211_measurement_request.mode */ - /* Bit 0 is reserved */ - IEEE80211_MEASUREMENT_ENABLE = (1 << 1), - IEEE80211_MEASUREMENT_REQUEST = (1 << 2), - IEEE80211_MEASUREMENT_REPORT = (1 << 3), - /* Bits 4-7 are reserved */ -}; - -enum { - IEEE80211_REPORT_BASIC = 0, /* required */ - IEEE80211_REPORT_CCA = 1, /* optional */ - IEEE80211_REPORT_RPI = 2, /* optional */ - /* 3-255 reserved */ -}; - -struct 
ieee80211_measurement_params { - u8 channel; - __le64 start_time; - __le16 duration; -} __attribute__ ((packed)); - -struct ieee80211_measurement_request { - struct ieee80211_info_element ie; - u8 token; - u8 mode; - u8 type; - struct ieee80211_measurement_params params[0]; -} __attribute__ ((packed)); - -struct ieee80211_measurement_report { - struct ieee80211_info_element ie; - u8 token; - u8 mode; - u8 type; - union { - struct ieee80211_basic_report basic[0]; - } u; -} __attribute__ ((packed)); - -struct ieee80211_tpc_report { - u8 transmit_power; - u8 link_margin; -} __attribute__ ((packed)); - -struct ieee80211_channel_map { - u8 channel; - u8 map; -} __attribute__ ((packed)); - -struct ieee80211_ibss_dfs { - struct ieee80211_info_element ie; - u8 owner[ETH_ALEN]; - u8 recovery_interval; - struct ieee80211_channel_map channel_map[0]; -}; - -struct ieee80211_csa { - u8 mode; - u8 channel; - u8 count; -} __attribute__ ((packed)); - -struct ieee80211_quiet { - u8 count; - u8 period; - u8 duration; - u8 offset; -} __attribute__ ((packed)); - -struct ieee80211_network { - /* These entries are used to identify a unique network */ - u8 bssid[ETH_ALEN]; - u8 channel; - /* Ensure null-terminated for any debug msgs */ - u8 ssid[IW_ESSID_MAX_SIZE + 1]; - u8 ssid_len; - - struct ieee80211_qos_data qos_data; - - /* These are network statistics */ - struct ieee80211_rx_stats stats; - u16 capability; - u8 rates[MAX_RATES_LENGTH]; - u8 rates_len; - u8 rates_ex[MAX_RATES_EX_LENGTH]; - u8 rates_ex_len; - unsigned long last_scanned; - u8 mode; - u32 flags; - u32 last_associate; - u32 time_stamp[2]; - u16 beacon_interval; - u16 listen_interval; - u16 atim_window; - u8 erp_value; - u8 wpa_ie[MAX_WPA_IE_LEN]; - size_t wpa_ie_len; - u8 rsn_ie[MAX_WPA_IE_LEN]; - size_t rsn_ie_len; - struct ieee80211_tim_parameters tim; - - /* 802.11h info */ - - /* Power Constraint - mandatory if spctrm mgmt required */ - u8 power_constraint; - - /* TPC Report - mandatory if spctrm mgmt required */ - struct ieee80211_tpc_report tpc_report; - - /* IBSS DFS - mandatory if spctrm mgmt required and IBSS - * NOTE: This is variable length and so must be allocated dynamically */ - struct ieee80211_ibss_dfs *ibss_dfs; - - /* Channel Switch Announcement - optional if spctrm mgmt required */ - struct ieee80211_csa csa; - - /* Quiet - optional if spctrm mgmt required */ - struct ieee80211_quiet quiet; - - struct list_head list; -}; - -enum ieee80211_state { - IEEE80211_UNINITIALIZED = 0, - IEEE80211_INITIALIZED, - IEEE80211_ASSOCIATING, - IEEE80211_ASSOCIATED, - IEEE80211_AUTHENTICATING, - IEEE80211_AUTHENTICATED, - IEEE80211_SHUTDOWN -}; - -#define DEFAULT_MAX_SCAN_AGE (15 * HZ) -#define DEFAULT_FTS 2346 - -#define CFG_IEEE80211_RESERVE_FCS (1<<0) -#define CFG_IEEE80211_COMPUTE_FCS (1<<1) -#define CFG_IEEE80211_RTS (1<<2) - -#define IEEE80211_24GHZ_MIN_CHANNEL 1 -#define IEEE80211_24GHZ_MAX_CHANNEL 14 -#define IEEE80211_24GHZ_CHANNELS (IEEE80211_24GHZ_MAX_CHANNEL - \ - IEEE80211_24GHZ_MIN_CHANNEL + 1) - -#define IEEE80211_52GHZ_MIN_CHANNEL 34 -#define IEEE80211_52GHZ_MAX_CHANNEL 165 -#define IEEE80211_52GHZ_CHANNELS (IEEE80211_52GHZ_MAX_CHANNEL - \ - IEEE80211_52GHZ_MIN_CHANNEL + 1) - -enum { - IEEE80211_CH_PASSIVE_ONLY = (1 << 0), - IEEE80211_CH_80211H_RULES = (1 << 1), - IEEE80211_CH_B_ONLY = (1 << 2), - IEEE80211_CH_NO_IBSS = (1 << 3), - IEEE80211_CH_UNIFORM_SPREADING = (1 << 4), - IEEE80211_CH_RADAR_DETECT = (1 << 5), - IEEE80211_CH_INVALID = (1 << 6), -}; - -struct ieee80211_channel { - u32 freq; /* in MHz */ - u8 channel; - u8 
flags; - u8 max_power; /* in dBm */ -}; - -struct ieee80211_geo { - u8 name[4]; - u8 bg_channels; - u8 a_channels; - struct ieee80211_channel bg[IEEE80211_24GHZ_CHANNELS]; - struct ieee80211_channel a[IEEE80211_52GHZ_CHANNELS]; -}; - -struct ieee80211_device { - struct net_device *dev; - struct ieee80211_security sec; - - /* Bookkeeping structures */ - struct ieee80211_stats ieee_stats; - - struct ieee80211_geo geo; - - /* Probe / Beacon management */ - struct list_head network_free_list; - struct list_head network_list; - struct ieee80211_network *networks; - int scans; - int scan_age; - - int iw_mode; /* operating mode (IW_MODE_*) */ - struct iw_spy_data spy_data; /* iwspy support */ - - spinlock_t lock; - - int tx_headroom; /* Set to size of any additional room needed at front - * of allocated Tx SKBs */ - u32 config; - - /* WEP and other encryption related settings at the device level */ - int open_wep; /* Set to 1 to allow unencrypted frames */ - - int reset_on_keychange; /* Set to 1 if the HW needs to be reset on - * WEP key changes */ - - /* If the host performs {en,de}cryption, then set to 1 */ - int host_encrypt; - int host_encrypt_msdu; - int host_decrypt; - /* host performs multicast decryption */ - int host_mc_decrypt; - - /* host should strip IV and ICV from protected frames */ - /* meaningful only when hardware decryption is being used */ - int host_strip_iv_icv; - - int host_open_frag; - int host_build_iv; - int ieee802_1x; /* is IEEE 802.1X used */ - - /* WPA data */ - int wpa_enabled; - int drop_unencrypted; - int privacy_invoked; - size_t wpa_ie_len; - u8 *wpa_ie; - - struct lib80211_crypt_info crypt_info; - - int bcrx_sta_key; /* use individual keys to override default keys even - * with RX of broad/multicast frames */ - - /* Fragmentation structures */ - struct ieee80211_frag_entry frag_cache[IEEE80211_FRAG_CACHE_LEN]; - unsigned int frag_next_idx; - u16 fts; /* Fragmentation Threshold */ - u16 rts; /* RTS threshold */ - - /* Association info */ - u8 bssid[ETH_ALEN]; - - enum ieee80211_state state; - - int mode; /* A, B, G */ - int modulation; /* CCK, OFDM */ - int freq_band; /* 2.4Ghz, 5.2Ghz, Mixed */ - int abg_true; /* ABG flag */ - - int perfect_rssi; - int worst_rssi; - - u16 prev_seq_ctl; /* used to drop duplicate frames */ - - /* Callback functions */ - void (*set_security) (struct net_device * dev, - struct ieee80211_security * sec); - int (*hard_start_xmit) (struct ieee80211_txb * txb, - struct net_device * dev, int pri); - int (*reset_port) (struct net_device * dev); - int (*is_queue_full) (struct net_device * dev, int pri); - - int (*handle_management) (struct net_device * dev, - struct ieee80211_network * network, u16 type); - int (*is_qos_active) (struct net_device *dev, struct sk_buff *skb); - - /* Typical STA methods */ - int (*handle_auth) (struct net_device * dev, - struct ieee80211_auth * auth); - int (*handle_deauth) (struct net_device * dev, - struct ieee80211_deauth * auth); - int (*handle_action) (struct net_device * dev, - struct ieee80211_action * action, - struct ieee80211_rx_stats * stats); - int (*handle_disassoc) (struct net_device * dev, - struct ieee80211_disassoc * assoc); - int (*handle_beacon) (struct net_device * dev, - struct ieee80211_beacon * beacon, - struct ieee80211_network * network); - int (*handle_probe_response) (struct net_device * dev, - struct ieee80211_probe_response * resp, - struct ieee80211_network * network); - int (*handle_probe_request) (struct net_device * dev, - struct ieee80211_probe_request * req, - struct 
ieee80211_rx_stats * stats); - int (*handle_assoc_response) (struct net_device * dev, - struct ieee80211_assoc_response * resp, - struct ieee80211_network * network); - - /* Typical AP methods */ - int (*handle_assoc_request) (struct net_device * dev); - int (*handle_reassoc_request) (struct net_device * dev, - struct ieee80211_reassoc_request * req); - - /* This must be the last item so that it points to the data - * allocated beyond this structure by alloc_ieee80211 */ - u8 priv[0]; -}; - -#define IEEE_A (1<<0) -#define IEEE_B (1<<1) -#define IEEE_G (1<<2) -#define IEEE_MODE_MASK (IEEE_A|IEEE_B|IEEE_G) - -static inline void *ieee80211_priv(struct net_device *dev) -{ - return ((struct ieee80211_device *)netdev_priv(dev))->priv; -} - -static inline int ieee80211_is_valid_mode(struct ieee80211_device *ieee, - int mode) -{ - /* - * It is possible for both access points and our device to support - * combinations of modes, so as long as there is one valid combination - * of ap/device supported modes, then return success - * - */ - if ((mode & IEEE_A) && - (ieee->modulation & IEEE80211_OFDM_MODULATION) && - (ieee->freq_band & IEEE80211_52GHZ_BAND)) - return 1; - - if ((mode & IEEE_G) && - (ieee->modulation & IEEE80211_OFDM_MODULATION) && - (ieee->freq_band & IEEE80211_24GHZ_BAND)) - return 1; - - if ((mode & IEEE_B) && - (ieee->modulation & IEEE80211_CCK_MODULATION) && - (ieee->freq_band & IEEE80211_24GHZ_BAND)) - return 1; - - return 0; -} - -static inline int ieee80211_get_hdrlen(u16 fc) -{ - int hdrlen = IEEE80211_3ADDR_LEN; - u16 stype = WLAN_FC_GET_STYPE(fc); - - switch (WLAN_FC_GET_TYPE(fc)) { - case IEEE80211_FTYPE_DATA: - if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) - hdrlen = IEEE80211_4ADDR_LEN; - if (stype & IEEE80211_STYPE_QOS_DATA) - hdrlen += 2; - break; - case IEEE80211_FTYPE_CTL: - switch (WLAN_FC_GET_STYPE(fc)) { - case IEEE80211_STYPE_CTS: - case IEEE80211_STYPE_ACK: - hdrlen = IEEE80211_1ADDR_LEN; - break; - default: - hdrlen = IEEE80211_2ADDR_LEN; - break; - } - break; - } - - return hdrlen; -} - -static inline u8 *ieee80211_get_payload(struct ieee80211_hdr *hdr) -{ - switch (ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control))) { - case IEEE80211_1ADDR_LEN: - return ((struct ieee80211_hdr_1addr *)hdr)->payload; - case IEEE80211_2ADDR_LEN: - return ((struct ieee80211_hdr_2addr *)hdr)->payload; - case IEEE80211_3ADDR_LEN: - return ((struct ieee80211_hdr_3addr *)hdr)->payload; - case IEEE80211_4ADDR_LEN: - return ((struct ieee80211_hdr_4addr *)hdr)->payload; - } - return NULL; -} - -static inline int ieee80211_is_ofdm_rate(u8 rate) -{ - switch (rate & ~IEEE80211_BASIC_RATE_MASK) { - case IEEE80211_OFDM_RATE_6MB: - case IEEE80211_OFDM_RATE_9MB: - case IEEE80211_OFDM_RATE_12MB: - case IEEE80211_OFDM_RATE_18MB: - case IEEE80211_OFDM_RATE_24MB: - case IEEE80211_OFDM_RATE_36MB: - case IEEE80211_OFDM_RATE_48MB: - case IEEE80211_OFDM_RATE_54MB: - return 1; - } - return 0; -} - -static inline int ieee80211_is_cck_rate(u8 rate) -{ - switch (rate & ~IEEE80211_BASIC_RATE_MASK) { - case IEEE80211_CCK_RATE_1MB: - case IEEE80211_CCK_RATE_2MB: - case IEEE80211_CCK_RATE_5MB: - case IEEE80211_CCK_RATE_11MB: - return 1; - } - return 0; -} - -/* ieee80211.c */ -extern void free_ieee80211(struct net_device *dev); -extern struct net_device *alloc_ieee80211(int sizeof_priv); -extern int ieee80211_change_mtu(struct net_device *dev, int new_mtu); - -extern void ieee80211_networks_age(struct ieee80211_device *ieee, - unsigned long age_secs); - -extern int 
ieee80211_set_encryption(struct ieee80211_device *ieee); - -/* ieee80211_tx.c */ -extern int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev); -extern void ieee80211_txb_free(struct ieee80211_txb *); - -/* ieee80211_rx.c */ -extern void ieee80211_rx_any(struct ieee80211_device *ieee, - struct sk_buff *skb, struct ieee80211_rx_stats *stats); -extern int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, - struct ieee80211_rx_stats *rx_stats); -/* make sure to set stats->len */ -extern void ieee80211_rx_mgt(struct ieee80211_device *ieee, - struct ieee80211_hdr_4addr *header, - struct ieee80211_rx_stats *stats); -extern void ieee80211_network_reset(struct ieee80211_network *network); - -/* ieee80211_geo.c */ -extern const struct ieee80211_geo *ieee80211_get_geo(struct ieee80211_device - *ieee); -extern int ieee80211_set_geo(struct ieee80211_device *ieee, - const struct ieee80211_geo *geo); - -extern int ieee80211_is_valid_channel(struct ieee80211_device *ieee, - u8 channel); -extern int ieee80211_channel_to_index(struct ieee80211_device *ieee, - u8 channel); -extern u8 ieee80211_freq_to_channel(struct ieee80211_device *ieee, u32 freq); -extern u8 ieee80211_get_channel_flags(struct ieee80211_device *ieee, - u8 channel); -extern const struct ieee80211_channel *ieee80211_get_channel(struct - ieee80211_device - *ieee, u8 channel); -extern u32 ieee80211_channel_to_freq(struct ieee80211_device * ieee, - u8 channel); - -/* ieee80211_wx.c */ -extern int ieee80211_wx_get_scan(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *key); -extern int ieee80211_wx_set_encode(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *key); -extern int ieee80211_wx_get_encode(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *key); -extern int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *extra); -extern int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee, - struct iw_request_info *info, - union iwreq_data *wrqu, char *extra); - -static inline void ieee80211_increment_scans(struct ieee80211_device *ieee) -{ - ieee->scans++; -} - -static inline int ieee80211_get_scans(struct ieee80211_device *ieee) -{ - return ieee->scans; -} - -#endif /* IEEE80211_H */ diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index dee50ed0897d..33bdb20e9f1e 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -1673,7 +1673,7 @@ static int ipw2100_start_scan(struct ipw2100_priv *priv) return err; } -static const struct ieee80211_geo ipw_geos[] = { +static const struct libipw_geo ipw_geos[] = { { /* Restricted */ "---", .bg_channels = 14, @@ -1694,7 +1694,7 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred) /* Age scan list entries found before suspend */ if (priv->suspend_time) { - ieee80211_networks_age(priv->ieee, priv->suspend_time); + libipw_networks_age(priv->ieee, priv->suspend_time); priv->suspend_time = 0; } @@ -1752,11 +1752,11 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred) } /* Initialize the geo */ - if (ieee80211_set_geo(priv->ieee, &ipw_geos[0])) { + if (libipw_set_geo(priv->ieee, &ipw_geos[0])) { printk(KERN_WARNING DRV_NAME "Could not set geo\n"); return 0; } - priv->ieee->freq_band = IEEE80211_24GHZ_BAND; + priv->ieee->freq_band = LIBIPW_24GHZ_BAND; lock = LOCK_NONE; 
if (ipw2100_set_ordinal(priv, IPW_ORD_PERS_DB_LOCK, &lock, &ord_len)) { @@ -1817,7 +1817,7 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred) /* Called by register_netdev() */ static int ipw2100_net_init(struct net_device *dev) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); return ipw2100_up(priv, 1); } @@ -2340,8 +2340,8 @@ static u32 ipw2100_match_buf(struct ipw2100_priv *priv, u8 * in_buf, * * When packet is provided by the firmware, it contains the following: * - * . ieee80211_hdr - * . ieee80211_snap_hdr + * . libipw_hdr + * . libipw_snap_hdr * * The size of the constructed ethernet * @@ -2396,7 +2396,7 @@ static void ipw2100_corruption_detected(struct ipw2100_priv *priv, int i) } static void isr_rx(struct ipw2100_priv *priv, int i, - struct ieee80211_rx_stats *stats) + struct libipw_rx_stats *stats) { struct net_device *dev = priv->net_dev; struct ipw2100_status *status = &priv->status_queue.drv[i]; @@ -2435,13 +2435,13 @@ static void isr_rx(struct ipw2100_priv *priv, int i, #ifdef IPW2100_RX_DEBUG /* Make a copy of the frame so we can dump it to the logs if - * ieee80211_rx fails */ + * libipw_rx fails */ skb_copy_from_linear_data(packet->skb, packet_data, min_t(u32, status->frame_size, IPW_RX_NIC_BUFFER_LENGTH)); #endif - if (!ieee80211_rx(priv->ieee, packet->skb, stats)) { + if (!libipw_rx(priv->ieee, packet->skb, stats)) { #ifdef IPW2100_RX_DEBUG IPW_DEBUG_DROP("%s: Non consumed packet:\n", dev->name); @@ -2449,7 +2449,7 @@ static void isr_rx(struct ipw2100_priv *priv, int i, #endif dev->stats.rx_errors++; - /* ieee80211_rx failed, so it didn't free the SKB */ + /* libipw_rx failed, so it didn't free the SKB */ dev_kfree_skb_any(packet->skb); packet->skb = NULL; } @@ -2470,7 +2470,7 @@ static void isr_rx(struct ipw2100_priv *priv, int i, #ifdef CONFIG_IPW2100_MONITOR static void isr_rx_monitor(struct ipw2100_priv *priv, int i, - struct ieee80211_rx_stats *stats) + struct libipw_rx_stats *stats) { struct net_device *dev = priv->net_dev; struct ipw2100_status *status = &priv->status_queue.drv[i]; @@ -2528,10 +2528,10 @@ static void isr_rx_monitor(struct ipw2100_priv *priv, int i, skb_put(packet->skb, status->frame_size + sizeof(struct ipw_rt_hdr)); - if (!ieee80211_rx(priv->ieee, packet->skb, stats)) { + if (!libipw_rx(priv->ieee, packet->skb, stats)) { dev->stats.rx_errors++; - /* ieee80211_rx failed, so it didn't free the SKB */ + /* libipw_rx failed, so it didn't free the SKB */ dev_kfree_skb_any(packet->skb); packet->skb = NULL; } @@ -2615,7 +2615,7 @@ static void __ipw2100_rx_process(struct ipw2100_priv *priv) u16 frame_type; u32 r, w, i, s; struct ipw2100_rx *u; - struct ieee80211_rx_stats stats = { + struct libipw_rx_stats stats = { .mac_time = jiffies, }; @@ -2661,8 +2661,8 @@ static void __ipw2100_rx_process(struct ipw2100_priv *priv) stats.mask = 0; if (stats.rssi != 0) - stats.mask |= IEEE80211_STATMASK_RSSI; - stats.freq = IEEE80211_24GHZ_BAND; + stats.mask |= LIBIPW_STATMASK_RSSI; + stats.freq = LIBIPW_24GHZ_BAND; IPW_DEBUG_RX("%s: '%s' frame type received (%d).\n", priv->net_dev->name, frame_types[frame_type], @@ -2686,11 +2686,11 @@ static void __ipw2100_rx_process(struct ipw2100_priv *priv) break; } #endif - if (stats.len < sizeof(struct ieee80211_hdr_3addr)) + if (stats.len < sizeof(struct libipw_hdr_3addr)) break; switch (WLAN_FC_GET_TYPE(le16_to_cpu(u->rx_data.header.frame_ctl))) { case IEEE80211_FTYPE_MGMT: - ieee80211_rx_mgt(priv->ieee, + libipw_rx_mgt(priv->ieee, &u->rx_data.header, &stats); 
break; @@ -2884,7 +2884,7 @@ static int __ipw2100_tx_process(struct ipw2100_priv *priv) tbd->buf_length, PCI_DMA_TODEVICE); } - ieee80211_txb_free(packet->info.d_struct.txb); + libipw_txb_free(packet->info.d_struct.txb); packet->info.d_struct.txb = NULL; list_add_tail(element, &priv->tx_free_list); @@ -3028,7 +3028,7 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv) int next = txq->next; int i = 0; struct ipw2100_data_header *ipw_hdr; - struct ieee80211_hdr_3addr *hdr; + struct libipw_hdr_3addr *hdr; while (!list_empty(&priv->tx_pend_list)) { /* if there isn't enough space in TBD queue, then @@ -3062,7 +3062,7 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv) packet->index = txq->next; ipw_hdr = packet->info.d_struct.data; - hdr = (struct ieee80211_hdr_3addr *)packet->info.d_struct.txb-> + hdr = (struct libipw_hdr_3addr *)packet->info.d_struct.txb-> fragments[0]->data; if (priv->ieee->iw_mode == IW_MODE_INFRA) { @@ -3086,7 +3086,7 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv) if (packet->info.d_struct.txb->nr_frags > 1) ipw_hdr->fragment_size = packet->info.d_struct.txb->frag_size - - IEEE80211_3ADDR_LEN; + LIBIPW_3ADDR_LEN; else ipw_hdr->fragment_size = 0; @@ -3119,13 +3119,13 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv) IPW_BD_STATUS_TX_FRAME_NOT_LAST_FRAGMENT; tbd->buf_length = packet->info.d_struct.txb-> - fragments[i]->len - IEEE80211_3ADDR_LEN; + fragments[i]->len - LIBIPW_3ADDR_LEN; tbd->host_addr = pci_map_single(priv->pci_dev, packet->info.d_struct. txb->fragments[i]-> data + - IEEE80211_3ADDR_LEN, + LIBIPW_3ADDR_LEN, tbd->buf_length, PCI_DMA_TODEVICE); @@ -3330,10 +3330,10 @@ static irqreturn_t ipw2100_interrupt(int irq, void *data) return IRQ_NONE; } -static int ipw2100_tx(struct ieee80211_txb *txb, struct net_device *dev, +static int ipw2100_tx(struct libipw_txb *txb, struct net_device *dev, int pri) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); struct list_head *element; struct ipw2100_tx_packet *packet; unsigned long flags; @@ -4488,7 +4488,7 @@ static void ipw2100_tx_initialize(struct ipw2100_priv *priv) /* We simply drop any SKBs that have been queued for * transmit */ if (priv->tx_buffers[i].info.d_struct.txb) { - ieee80211_txb_free(priv->tx_buffers[i].info.d_struct. + libipw_txb_free(priv->tx_buffers[i].info.d_struct. txb); priv->tx_buffers[i].info.d_struct.txb = NULL; } @@ -4527,7 +4527,7 @@ static void ipw2100_tx_free(struct ipw2100_priv *priv) for (i = 0; i < TX_PENDED_QUEUE_LENGTH; i++) { if (priv->tx_buffers[i].info.d_struct.txb) { - ieee80211_txb_free(priv->tx_buffers[i].info.d_struct. + libipw_txb_free(priv->tx_buffers[i].info.d_struct. 
txb); priv->tx_buffers[i].info.d_struct.txb = NULL; } @@ -5558,9 +5558,9 @@ static void ipw2100_security_work(struct work_struct *work) } static void shim__set_security(struct net_device *dev, - struct ieee80211_security *sec) + struct libipw_security *sec) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int i, force_update = 0; mutex_lock(&priv->action_mutex); @@ -5753,7 +5753,7 @@ static int ipw2100_adapter_setup(struct ipw2100_priv *priv) * method as well) to talk to the firmware */ static int ipw2100_set_address(struct net_device *dev, void *p) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); struct sockaddr *addr = p; int err = 0; @@ -5781,7 +5781,7 @@ static int ipw2100_set_address(struct net_device *dev, void *p) static int ipw2100_open(struct net_device *dev) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); unsigned long flags; IPW_DEBUG_INFO("dev->open\n"); @@ -5797,7 +5797,7 @@ static int ipw2100_open(struct net_device *dev) static int ipw2100_close(struct net_device *dev) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); unsigned long flags; struct list_head *element; struct ipw2100_tx_packet *packet; @@ -5818,7 +5818,7 @@ static int ipw2100_close(struct net_device *dev) list_del(element); DEC_STAT(&priv->tx_pend_stat); - ieee80211_txb_free(packet->info.d_struct.txb); + libipw_txb_free(packet->info.d_struct.txb); packet->info.d_struct.txb = NULL; list_add_tail(element, &priv->tx_free_list); @@ -5836,7 +5836,7 @@ static int ipw2100_close(struct net_device *dev) */ static void ipw2100_tx_timeout(struct net_device *dev) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); dev->stats.tx_errors++; @@ -5861,8 +5861,8 @@ static int ipw2100_wpa_enable(struct ipw2100_priv *priv, int value) static int ipw2100_wpa_set_auth_algs(struct ipw2100_priv *priv, int value) { - struct ieee80211_device *ieee = priv->ieee; - struct ieee80211_security sec = { + struct libipw_device *ieee = priv->ieee; + struct libipw_security sec = { .flags = SEC_AUTH_MODE, }; int ret = 0; @@ -5907,7 +5907,7 @@ static void ipw2100_wpa_assoc_frame(struct ipw2100_priv *priv, static void ipw_ethtool_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); char fw_ver[64], ucode_ver[64]; strcpy(info->driver, DRV_NAME); @@ -5924,7 +5924,7 @@ static void ipw_ethtool_get_drvinfo(struct net_device *dev, static u32 ipw2100_ethtool_get_link(struct net_device *dev) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); return (priv->status & STATUS_ASSOCIATED) ? 
1 : 0; } @@ -6011,8 +6011,8 @@ static void ipw2100_irq_tasklet(struct ipw2100_priv *priv); static const struct net_device_ops ipw2100_netdev_ops = { .ndo_open = ipw2100_open, .ndo_stop = ipw2100_close, - .ndo_start_xmit = ieee80211_xmit, - .ndo_change_mtu = ieee80211_change_mtu, + .ndo_start_xmit = libipw_xmit, + .ndo_change_mtu = libipw_change_mtu, .ndo_init = ipw2100_net_init, .ndo_tx_timeout = ipw2100_tx_timeout, .ndo_set_mac_address = ipw2100_set_address, @@ -6032,7 +6032,7 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev, dev = alloc_ieee80211(sizeof(struct ipw2100_priv)); if (!dev) return NULL; - priv = ieee80211_priv(dev); + priv = libipw_priv(dev); priv->ieee = netdev_priv(dev); priv->pci_dev = pci_dev; priv->net_dev = dev; @@ -6046,7 +6046,7 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev, dev->netdev_ops = &ipw2100_netdev_ops; dev->ethtool_ops = &ipw2100_ethtool_ops; dev->wireless_handlers = &ipw2100_wx_handler_def; - priv->wireless_data.ieee80211 = priv->ieee; + priv->wireless_data.libipw = priv->ieee; dev->wireless_data = &priv->wireless_data; dev->watchdog_timeo = 3 * HZ; dev->irq = 0; @@ -6202,7 +6202,7 @@ static int ipw2100_pci_init_one(struct pci_dev *pci_dev, return err; } - priv = ieee80211_priv(dev); + priv = libipw_priv(dev); pci_set_master(pci_dev); pci_set_drvdata(pci_dev, priv); @@ -6629,7 +6629,7 @@ static int ipw2100_wx_get_name(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); if (!(priv->status & STATUS_ASSOCIATED)) strcpy(wrqu->name, "unassociated"); else @@ -6643,7 +6643,7 @@ static int ipw2100_wx_set_freq(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); struct iw_freq *fwrq = &wrqu->freq; int err = 0; @@ -6693,7 +6693,7 @@ static int ipw2100_wx_get_freq(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); wrqu->freq.e = 0; @@ -6714,7 +6714,7 @@ static int ipw2100_wx_set_mode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int err = 0; IPW_DEBUG_WX("SET Mode -> %d \n", wrqu->mode); @@ -6757,7 +6757,7 @@ static int ipw2100_wx_get_mode(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); wrqu->mode = priv->ieee->iw_mode; IPW_DEBUG_WX("GET Mode -> %d\n", wrqu->mode); @@ -6792,7 +6792,7 @@ static int ipw2100_wx_get_range(struct net_device *dev, * This can be called at any time. 
No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); struct iw_range *range = (struct iw_range *)extra; u16 val; int i, level; @@ -6913,7 +6913,7 @@ static int ipw2100_wx_set_wap(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int err = 0; static const unsigned char any[] = { @@ -6962,7 +6962,7 @@ static int ipw2100_wx_get_wap(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); /* If we are associated, trying to associate, or have a statically * configured BSSID then return that; otherwise return ANY */ @@ -6980,7 +6980,7 @@ static int ipw2100_wx_set_essid(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); char *essid = ""; /* ANY */ int length = 0; int err = 0; @@ -7035,7 +7035,7 @@ static int ipw2100_wx_get_essid(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); DECLARE_SSID_BUF(ssid); /* If we are associated, trying to associate, or have a statically @@ -7063,7 +7063,7 @@ static int ipw2100_wx_set_nick(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); if (wrqu->data.length > IW_ESSID_MAX_SIZE) return -E2BIG; @@ -7085,7 +7085,7 @@ static int ipw2100_wx_get_nick(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); wrqu->data.length = strlen(priv->nick); memcpy(extra, priv->nick, wrqu->data.length); @@ -7100,7 +7100,7 @@ static int ipw2100_wx_set_rate(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); u32 target_rate = wrqu->bitrate.value; u32 rate; int err = 0; @@ -7140,7 +7140,7 @@ static int ipw2100_wx_get_rate(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int val; unsigned int len = sizeof(val); int err = 0; @@ -7192,7 +7192,7 @@ static int ipw2100_wx_set_rts(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int value, err; /* Auto RTS not yet supported */ @@ -7231,7 +7231,7 @@ static int ipw2100_wx_get_rts(struct net_device *dev, * This can be called at any time. 
No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); wrqu->rts.value = priv->rts_threshold & ~RTS_DISABLED; wrqu->rts.fixed = 1; /* no auto select */ @@ -7248,7 +7248,7 @@ static int ipw2100_wx_set_txpow(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int err = 0, value; if (ipw_radio_kill_sw(priv, wrqu->txpower.disabled)) @@ -7293,7 +7293,7 @@ static int ipw2100_wx_get_txpow(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); wrqu->txpower.disabled = (priv->status & STATUS_RF_KILL_MASK) ? 1 : 0; @@ -7320,7 +7320,7 @@ static int ipw2100_wx_set_frag(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); if (!wrqu->frag.fixed) return -EINVAL; @@ -7350,7 +7350,7 @@ static int ipw2100_wx_get_frag(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); wrqu->frag.value = priv->frag_threshold & ~FRAG_DISABLED; wrqu->frag.fixed = 0; /* no auto select */ wrqu->frag.disabled = (priv->frag_threshold & FRAG_DISABLED) ? 1 : 0; @@ -7364,7 +7364,7 @@ static int ipw2100_wx_set_retry(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int err = 0; if (wrqu->retry.flags & IW_RETRY_LIFETIME || wrqu->retry.disabled) @@ -7412,7 +7412,7 @@ static int ipw2100_wx_get_retry(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); wrqu->retry.disabled = 0; /* can't be disabled */ @@ -7440,7 +7440,7 @@ static int ipw2100_wx_set_scan(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int err = 0; mutex_lock(&priv->action_mutex); @@ -7472,8 +7472,8 @@ static int ipw2100_wx_get_scan(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); - return ieee80211_wx_get_scan(priv->ieee, info, wrqu, extra); + struct ipw2100_priv *priv = libipw_priv(dev); + return libipw_wx_get_scan(priv->ieee, info, wrqu, extra); } /* @@ -7487,8 +7487,8 @@ static int ipw2100_wx_set_encode(struct net_device *dev, * No check of STATUS_INITIALIZED required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); - return ieee80211_wx_set_encode(priv->ieee, info, wrqu, key); + struct ipw2100_priv *priv = libipw_priv(dev); + return libipw_wx_set_encode(priv->ieee, info, wrqu, key); } static int ipw2100_wx_get_encode(struct net_device *dev, @@ -7499,15 +7499,15 @@ static int ipw2100_wx_get_encode(struct net_device *dev, * This can be called at any time. 
No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); - return ieee80211_wx_get_encode(priv->ieee, info, wrqu, key); + struct ipw2100_priv *priv = libipw_priv(dev); + return libipw_wx_get_encode(priv->ieee, info, wrqu, key); } static int ipw2100_wx_set_power(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int err = 0; mutex_lock(&priv->action_mutex); @@ -7556,7 +7556,7 @@ static int ipw2100_wx_get_power(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); if (!(priv->power_mode & IPW_POWER_ENABLED)) wrqu->power.disabled = 1; @@ -7580,8 +7580,8 @@ static int ipw2100_wx_set_genie(struct net_device *dev, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); - struct ieee80211_device *ieee = priv->ieee; + struct ipw2100_priv *priv = libipw_priv(dev); + struct libipw_device *ieee = priv->ieee; u8 *buf; if (!ieee->wpa_enabled) @@ -7615,8 +7615,8 @@ static int ipw2100_wx_get_genie(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); - struct ieee80211_device *ieee = priv->ieee; + struct ipw2100_priv *priv = libipw_priv(dev); + struct libipw_device *ieee = priv->ieee; if (ieee->wpa_ie_len == 0 || ieee->wpa_ie == NULL) { wrqu->data.length = 0; @@ -7637,8 +7637,8 @@ static int ipw2100_wx_set_auth(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); - struct ieee80211_device *ieee = priv->ieee; + struct ipw2100_priv *priv = libipw_priv(dev); + struct libipw_device *ieee = priv->ieee; struct iw_param *param = &wrqu->param; struct lib80211_crypt_data *crypt; unsigned long flags; @@ -7682,7 +7682,7 @@ static int ipw2100_wx_set_auth(struct net_device *dev, * can use this to determine if the CAP_PRIVACY_ON bit should * be set. 
*/ - struct ieee80211_security sec = { + struct libipw_security sec = { .flags = SEC_ENABLED, .enabled = param->value, }; @@ -7730,8 +7730,8 @@ static int ipw2100_wx_get_auth(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); - struct ieee80211_device *ieee = priv->ieee; + struct ipw2100_priv *priv = libipw_priv(dev); + struct libipw_device *ieee = priv->ieee; struct lib80211_crypt_data *crypt; struct iw_param *param = &wrqu->param; int ret = 0; @@ -7792,8 +7792,8 @@ static int ipw2100_wx_set_encodeext(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); - return ieee80211_wx_set_encodeext(priv->ieee, info, wrqu, extra); + struct ipw2100_priv *priv = libipw_priv(dev); + return libipw_wx_set_encodeext(priv->ieee, info, wrqu, extra); } /* SIOCGIWENCODEEXT */ @@ -7801,8 +7801,8 @@ static int ipw2100_wx_get_encodeext(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); - return ieee80211_wx_get_encodeext(priv->ieee, info, wrqu, extra); + struct ipw2100_priv *priv = libipw_priv(dev); + return libipw_wx_get_encodeext(priv->ieee, info, wrqu, extra); } /* SIOCSIWMLME */ @@ -7810,7 +7810,7 @@ static int ipw2100_wx_set_mlme(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); struct iw_mlme *mlme = (struct iw_mlme *)extra; __le16 reason; @@ -7841,7 +7841,7 @@ static int ipw2100_wx_set_promisc(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int *parms = (int *)extra; int enable = (parms[0] > 0); int err = 0; @@ -7872,7 +7872,7 @@ static int ipw2100_wx_reset(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); if (priv->status & STATUS_INITIALIZED) schedule_reset(priv); return 0; @@ -7884,7 +7884,7 @@ static int ipw2100_wx_set_powermode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int err = 0, mode = *(int *)extra; mutex_lock(&priv->action_mutex); @@ -7912,7 +7912,7 @@ static int ipw2100_wx_get_powermode(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int level = IPW_POWER_LEVEL(priv->power_mode); s32 timeout, period; @@ -7948,7 +7948,7 @@ static int ipw2100_wx_set_preamble(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int err, mode = *(int *)extra; mutex_lock(&priv->action_mutex); @@ -7981,7 +7981,7 @@ static int ipw2100_wx_get_preamble(struct net_device *dev, * This can be called at any time. 
No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); if (priv->config & CFG_LONG_PREAMBLE) snprintf(wrqu->name, IFNAMSIZ, "long (1)"); @@ -7996,7 +7996,7 @@ static int ipw2100_wx_set_crc_check(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); int err, mode = *(int *)extra; mutex_lock(&priv->action_mutex); @@ -8028,7 +8028,7 @@ static int ipw2100_wx_get_crc_check(struct net_device *dev, * This can be called at any time. No action lock required */ - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); if (priv->config & CFG_CRC_CHECK) snprintf(wrqu->name, IFNAMSIZ, "CRC checked (1)"); @@ -8181,7 +8181,7 @@ static struct iw_statistics *ipw2100_wx_wireless_stats(struct net_device *dev) int beacon_qual; int quality; - struct ipw2100_priv *priv = ieee80211_priv(dev); + struct ipw2100_priv *priv = libipw_priv(dev); struct iw_statistics *wstats; u32 rssi, tx_retries, missed_beacons, tx_failures; u32 ord_len = sizeof(u32); diff --git a/drivers/net/wireless/ipw2x00/ipw2100.h b/drivers/net/wireless/ipw2x00/ipw2100.h index f183d951cd32..af175bdc9f29 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.h +++ b/drivers/net/wireless/ipw2x00/ipw2100.h @@ -46,7 +46,7 @@ #include #include -#include "ieee80211.h" +#include "libipw.h" struct ipw2100_priv; struct ipw2100_tx_packet; @@ -343,7 +343,7 @@ struct ipw2100_tx_packet { struct { /* DATA */ struct ipw2100_data_header *data; dma_addr_t data_phys; - struct ieee80211_txb *txb; + struct libipw_txb *txb; } d_struct; } info; int jiffy_start; @@ -492,7 +492,7 @@ struct ipw2100_priv { int stop_hang_check; /* Set 1 when shutting down to kill hang_check */ int stop_rf_kill; /* Set 1 when shutting down to kill rf_kill */ - struct ieee80211_device *ieee; + struct libipw_device *ieee; unsigned long status; unsigned long config; unsigned long capability; @@ -788,7 +788,7 @@ struct ipw2100_priv { #define IPW_CARD_DISABLE_PHY_OFF_COMPLETE_WAIT 100 // 100 milli #define IPW_PREPARE_POWER_DOWN_COMPLETE_WAIT 100 // 100 milli -#define IPW_HEADER_802_11_SIZE sizeof(struct ieee80211_hdr_3addr) +#define IPW_HEADER_802_11_SIZE sizeof(struct libipw_hdr_3addr) #define IPW_MAX_80211_PAYLOAD_SIZE 2304U #define IPW_MAX_802_11_PAYLOAD_LENGTH 2312 #define IPW_MAX_ACCEPTABLE_TX_FRAME_LENGTH 1536 @@ -803,13 +803,13 @@ struct ipw2100_priv { IPW_802_11_FCS_LENGTH) #define IPW_802_11_PAYLOAD_OFFSET \ - (sizeof(struct ieee80211_hdr_3addr) + \ - sizeof(struct ieee80211_snap_hdr)) + (sizeof(struct libipw_hdr_3addr) + \ + sizeof(struct libipw_snap_hdr)) struct ipw2100_rx { union { unsigned char payload[IPW_RX_NIC_BUFFER_LENGTH]; - struct ieee80211_hdr_4addr header; + struct libipw_hdr_4addr header; u32 status; struct ipw2100_notification notification; struct ipw2100_cmd_header command; diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c index 8e18d5348350..3617e3c1e9ed 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/ipw2x00/ipw2200.c @@ -111,7 +111,7 @@ static int qos_no_ack_mask = 0; static int burst_duration_CCK = 0; static int burst_duration_OFDM = 0; -static struct ieee80211_qos_parameters def_qos_parameters_OFDM = { +static struct libipw_qos_parameters def_qos_parameters_OFDM = { {QOS_TX0_CW_MIN_OFDM, QOS_TX1_CW_MIN_OFDM, QOS_TX2_CW_MIN_OFDM, QOS_TX3_CW_MIN_OFDM}, 
{QOS_TX0_CW_MAX_OFDM, QOS_TX1_CW_MAX_OFDM, QOS_TX2_CW_MAX_OFDM, @@ -122,7 +122,7 @@ static struct ieee80211_qos_parameters def_qos_parameters_OFDM = { QOS_TX2_TXOP_LIMIT_OFDM, QOS_TX3_TXOP_LIMIT_OFDM} }; -static struct ieee80211_qos_parameters def_qos_parameters_CCK = { +static struct libipw_qos_parameters def_qos_parameters_CCK = { {QOS_TX0_CW_MIN_CCK, QOS_TX1_CW_MIN_CCK, QOS_TX2_CW_MIN_CCK, QOS_TX3_CW_MIN_CCK}, {QOS_TX0_CW_MAX_CCK, QOS_TX1_CW_MAX_CCK, QOS_TX2_CW_MAX_CCK, @@ -133,7 +133,7 @@ static struct ieee80211_qos_parameters def_qos_parameters_CCK = { QOS_TX3_TXOP_LIMIT_CCK} }; -static struct ieee80211_qos_parameters def_parameters_OFDM = { +static struct libipw_qos_parameters def_parameters_OFDM = { {DEF_TX0_CW_MIN_OFDM, DEF_TX1_CW_MIN_OFDM, DEF_TX2_CW_MIN_OFDM, DEF_TX3_CW_MIN_OFDM}, {DEF_TX0_CW_MAX_OFDM, DEF_TX1_CW_MAX_OFDM, DEF_TX2_CW_MAX_OFDM, @@ -144,7 +144,7 @@ static struct ieee80211_qos_parameters def_parameters_OFDM = { DEF_TX2_TXOP_LIMIT_OFDM, DEF_TX3_TXOP_LIMIT_OFDM} }; -static struct ieee80211_qos_parameters def_parameters_CCK = { +static struct libipw_qos_parameters def_parameters_CCK = { {DEF_TX0_CW_MIN_CCK, DEF_TX1_CW_MIN_CCK, DEF_TX2_CW_MIN_CCK, DEF_TX3_CW_MIN_CCK}, {DEF_TX0_CW_MAX_CCK, DEF_TX1_CW_MAX_CCK, DEF_TX2_CW_MAX_CCK, @@ -164,9 +164,9 @@ static int from_priority_to_tx_queue[] = { static u32 ipw_qos_get_burst_duration(struct ipw_priv *priv); -static int ipw_send_qos_params_command(struct ipw_priv *priv, struct ieee80211_qos_parameters +static int ipw_send_qos_params_command(struct ipw_priv *priv, struct libipw_qos_parameters *qos_param); -static int ipw_send_qos_info_command(struct ipw_priv *priv, struct ieee80211_qos_information_element +static int ipw_send_qos_info_command(struct ipw_priv *priv, struct libipw_qos_information_element *qos_param); #endif /* CONFIG_IPW2200_QOS */ @@ -1830,7 +1830,7 @@ static ssize_t store_speed_scan(struct device *d, struct device_attribute *attr, break; } - if (ieee80211_is_valid_channel(priv->ieee, channel)) + if (libipw_is_valid_channel(priv->ieee, channel)) priv->speed_scan[pos++] = channel; else IPW_WARNING("Skipping invalid channel request: %d\n", @@ -1882,7 +1882,7 @@ static ssize_t show_channels(struct device *d, char *buf) { struct ipw_priv *priv = dev_get_drvdata(d); - const struct ieee80211_geo *geo = ieee80211_get_geo(priv->ieee); + const struct libipw_geo *geo = libipw_get_geo(priv->ieee); int len = 0, i; len = sprintf(&buf[len], @@ -1892,14 +1892,14 @@ static ssize_t show_channels(struct device *d, for (i = 0; i < geo->bg_channels; i++) { len += sprintf(&buf[len], "%d: BSS%s%s, %s, Band %s.\n", geo->bg[i].channel, - geo->bg[i].flags & IEEE80211_CH_RADAR_DETECT ? + geo->bg[i].flags & LIBIPW_CH_RADAR_DETECT ? " (radar spectrum)" : "", - ((geo->bg[i].flags & IEEE80211_CH_NO_IBSS) || - (geo->bg[i].flags & IEEE80211_CH_RADAR_DETECT)) + ((geo->bg[i].flags & LIBIPW_CH_NO_IBSS) || + (geo->bg[i].flags & LIBIPW_CH_RADAR_DETECT)) ? "" : ", IBSS", - geo->bg[i].flags & IEEE80211_CH_PASSIVE_ONLY ? + geo->bg[i].flags & LIBIPW_CH_PASSIVE_ONLY ? "passive only" : "active/passive", - geo->bg[i].flags & IEEE80211_CH_B_ONLY ? + geo->bg[i].flags & LIBIPW_CH_B_ONLY ? "B" : "B/G"); } @@ -1909,12 +1909,12 @@ static ssize_t show_channels(struct device *d, for (i = 0; i < geo->a_channels; i++) { len += sprintf(&buf[len], "%d: BSS%s%s, %s.\n", geo->a[i].channel, - geo->a[i].flags & IEEE80211_CH_RADAR_DETECT ? + geo->a[i].flags & LIBIPW_CH_RADAR_DETECT ? 
" (radar spectrum)" : "", - ((geo->a[i].flags & IEEE80211_CH_NO_IBSS) || - (geo->a[i].flags & IEEE80211_CH_RADAR_DETECT)) + ((geo->a[i].flags & LIBIPW_CH_NO_IBSS) || + (geo->a[i].flags & LIBIPW_CH_RADAR_DETECT)) ? "" : ", IBSS", - geo->a[i].flags & IEEE80211_CH_PASSIVE_ONLY ? + geo->a[i].flags & LIBIPW_CH_PASSIVE_ONLY ? "passive only" : "active/passive"); } @@ -2429,7 +2429,7 @@ static int ipw_send_tx_power(struct ipw_priv *priv, struct ipw_tx_power *power) static int ipw_set_tx_power(struct ipw_priv *priv) { - const struct ieee80211_geo *geo = ieee80211_get_geo(priv->ieee); + const struct libipw_geo *geo = libipw_get_geo(priv->ieee); struct ipw_tx_power tx_power; s8 max_power; int i; @@ -2960,12 +2960,12 @@ static int ipw_fw_dma_wait(struct ipw_priv *priv) static void ipw_remove_current_network(struct ipw_priv *priv) { struct list_head *element, *safe; - struct ieee80211_network *network = NULL; + struct libipw_network *network = NULL; unsigned long flags; spin_lock_irqsave(&priv->ieee->lock, flags); list_for_each_safe(element, safe, &priv->ieee->network_list) { - network = list_entry(element, struct ieee80211_network, list); + network = list_entry(element, struct libipw_network, list); if (!memcmp(network->bssid, priv->bssid, ETH_ALEN)) { list_del(element); list_add_tail(&network->list, @@ -3751,7 +3751,7 @@ static void ipw_queue_tx_free_tfd(struct ipw_priv *priv, le16_to_cpu(bd->u.data.chunk_len[i]), PCI_DMA_TODEVICE); if (txq->txb[txq->q.last_used]) { - ieee80211_txb_free(txq->txb[txq->q.last_used]); + libipw_txb_free(txq->txb[txq->q.last_used]); txq->txb[txq->q.last_used] = NULL; } } @@ -4070,7 +4070,7 @@ static u32 ipw_get_max_rate(struct ipw_priv *priv) /* If currently associated in B mode, restrict the maximum * rate match to B rates */ if (priv->assoc_request.ieee_mode == IPW_B_MODE) - mask &= IEEE80211_CCK_RATES_MASK; + mask &= LIBIPW_CCK_RATES_MASK; /* TODO: Verify that the rate is supported by the current rates * list. 
*/ @@ -4078,29 +4078,29 @@ static u32 ipw_get_max_rate(struct ipw_priv *priv) while (i && !(mask & i)) i >>= 1; switch (i) { - case IEEE80211_CCK_RATE_1MB_MASK: + case LIBIPW_CCK_RATE_1MB_MASK: return 1000000; - case IEEE80211_CCK_RATE_2MB_MASK: + case LIBIPW_CCK_RATE_2MB_MASK: return 2000000; - case IEEE80211_CCK_RATE_5MB_MASK: + case LIBIPW_CCK_RATE_5MB_MASK: return 5500000; - case IEEE80211_OFDM_RATE_6MB_MASK: + case LIBIPW_OFDM_RATE_6MB_MASK: return 6000000; - case IEEE80211_OFDM_RATE_9MB_MASK: + case LIBIPW_OFDM_RATE_9MB_MASK: return 9000000; - case IEEE80211_CCK_RATE_11MB_MASK: + case LIBIPW_CCK_RATE_11MB_MASK: return 11000000; - case IEEE80211_OFDM_RATE_12MB_MASK: + case LIBIPW_OFDM_RATE_12MB_MASK: return 12000000; - case IEEE80211_OFDM_RATE_18MB_MASK: + case LIBIPW_OFDM_RATE_18MB_MASK: return 18000000; - case IEEE80211_OFDM_RATE_24MB_MASK: + case LIBIPW_OFDM_RATE_24MB_MASK: return 24000000; - case IEEE80211_OFDM_RATE_36MB_MASK: + case LIBIPW_OFDM_RATE_36MB_MASK: return 36000000; - case IEEE80211_OFDM_RATE_48MB_MASK: + case LIBIPW_OFDM_RATE_48MB_MASK: return 48000000; - case IEEE80211_OFDM_RATE_54MB_MASK: + case LIBIPW_OFDM_RATE_54MB_MASK: return 54000000; } @@ -4466,11 +4466,11 @@ static void ipw_rx_notification(struct ipw_priv *priv, == IEEE80211_STYPE_ASSOC_RESP)) { if ((sizeof (struct - ieee80211_assoc_response) + libipw_assoc_response) <= size) && (size <= 2314)) { struct - ieee80211_rx_stats + libipw_rx_stats stats = { .len = size - 1, }; @@ -4478,10 +4478,10 @@ static void ipw_rx_notification(struct ipw_priv *priv, IPW_DEBUG_QOS ("QoS Associate " "size %d\n", size); - ieee80211_rx_mgt(priv-> + libipw_rx_mgt(priv-> ieee, (struct - ieee80211_hdr_4addr + libipw_hdr_4addr *) &notif->u.raw, &stats); } @@ -4537,11 +4537,11 @@ static void ipw_rx_notification(struct ipw_priv *priv, case CMAS_INIT:{ if (priv->status & STATUS_AUTH) { struct - ieee80211_assoc_response + libipw_assoc_response *resp; resp = (struct - ieee80211_assoc_response + libipw_assoc_response *)&notif->u.raw; IPW_DEBUG(IPW_DL_NOTIF | IPW_DL_STATE | @@ -5227,33 +5227,33 @@ static struct ipw_rx_queue *ipw_rx_queue_alloc(struct ipw_priv *priv) static int ipw_is_rate_in_mask(struct ipw_priv *priv, int ieee_mode, u8 rate) { - rate &= ~IEEE80211_BASIC_RATE_MASK; + rate &= ~LIBIPW_BASIC_RATE_MASK; if (ieee_mode == IEEE_A) { switch (rate) { - case IEEE80211_OFDM_RATE_6MB: - return priv->rates_mask & IEEE80211_OFDM_RATE_6MB_MASK ? + case LIBIPW_OFDM_RATE_6MB: + return priv->rates_mask & LIBIPW_OFDM_RATE_6MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_9MB: - return priv->rates_mask & IEEE80211_OFDM_RATE_9MB_MASK ? + case LIBIPW_OFDM_RATE_9MB: + return priv->rates_mask & LIBIPW_OFDM_RATE_9MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_12MB: + case LIBIPW_OFDM_RATE_12MB: return priv-> - rates_mask & IEEE80211_OFDM_RATE_12MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_18MB: + rates_mask & LIBIPW_OFDM_RATE_12MB_MASK ? 1 : 0; + case LIBIPW_OFDM_RATE_18MB: return priv-> - rates_mask & IEEE80211_OFDM_RATE_18MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_24MB: + rates_mask & LIBIPW_OFDM_RATE_18MB_MASK ? 1 : 0; + case LIBIPW_OFDM_RATE_24MB: return priv-> - rates_mask & IEEE80211_OFDM_RATE_24MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_36MB: + rates_mask & LIBIPW_OFDM_RATE_24MB_MASK ? 1 : 0; + case LIBIPW_OFDM_RATE_36MB: return priv-> - rates_mask & IEEE80211_OFDM_RATE_36MB_MASK ?
1 : 0; + case LIBIPW_OFDM_RATE_48MB: return priv-> - rates_mask & IEEE80211_OFDM_RATE_48MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_54MB: + rates_mask & LIBIPW_OFDM_RATE_48MB_MASK ? 1 : 0; + case LIBIPW_OFDM_RATE_54MB: return priv-> - rates_mask & IEEE80211_OFDM_RATE_54MB_MASK ? 1 : 0; + rates_mask & LIBIPW_OFDM_RATE_54MB_MASK ? 1 : 0; default: return 0; } @@ -5261,14 +5261,14 @@ static int ipw_is_rate_in_mask(struct ipw_priv *priv, int ieee_mode, u8 rate) /* B and G mixed */ switch (rate) { - case IEEE80211_CCK_RATE_1MB: - return priv->rates_mask & IEEE80211_CCK_RATE_1MB_MASK ? 1 : 0; - case IEEE80211_CCK_RATE_2MB: - return priv->rates_mask & IEEE80211_CCK_RATE_2MB_MASK ? 1 : 0; - case IEEE80211_CCK_RATE_5MB: - return priv->rates_mask & IEEE80211_CCK_RATE_5MB_MASK ? 1 : 0; - case IEEE80211_CCK_RATE_11MB: - return priv->rates_mask & IEEE80211_CCK_RATE_11MB_MASK ? 1 : 0; + case LIBIPW_CCK_RATE_1MB: + return priv->rates_mask & LIBIPW_CCK_RATE_1MB_MASK ? 1 : 0; + case LIBIPW_CCK_RATE_2MB: + return priv->rates_mask & LIBIPW_CCK_RATE_2MB_MASK ? 1 : 0; + case LIBIPW_CCK_RATE_5MB: + return priv->rates_mask & LIBIPW_CCK_RATE_5MB_MASK ? 1 : 0; + case LIBIPW_CCK_RATE_11MB: + return priv->rates_mask & LIBIPW_CCK_RATE_11MB_MASK ? 1 : 0; } /* If we are limited to B modulations, bail at this point */ @@ -5277,29 +5277,29 @@ static int ipw_is_rate_in_mask(struct ipw_priv *priv, int ieee_mode, u8 rate) /* G */ switch (rate) { - case IEEE80211_OFDM_RATE_6MB: - return priv->rates_mask & IEEE80211_OFDM_RATE_6MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_9MB: - return priv->rates_mask & IEEE80211_OFDM_RATE_9MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_12MB: - return priv->rates_mask & IEEE80211_OFDM_RATE_12MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_18MB: - return priv->rates_mask & IEEE80211_OFDM_RATE_18MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_24MB: - return priv->rates_mask & IEEE80211_OFDM_RATE_24MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_36MB: - return priv->rates_mask & IEEE80211_OFDM_RATE_36MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_48MB: - return priv->rates_mask & IEEE80211_OFDM_RATE_48MB_MASK ? 1 : 0; - case IEEE80211_OFDM_RATE_54MB: - return priv->rates_mask & IEEE80211_OFDM_RATE_54MB_MASK ? 1 : 0; + case LIBIPW_OFDM_RATE_6MB: + return priv->rates_mask & LIBIPW_OFDM_RATE_6MB_MASK ? 1 : 0; + case LIBIPW_OFDM_RATE_9MB: + return priv->rates_mask & LIBIPW_OFDM_RATE_9MB_MASK ? 1 : 0; + case LIBIPW_OFDM_RATE_12MB: + return priv->rates_mask & LIBIPW_OFDM_RATE_12MB_MASK ? 1 : 0; + case LIBIPW_OFDM_RATE_18MB: + return priv->rates_mask & LIBIPW_OFDM_RATE_18MB_MASK ? 1 : 0; + case LIBIPW_OFDM_RATE_24MB: + return priv->rates_mask & LIBIPW_OFDM_RATE_24MB_MASK ? 1 : 0; + case LIBIPW_OFDM_RATE_36MB: + return priv->rates_mask & LIBIPW_OFDM_RATE_36MB_MASK ? 1 : 0; + case LIBIPW_OFDM_RATE_48MB: + return priv->rates_mask & LIBIPW_OFDM_RATE_48MB_MASK ? 1 : 0; + case LIBIPW_OFDM_RATE_54MB: + return priv->rates_mask & LIBIPW_OFDM_RATE_54MB_MASK ? 
1 : 0; } return 0; } static int ipw_compatible_rates(struct ipw_priv *priv, - const struct ieee80211_network *network, + const struct libipw_network *network, struct ipw_supported_rates *rates) { int num_rates, i; @@ -5311,7 +5311,7 @@ static int ipw_compatible_rates(struct ipw_priv *priv, if (!ipw_is_rate_in_mask(priv, network->mode, network->rates[i])) { - if (network->rates[i] & IEEE80211_BASIC_RATE_MASK) { + if (network->rates[i] & LIBIPW_BASIC_RATE_MASK) { IPW_DEBUG_SCAN("Adding masked mandatory " "rate %02X\n", network->rates[i]); @@ -5333,7 +5333,7 @@ static int ipw_compatible_rates(struct ipw_priv *priv, for (i = 0; i < num_rates; i++) { if (!ipw_is_rate_in_mask(priv, network->mode, network->rates_ex[i])) { - if (network->rates_ex[i] & IEEE80211_BASIC_RATE_MASK) { + if (network->rates_ex[i] & LIBIPW_BASIC_RATE_MASK) { IPW_DEBUG_SCAN("Adding masked mandatory " "rate %02X\n", network->rates_ex[i]); @@ -5369,73 +5369,73 @@ static void ipw_copy_rates(struct ipw_supported_rates *dest, static void ipw_add_cck_scan_rates(struct ipw_supported_rates *rates, u8 modulation, u32 rate_mask) { - u8 basic_mask = (IEEE80211_OFDM_MODULATION == modulation) ? - IEEE80211_BASIC_RATE_MASK : 0; + u8 basic_mask = (LIBIPW_OFDM_MODULATION == modulation) ? + LIBIPW_BASIC_RATE_MASK : 0; - if (rate_mask & IEEE80211_CCK_RATE_1MB_MASK) + if (rate_mask & LIBIPW_CCK_RATE_1MB_MASK) rates->supported_rates[rates->num_rates++] = - IEEE80211_BASIC_RATE_MASK | IEEE80211_CCK_RATE_1MB; + LIBIPW_BASIC_RATE_MASK | LIBIPW_CCK_RATE_1MB; - if (rate_mask & IEEE80211_CCK_RATE_2MB_MASK) + if (rate_mask & LIBIPW_CCK_RATE_2MB_MASK) rates->supported_rates[rates->num_rates++] = - IEEE80211_BASIC_RATE_MASK | IEEE80211_CCK_RATE_2MB; + LIBIPW_BASIC_RATE_MASK | LIBIPW_CCK_RATE_2MB; - if (rate_mask & IEEE80211_CCK_RATE_5MB_MASK) + if (rate_mask & LIBIPW_CCK_RATE_5MB_MASK) rates->supported_rates[rates->num_rates++] = basic_mask | - IEEE80211_CCK_RATE_5MB; + LIBIPW_CCK_RATE_5MB; - if (rate_mask & IEEE80211_CCK_RATE_11MB_MASK) + if (rate_mask & LIBIPW_CCK_RATE_11MB_MASK) rates->supported_rates[rates->num_rates++] = basic_mask | - IEEE80211_CCK_RATE_11MB; + LIBIPW_CCK_RATE_11MB; } static void ipw_add_ofdm_scan_rates(struct ipw_supported_rates *rates, u8 modulation, u32 rate_mask) { - u8 basic_mask = (IEEE80211_OFDM_MODULATION == modulation) ? - IEEE80211_BASIC_RATE_MASK : 0; + u8 basic_mask = (LIBIPW_OFDM_MODULATION == modulation) ? 
+ LIBIPW_BASIC_RATE_MASK : 0; - if (rate_mask & IEEE80211_OFDM_RATE_6MB_MASK) + if (rate_mask & LIBIPW_OFDM_RATE_6MB_MASK) rates->supported_rates[rates->num_rates++] = basic_mask | - IEEE80211_OFDM_RATE_6MB; + LIBIPW_OFDM_RATE_6MB; - if (rate_mask & IEEE80211_OFDM_RATE_9MB_MASK) + if (rate_mask & LIBIPW_OFDM_RATE_9MB_MASK) rates->supported_rates[rates->num_rates++] = - IEEE80211_OFDM_RATE_9MB; + LIBIPW_OFDM_RATE_9MB; - if (rate_mask & IEEE80211_OFDM_RATE_12MB_MASK) + if (rate_mask & LIBIPW_OFDM_RATE_12MB_MASK) rates->supported_rates[rates->num_rates++] = basic_mask | - IEEE80211_OFDM_RATE_12MB; + LIBIPW_OFDM_RATE_12MB; - if (rate_mask & IEEE80211_OFDM_RATE_18MB_MASK) + if (rate_mask & LIBIPW_OFDM_RATE_18MB_MASK) rates->supported_rates[rates->num_rates++] = - IEEE80211_OFDM_RATE_18MB; + LIBIPW_OFDM_RATE_18MB; - if (rate_mask & IEEE80211_OFDM_RATE_24MB_MASK) + if (rate_mask & LIBIPW_OFDM_RATE_24MB_MASK) rates->supported_rates[rates->num_rates++] = basic_mask | - IEEE80211_OFDM_RATE_24MB; + LIBIPW_OFDM_RATE_24MB; - if (rate_mask & IEEE80211_OFDM_RATE_36MB_MASK) + if (rate_mask & LIBIPW_OFDM_RATE_36MB_MASK) rates->supported_rates[rates->num_rates++] = - IEEE80211_OFDM_RATE_36MB; + LIBIPW_OFDM_RATE_36MB; - if (rate_mask & IEEE80211_OFDM_RATE_48MB_MASK) + if (rate_mask & LIBIPW_OFDM_RATE_48MB_MASK) rates->supported_rates[rates->num_rates++] = - IEEE80211_OFDM_RATE_48MB; + LIBIPW_OFDM_RATE_48MB; - if (rate_mask & IEEE80211_OFDM_RATE_54MB_MASK) + if (rate_mask & LIBIPW_OFDM_RATE_54MB_MASK) rates->supported_rates[rates->num_rates++] = - IEEE80211_OFDM_RATE_54MB; + LIBIPW_OFDM_RATE_54MB; } struct ipw_network_match { - struct ieee80211_network *network; + struct libipw_network *network; struct ipw_supported_rates rates; }; static int ipw_find_adhoc_network(struct ipw_priv *priv, struct ipw_network_match *match, - struct ieee80211_network *network, + struct libipw_network *network, int roaming) { struct ipw_supported_rates rates; @@ -5556,7 +5556,7 @@ static int ipw_find_adhoc_network(struct ipw_priv *priv, } /* Filter out any incompatible freq / mode combinations */ - if (!ieee80211_is_valid_mode(priv->ieee, network->mode)) { + if (!libipw_is_valid_mode(priv->ieee, network->mode)) { IPW_DEBUG_MERGE("Network '%s (%pM)' excluded " "because of invalid frequency/mode " "combination.\n", @@ -5606,7 +5606,7 @@ static void ipw_merge_adhoc_network(struct work_struct *work) DECLARE_SSID_BUF(ssid); struct ipw_priv *priv = container_of(work, struct ipw_priv, merge_networks); - struct ieee80211_network *network = NULL; + struct libipw_network *network = NULL; struct ipw_network_match match = { .network = priv->assoc_network }; @@ -5648,7 +5648,7 @@ static void ipw_merge_adhoc_network(struct work_struct *work) static int ipw_best_network(struct ipw_priv *priv, struct ipw_network_match *match, - struct ieee80211_network *network, int roaming) + struct libipw_network *network, int roaming) { struct ipw_supported_rates rates; DECLARE_SSID_BUF(ssid); @@ -5782,7 +5782,7 @@ static int ipw_best_network(struct ipw_priv *priv, } /* Filter out any incompatible freq / mode combinations */ - if (!ieee80211_is_valid_mode(priv->ieee, network->mode)) { + if (!libipw_is_valid_mode(priv->ieee, network->mode)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of invalid frequency/mode " "combination.\n", @@ -5793,7 +5793,7 @@ static int ipw_best_network(struct ipw_priv *priv, } /* Filter out invalid channel in current GEO */ - if (!ieee80211_is_valid_channel(priv->ieee, network->channel)) { + if 
(!libipw_is_valid_channel(priv->ieee, network->channel)) { IPW_DEBUG_ASSOC("Network '%s (%pM)' excluded " "because of invalid channel in current GEO\n", print_ssid(ssid, network->ssid, @@ -5839,9 +5839,9 @@ static int ipw_best_network(struct ipw_priv *priv, } static void ipw_adhoc_create(struct ipw_priv *priv, - struct ieee80211_network *network) + struct libipw_network *network) { - const struct ieee80211_geo *geo = ieee80211_get_geo(priv->ieee); + const struct libipw_geo *geo = libipw_get_geo(priv->ieee); int i; /* @@ -5856,25 +5856,25 @@ static void ipw_adhoc_create(struct ipw_priv *priv, * FW fatal error. * */ - switch (ieee80211_is_valid_channel(priv->ieee, priv->channel)) { - case IEEE80211_52GHZ_BAND: + switch (libipw_is_valid_channel(priv->ieee, priv->channel)) { + case LIBIPW_52GHZ_BAND: network->mode = IEEE_A; - i = ieee80211_channel_to_index(priv->ieee, priv->channel); + i = libipw_channel_to_index(priv->ieee, priv->channel); BUG_ON(i == -1); - if (geo->a[i].flags & IEEE80211_CH_PASSIVE_ONLY) { + if (geo->a[i].flags & LIBIPW_CH_PASSIVE_ONLY) { IPW_WARNING("Overriding invalid channel\n"); priv->channel = geo->a[0].channel; } break; - case IEEE80211_24GHZ_BAND: + case LIBIPW_24GHZ_BAND: if (priv->ieee->mode & IEEE_G) network->mode = IEEE_G; else network->mode = IEEE_B; - i = ieee80211_channel_to_index(priv->ieee, priv->channel); + i = libipw_channel_to_index(priv->ieee, priv->channel); BUG_ON(i == -1); - if (geo->bg[i].flags & IEEE80211_CH_PASSIVE_ONLY) { + if (geo->bg[i].flags & LIBIPW_CH_PASSIVE_ONLY) { IPW_WARNING("Overriding invalid channel\n"); priv->channel = geo->bg[0].channel; } @@ -6110,9 +6110,9 @@ static void ipw_set_fixed_rate(struct ipw_priv *priv, int mode) * Tx rates */ switch (priv->ieee->freq_band) { - case IEEE80211_52GHZ_BAND: /* A only */ + case LIBIPW_52GHZ_BAND: /* A only */ /* IEEE_A */ - if (priv->rates_mask & ~IEEE80211_OFDM_RATES_MASK) { + if (priv->rates_mask & ~LIBIPW_OFDM_RATES_MASK) { /* Invalid fixed rate mask */ IPW_DEBUG_WX ("invalid fixed rate mask in ipw_set_fixed_rate\n"); @@ -6120,13 +6120,13 @@ static void ipw_set_fixed_rate(struct ipw_priv *priv, int mode) break; } - new_tx_rates >>= IEEE80211_OFDM_SHIFT_MASK_A; + new_tx_rates >>= LIBIPW_OFDM_SHIFT_MASK_A; break; default: /* 2.4Ghz or Mixed */ /* IEEE_B */ if (mode == IEEE_B) { - if (new_tx_rates & ~IEEE80211_CCK_RATES_MASK) { + if (new_tx_rates & ~LIBIPW_CCK_RATES_MASK) { /* Invalid fixed rate mask */ IPW_DEBUG_WX ("invalid fixed rate mask in ipw_set_fixed_rate\n"); @@ -6136,8 +6136,8 @@ static void ipw_set_fixed_rate(struct ipw_priv *priv, int mode) } /* IEEE_G */ - if (new_tx_rates & ~(IEEE80211_CCK_RATES_MASK | - IEEE80211_OFDM_RATES_MASK)) { + if (new_tx_rates & ~(LIBIPW_CCK_RATES_MASK | + LIBIPW_OFDM_RATES_MASK)) { /* Invalid fixed rate mask */ IPW_DEBUG_WX ("invalid fixed rate mask in ipw_set_fixed_rate\n"); @@ -6145,19 +6145,19 @@ static void ipw_set_fixed_rate(struct ipw_priv *priv, int mode) break; } - if (IEEE80211_OFDM_RATE_6MB_MASK & new_tx_rates) { - mask |= (IEEE80211_OFDM_RATE_6MB_MASK >> 1); - new_tx_rates &= ~IEEE80211_OFDM_RATE_6MB_MASK; + if (LIBIPW_OFDM_RATE_6MB_MASK & new_tx_rates) { + mask |= (LIBIPW_OFDM_RATE_6MB_MASK >> 1); + new_tx_rates &= ~LIBIPW_OFDM_RATE_6MB_MASK; } - if (IEEE80211_OFDM_RATE_9MB_MASK & new_tx_rates) { - mask |= (IEEE80211_OFDM_RATE_9MB_MASK >> 1); - new_tx_rates &= ~IEEE80211_OFDM_RATE_9MB_MASK; + if (LIBIPW_OFDM_RATE_9MB_MASK & new_tx_rates) { + mask |= (LIBIPW_OFDM_RATE_9MB_MASK >> 1); + new_tx_rates &= ~LIBIPW_OFDM_RATE_9MB_MASK; } - if 
(IEEE80211_OFDM_RATE_12MB_MASK & new_tx_rates) { - mask |= (IEEE80211_OFDM_RATE_12MB_MASK >> 1); - new_tx_rates &= ~IEEE80211_OFDM_RATE_12MB_MASK; + if (LIBIPW_OFDM_RATE_12MB_MASK & new_tx_rates) { + mask |= (LIBIPW_OFDM_RATE_12MB_MASK >> 1); + new_tx_rates &= ~LIBIPW_OFDM_RATE_12MB_MASK; } new_tx_rates |= mask; @@ -6190,12 +6190,12 @@ static void ipw_add_scan_channels(struct ipw_priv *priv, int scan_type) { int channel_index = 0; - const struct ieee80211_geo *geo; + const struct libipw_geo *geo; int i; - geo = ieee80211_get_geo(priv->ieee); + geo = libipw_get_geo(priv->ieee); - if (priv->ieee->freq_band & IEEE80211_52GHZ_BAND) { + if (priv->ieee->freq_band & LIBIPW_52GHZ_BAND) { int start = channel_index; for (i = 0; i < geo->a_channels; i++) { if ((priv->status & STATUS_ASSOCIATED) && @@ -6205,7 +6205,7 @@ static void ipw_add_scan_channels(struct ipw_priv *priv, scan->channels_list[channel_index] = geo->a[i].channel; ipw_set_scan_type(scan, channel_index, geo->a[i]. - flags & IEEE80211_CH_PASSIVE_ONLY ? + flags & LIBIPW_CH_PASSIVE_ONLY ? IPW_SCAN_PASSIVE_FULL_DWELL_SCAN : scan_type); } @@ -6217,11 +6217,11 @@ static void ipw_add_scan_channels(struct ipw_priv *priv, } } - if (priv->ieee->freq_band & IEEE80211_24GHZ_BAND) { + if (priv->ieee->freq_band & LIBIPW_24GHZ_BAND) { int start = channel_index; if (priv->config & CFG_SPEED_SCAN) { int index; - u8 channels[IEEE80211_24GHZ_CHANNELS] = { + u8 channels[LIBIPW_24GHZ_CHANNELS] = { /* nop out the list */ [0] = 0 }; @@ -6253,11 +6253,11 @@ static void ipw_add_scan_channels(struct ipw_priv *priv, channel_index++; scan->channels_list[channel_index] = channel; index = - ieee80211_channel_to_index(priv->ieee, channel); + libipw_channel_to_index(priv->ieee, channel); ipw_set_scan_type(scan, channel_index, geo->bg[index]. flags & - IEEE80211_CH_PASSIVE_ONLY ? + LIBIPW_CH_PASSIVE_ONLY ? IPW_SCAN_PASSIVE_FULL_DWELL_SCAN : scan_type); } @@ -6272,7 +6272,7 @@ static void ipw_add_scan_channels(struct ipw_priv *priv, ipw_set_scan_type(scan, channel_index, geo->bg[i]. flags & - IEEE80211_CH_PASSIVE_ONLY ? + LIBIPW_CH_PASSIVE_ONLY ? 
IPW_SCAN_PASSIVE_FULL_DWELL_SCAN : scan_type); } @@ -6339,7 +6339,7 @@ static int ipw_request_scan_helper(struct ipw_priv *priv, int type, int direct) } memset(&scan, 0, sizeof(scan)); - scan.full_scan_index = cpu_to_le32(ieee80211_get_scans(priv->ieee)); + scan.full_scan_index = cpu_to_le32(libipw_get_scans(priv->ieee)); if (type == IW_SCAN_TYPE_PASSIVE) { IPW_DEBUG_WX("use passive scanning\n"); @@ -6370,13 +6370,13 @@ static int ipw_request_scan_helper(struct ipw_priv *priv, int type, int direct) u8 channel; u8 band = 0; - switch (ieee80211_is_valid_channel(priv->ieee, priv->channel)) { - case IEEE80211_52GHZ_BAND: + switch (libipw_is_valid_channel(priv->ieee, priv->channel)) { + case LIBIPW_52GHZ_BAND: band = (u8) (IPW_A_MODE << 6) | 1; channel = priv->channel; break; - case IEEE80211_24GHZ_BAND: + case LIBIPW_24GHZ_BAND: band = (u8) (IPW_B_MODE << 6) | 1; channel = priv->channel; break; @@ -6497,8 +6497,8 @@ static int ipw_wpa_enable(struct ipw_priv *priv, int value) static int ipw_wpa_set_auth_algs(struct ipw_priv *priv, int value) { - struct ieee80211_device *ieee = priv->ieee; - struct ieee80211_security sec = { + struct libipw_device *ieee = priv->ieee; + struct libipw_security sec = { .flags = SEC_AUTH_MODE, }; int ret = 0; @@ -6548,8 +6548,8 @@ static int ipw_wx_set_genie(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); - struct ieee80211_device *ieee = priv->ieee; + struct ipw_priv *priv = libipw_priv(dev); + struct libipw_device *ieee = priv->ieee; u8 *buf; int err = 0; @@ -6584,8 +6584,8 @@ static int ipw_wx_get_genie(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); - struct ieee80211_device *ieee = priv->ieee; + struct ipw_priv *priv = libipw_priv(dev); + struct libipw_device *ieee = priv->ieee; int err = 0; if (ieee->wpa_ie_len == 0 || ieee->wpa_ie == NULL) { @@ -6627,8 +6627,8 @@ static int ipw_wx_set_auth(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); - struct ieee80211_device *ieee = priv->ieee; + struct ipw_priv *priv = libipw_priv(dev); + struct libipw_device *ieee = priv->ieee; struct iw_param *param = &wrqu->param; struct lib80211_crypt_data *crypt; unsigned long flags; @@ -6679,7 +6679,7 @@ static int ipw_wx_set_auth(struct net_device *dev, * can use this to determine if the CAP_PRIVACY_ON bit should * be set. 
*/ - struct ieee80211_security sec = { + struct libipw_security sec = { .flags = SEC_ENABLED, .enabled = param->value, }; @@ -6727,8 +6727,8 @@ static int ipw_wx_get_auth(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); - struct ieee80211_device *ieee = priv->ieee; + struct ipw_priv *priv = libipw_priv(dev); + struct libipw_device *ieee = priv->ieee; struct lib80211_crypt_data *crypt; struct iw_param *param = &wrqu->param; int ret = 0; @@ -6786,7 +6786,7 @@ static int ipw_wx_set_encodeext(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; if (hwcrypto) { @@ -6808,7 +6808,7 @@ static int ipw_wx_set_encodeext(struct net_device *dev, } } - return ieee80211_wx_set_encodeext(priv->ieee, info, wrqu, extra); + return libipw_wx_set_encodeext(priv->ieee, info, wrqu, extra); } /* SIOCGIWENCODEEXT */ @@ -6816,8 +6816,8 @@ static int ipw_wx_get_encodeext(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); - return ieee80211_wx_get_encodeext(priv->ieee, info, wrqu, extra); + struct ipw_priv *priv = libipw_priv(dev); + return libipw_wx_get_encodeext(priv->ieee, info, wrqu, extra); } /* SIOCSIWMLME */ @@ -6825,7 +6825,7 @@ static int ipw_wx_set_mlme(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); struct iw_mlme *mlme = (struct iw_mlme *)extra; __le16 reason; @@ -6875,9 +6875,9 @@ static u8 ipw_qos_current_mode(struct ipw_priv * priv) */ static int ipw_qos_handle_probe_response(struct ipw_priv *priv, int active_network, - struct ieee80211_network *network) + struct libipw_network *network) { - u32 size = sizeof(struct ieee80211_qos_parameters); + u32 size = sizeof(struct libipw_qos_parameters); if (network->capability & WLAN_CAPABILITY_IBSS) network->qos_data.active = network->qos_data.supported; @@ -6935,12 +6935,12 @@ static int ipw_qos_handle_probe_response(struct ipw_priv *priv, * IPW_CMD_QOS_PARAMETERS and IPW_CMD_WME_INFO */ static int ipw_qos_activate(struct ipw_priv *priv, - struct ieee80211_qos_data *qos_network_data) + struct libipw_qos_data *qos_network_data) { int err; - struct ieee80211_qos_parameters qos_parameters[QOS_QOS_SETS]; - struct ieee80211_qos_parameters *active_one = NULL; - u32 size = sizeof(struct ieee80211_qos_parameters); + struct libipw_qos_parameters qos_parameters[QOS_QOS_SETS]; + struct libipw_qos_parameters *active_one = NULL; + u32 size = sizeof(struct libipw_qos_parameters); u32 burst_duration; int i; u8 type; @@ -7001,7 +7001,7 @@ static int ipw_qos_activate(struct ipw_priv *priv, IPW_DEBUG_QOS("QoS sending IPW_CMD_QOS_PARAMETERS\n"); err = ipw_send_qos_params_command(priv, - (struct ieee80211_qos_parameters *) + (struct libipw_qos_parameters *) &(qos_parameters[0])); if (err) IPW_DEBUG_QOS("QoS IPW_CMD_QOS_PARAMETERS failed\n"); @@ -7015,13 +7015,13 @@ static int ipw_qos_activate(struct ipw_priv *priv, static int ipw_qos_set_info_element(struct ipw_priv *priv) { int ret = 0; - struct ieee80211_qos_information_element qos_info; + struct libipw_qos_information_element qos_info; if (priv == NULL) return -1; qos_info.elementID = QOS_ELEMENT_ID; - qos_info.length = 
sizeof(struct ieee80211_qos_information_element) - 2; + qos_info.length = sizeof(struct libipw_qos_information_element) - 2; qos_info.version = QOS_VERSION_1; qos_info.ac_info = 0; @@ -7041,11 +7041,11 @@ static int ipw_qos_set_info_element(struct ipw_priv *priv) * Set the QoS parameter with the association request structure */ static int ipw_qos_association(struct ipw_priv *priv, - struct ieee80211_network *network) + struct libipw_network *network) { int err = 0; - struct ieee80211_qos_data *qos_data = NULL; - struct ieee80211_qos_data ibss_data = { + struct libipw_qos_data *qos_data = NULL; + struct libipw_qos_data ibss_data = { .supported = 1, .active = 1, }; @@ -7087,11 +7087,11 @@ static int ipw_qos_association(struct ipw_priv *priv, * setting */ static int ipw_qos_association_resp(struct ipw_priv *priv, - struct ieee80211_network *network) + struct libipw_network *network) { int ret = 0; unsigned long flags; - u32 size = sizeof(struct ieee80211_qos_parameters); + u32 size = sizeof(struct libipw_qos_parameters); int set_qos_param = 0; if ((priv == NULL) || (network == NULL) || @@ -7107,7 +7107,7 @@ static int ipw_qos_association_resp(struct ipw_priv *priv, spin_lock_irqsave(&priv->ieee->lock, flags); if (network->flags & NETWORK_HAS_QOS_PARAMETERS) { memcpy(&priv->assoc_network->qos_data, &network->qos_data, - sizeof(struct ieee80211_qos_data)); + sizeof(struct libipw_qos_data)); priv->assoc_network->qos_data.active = 1; if ((network->qos_data.old_param_count != network->qos_data.param_count)) { @@ -7143,7 +7143,7 @@ static u32 ipw_qos_get_burst_duration(struct ipw_priv *priv) if ((priv == NULL)) return 0; - if (!(priv->ieee->modulation & IEEE80211_OFDM_MODULATION)) + if (!(priv->ieee->modulation & LIBIPW_OFDM_MODULATION)) ret = priv->qos_data.burst_duration_CCK; else ret = priv->qos_data.burst_duration_OFDM; @@ -7195,8 +7195,8 @@ static int ipw_get_tx_queue_number(struct ipw_priv *priv, u16 priority) static int ipw_is_qos_active(struct net_device *dev, struct sk_buff *skb) { - struct ipw_priv *priv = ieee80211_priv(dev); - struct ieee80211_qos_data *qos_data = NULL; + struct ipw_priv *priv = libipw_priv(dev); + struct libipw_qos_data *qos_data = NULL; int active, supported; u8 *daddr = skb->data + ETH_ALEN; int unicast = !is_multicast_ether_addr(daddr); @@ -7260,10 +7260,10 @@ static void ipw_bg_qos_activate(struct work_struct *work) } static int ipw_handle_probe_response(struct net_device *dev, - struct ieee80211_probe_response *resp, - struct ieee80211_network *network) + struct libipw_probe_response *resp, + struct libipw_network *network) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int active_network = ((priv->status & STATUS_ASSOCIATED) && (network == priv->assoc_network)); @@ -7273,10 +7273,10 @@ static int ipw_handle_probe_response(struct net_device *dev, } static int ipw_handle_beacon(struct net_device *dev, - struct ieee80211_beacon *resp, - struct ieee80211_network *network) + struct libipw_beacon *resp, + struct libipw_network *network) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int active_network = ((priv->status & STATUS_ASSOCIATED) && (network == priv->assoc_network)); @@ -7286,22 +7286,22 @@ static int ipw_handle_beacon(struct net_device *dev, } static int ipw_handle_assoc_response(struct net_device *dev, - struct ieee80211_assoc_response *resp, - struct ieee80211_network *network) + struct libipw_assoc_response *resp, + struct libipw_network *network) { - struct 
ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); ipw_qos_association_resp(priv, network); return 0; } -static int ipw_send_qos_params_command(struct ipw_priv *priv, struct ieee80211_qos_parameters +static int ipw_send_qos_params_command(struct ipw_priv *priv, struct libipw_qos_parameters *qos_param) { return ipw_send_cmd_pdu(priv, IPW_CMD_QOS_PARAMETERS, sizeof(*qos_param) * 3, qos_param); } -static int ipw_send_qos_info_command(struct ipw_priv *priv, struct ieee80211_qos_information_element +static int ipw_send_qos_info_command(struct ipw_priv *priv, struct libipw_qos_information_element *qos_param) { return ipw_send_cmd_pdu(priv, IPW_CMD_WME_INFO, sizeof(*qos_param), @@ -7311,7 +7311,7 @@ static int ipw_send_qos_info_command(struct ipw_priv *priv, struct ieee80211_qos #endif /* CONFIG_IPW2200_QOS */ static int ipw_associate_network(struct ipw_priv *priv, - struct ieee80211_network *network, + struct libipw_network *network, struct ipw_supported_rates *rates, int roaming) { int err; @@ -7493,7 +7493,7 @@ static int ipw_associate_network(struct ipw_priv *priv, static void ipw_roam(void *data) { struct ipw_priv *priv = data; - struct ieee80211_network *network = NULL; + struct libipw_network *network = NULL; struct ipw_network_match match = { .network = priv->assoc_network }; @@ -7568,7 +7568,7 @@ static int ipw_associate(void *data) { struct ipw_priv *priv = data; - struct ieee80211_network *network = NULL; + struct libipw_network *network = NULL; struct ipw_network_match match = { .network = NULL }; @@ -7622,8 +7622,8 @@ static int ipw_associate(void *data) priv->config & CFG_STATIC_CHANNEL) { /* Use oldest network if the free list is empty */ if (list_empty(&priv->ieee->network_free_list)) { - struct ieee80211_network *oldest = NULL; - struct ieee80211_network *target; + struct libipw_network *oldest = NULL; + struct libipw_network *target; list_for_each_entry(target, &priv->ieee->network_list, list) { if ((oldest == NULL) || @@ -7644,7 +7644,7 @@ static int ipw_associate(void *data) } element = priv->ieee->network_free_list.next; - network = list_entry(element, struct ieee80211_network, list); + network = list_entry(element, struct libipw_network, list); ipw_adhoc_create(priv, network); rates = &priv->rates; list_del(element); @@ -7700,18 +7700,18 @@ static void ipw_rebuild_decrypted_skb(struct ipw_priv *priv, switch (priv->ieee->sec.level) { case SEC_LEVEL_3: /* Remove CCMP HDR */ - memmove(skb->data + IEEE80211_3ADDR_LEN, - skb->data + IEEE80211_3ADDR_LEN + 8, - skb->len - IEEE80211_3ADDR_LEN - 8); + memmove(skb->data + LIBIPW_3ADDR_LEN, + skb->data + LIBIPW_3ADDR_LEN + 8, + skb->len - LIBIPW_3ADDR_LEN - 8); skb_trim(skb, skb->len - 16); /* CCMP_HDR_LEN + CCMP_MIC_LEN */ break; case SEC_LEVEL_2: break; case SEC_LEVEL_1: /* Remove IV */ - memmove(skb->data + IEEE80211_3ADDR_LEN, - skb->data + IEEE80211_3ADDR_LEN + 4, - skb->len - IEEE80211_3ADDR_LEN - 4); + memmove(skb->data + LIBIPW_3ADDR_LEN, + skb->data + LIBIPW_3ADDR_LEN + 4, + skb->len - LIBIPW_3ADDR_LEN - 4); skb_trim(skb, skb->len - 8); /* IV + ICV */ break; case SEC_LEVEL_0: @@ -7725,10 +7725,10 @@ static void ipw_rebuild_decrypted_skb(struct ipw_priv *priv, static void ipw_handle_data_packet(struct ipw_priv *priv, struct ipw_rx_mem_buffer *rxb, - struct ieee80211_rx_stats *stats) + struct libipw_rx_stats *stats) { struct net_device *dev = priv->net_dev; - struct ieee80211_hdr_4addr *hdr; + struct libipw_hdr_4addr *hdr; struct ipw_rx_packet *pkt = (struct ipw_rx_packet *)rxb->skb->data; /* We 
received data from the HW, so stop the watchdog */ @@ -7758,15 +7758,15 @@ static void ipw_handle_data_packet(struct ipw_priv *priv, IPW_DEBUG_RX("Rx packet of %d bytes.\n", rxb->skb->len); /* HW decrypt will not clear the WEP bit, MIC, PN, etc. */ - hdr = (struct ieee80211_hdr_4addr *)rxb->skb->data; + hdr = (struct libipw_hdr_4addr *)rxb->skb->data; if (priv->ieee->iw_mode != IW_MODE_MONITOR && (is_multicast_ether_addr(hdr->addr1) ? !priv->ieee->host_mc_decrypt : !priv->ieee->host_decrypt)) ipw_rebuild_decrypted_skb(priv, rxb->skb); - if (!ieee80211_rx(priv->ieee, rxb->skb, stats)) + if (!libipw_rx(priv->ieee, rxb->skb, stats)) dev->stats.rx_errors++; - else { /* ieee80211_rx succeeded, so it now owns the SKB */ + else { /* libipw_rx succeeded, so it now owns the SKB */ rxb->skb = NULL; __ipw_led_activity_on(priv); } @@ -7775,7 +7775,7 @@ static void ipw_handle_data_packet(struct ipw_priv *priv, #ifdef CONFIG_IPW2200_RADIOTAP static void ipw_handle_data_packet_monitor(struct ipw_priv *priv, struct ipw_rx_mem_buffer *rxb, - struct ieee80211_rx_stats *stats) + struct libipw_rx_stats *stats) { struct net_device *dev = priv->net_dev; struct ipw_rx_packet *pkt = (struct ipw_rx_packet *)rxb->skb->data; @@ -7921,9 +7921,9 @@ static void ipw_handle_data_packet_monitor(struct ipw_priv *priv, IPW_DEBUG_RX("Rx packet of %d bytes.\n", rxb->skb->len); - if (!ieee80211_rx(priv->ieee, rxb->skb, stats)) + if (!libipw_rx(priv->ieee, rxb->skb, stats)) dev->stats.rx_errors++; - else { /* ieee80211_rx succeeded, so it now owns the SKB */ + else { /* libipw_rx succeeded, so it now owns the SKB */ rxb->skb = NULL; /* no LED during capture */ } @@ -7931,28 +7931,28 @@ static void ipw_handle_data_packet_monitor(struct ipw_priv *priv, #endif #ifdef CONFIG_IPW2200_PROMISCUOUS -#define ieee80211_is_probe_response(fc) \ +#define libipw_is_probe_response(fc) \ ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT && \ (fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP ) -#define ieee80211_is_management(fc) \ +#define libipw_is_management(fc) \ ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT) -#define ieee80211_is_control(fc) \ +#define libipw_is_control(fc) \ ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) -#define ieee80211_is_data(fc) \ +#define libipw_is_data(fc) \ ((fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA) -#define ieee80211_is_assoc_request(fc) \ +#define libipw_is_assoc_request(fc) \ ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_ASSOC_REQ) -#define ieee80211_is_reassoc_request(fc) \ +#define libipw_is_reassoc_request(fc) \ ((fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_REASSOC_REQ) static void ipw_handle_promiscuous_rx(struct ipw_priv *priv, struct ipw_rx_mem_buffer *rxb, - struct ieee80211_rx_stats *stats) + struct libipw_rx_stats *stats) { struct net_device *dev = priv->prom_net_dev; struct ipw_rx_packet *pkt = (struct ipw_rx_packet *)rxb->skb->data; @@ -8002,17 +8002,17 @@ static void ipw_handle_promiscuous_rx(struct ipw_priv *priv, } hdr = (void *)rxb->skb->data + IPW_RX_FRAME_SIZE; - if (ieee80211_is_management(le16_to_cpu(hdr->frame_control))) { + if (libipw_is_management(le16_to_cpu(hdr->frame_control))) { if (filter & IPW_PROM_NO_MGMT) return; if (filter & IPW_PROM_MGMT_HEADER_ONLY) hdr_only = 1; - } else if (ieee80211_is_control(le16_to_cpu(hdr->frame_control))) { + } else if (libipw_is_control(le16_to_cpu(hdr->frame_control))) { if (filter & IPW_PROM_NO_CTL) return; if (filter & IPW_PROM_CTL_HEADER_ONLY) hdr_only = 1; - } else if 
(ieee80211_is_data(le16_to_cpu(hdr->frame_control))) { + } else if (libipw_is_data(le16_to_cpu(hdr->frame_control))) { if (filter & IPW_PROM_NO_DATA) return; if (filter & IPW_PROM_DATA_HEADER_ONLY) @@ -8030,7 +8030,7 @@ static void ipw_handle_promiscuous_rx(struct ipw_priv *priv, ipw_rt = (void *)skb->data; if (hdr_only) - len = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)); + len = libipw_get_hdrlen(le16_to_cpu(hdr->frame_control)); memcpy(ipw_rt->payload, hdr, len); @@ -8127,7 +8127,7 @@ static void ipw_handle_promiscuous_rx(struct ipw_priv *priv, IPW_DEBUG_RX("Rx packet of %d bytes.\n", skb->len); - if (!ieee80211_rx(priv->prom_priv->ieee, skb, stats)) { + if (!libipw_rx(priv->prom_priv->ieee, skb, stats)) { dev->stats.rx_errors++; dev_kfree_skb_any(skb); } @@ -8135,7 +8135,7 @@ static void ipw_handle_promiscuous_rx(struct ipw_priv *priv, #endif static int is_network_packet(struct ipw_priv *priv, - struct ieee80211_hdr_4addr *header) + struct libipw_hdr_4addr *header) { /* Filter incoming packets to determine if they are targetted toward * this network, discarding packets coming from ourselves */ @@ -8173,7 +8173,7 @@ static int is_network_packet(struct ipw_priv *priv, #define IPW_PACKET_RETRY_TIME HZ static int is_duplicate_packet(struct ipw_priv *priv, - struct ieee80211_hdr_4addr *header) + struct libipw_hdr_4addr *header) { u16 sc = le16_to_cpu(header->seq_ctl); u16 seq = WLAN_GET_SEQ_SEQ(sc); @@ -8247,14 +8247,14 @@ static int is_duplicate_packet(struct ipw_priv *priv, static void ipw_handle_mgmt_packet(struct ipw_priv *priv, struct ipw_rx_mem_buffer *rxb, - struct ieee80211_rx_stats *stats) + struct libipw_rx_stats *stats) { struct sk_buff *skb = rxb->skb; struct ipw_rx_packet *pkt = (struct ipw_rx_packet *)skb->data; - struct ieee80211_hdr_4addr *header = (struct ieee80211_hdr_4addr *) + struct libipw_hdr_4addr *header = (struct libipw_hdr_4addr *) (skb->data + IPW_RX_FRAME_SIZE); - ieee80211_rx_mgt(priv->ieee, header, stats); + libipw_rx_mgt(priv->ieee, header, stats); if (priv->ieee->iw_mode == IW_MODE_ADHOC && ((WLAN_FC_GET_STYPE(le16_to_cpu(header->frame_ctl)) == @@ -8276,12 +8276,12 @@ static void ipw_handle_mgmt_packet(struct ipw_priv *priv, /* Advance past the ipw packet header to the 802.11 frame */ skb_pull(skb, IPW_RX_FRAME_SIZE); - /* Push the ieee80211_rx_stats before the 802.11 frame */ + /* Push the libipw_rx_stats before the 802.11 frame */ memcpy(skb_push(skb, sizeof(*stats)), stats, sizeof(*stats)); skb->dev = priv->ieee->dev; - /* Point raw at the ieee80211_stats */ + /* Point raw at the libipw_stats */ skb_reset_mac_header(skb); skb->pkt_type = PACKET_OTHERHOST; @@ -8301,7 +8301,7 @@ static void ipw_rx(struct ipw_priv *priv) { struct ipw_rx_mem_buffer *rxb; struct ipw_rx_packet *pkt; - struct ieee80211_hdr_4addr *header; + struct libipw_hdr_4addr *header; u32 r, w, i; u8 network_packet; u8 fill_rx = 0; @@ -8332,7 +8332,7 @@ static void ipw_rx(struct ipw_priv *priv) switch (pkt->header.message_type) { case RX_FRAME_TYPE: /* 802.11 frame */ { - struct ieee80211_rx_stats stats = { + struct libipw_rx_stats stats = { .rssi = pkt->u.frame.rssi_dbm - IPW_RSSI_TO_DBM, .signal = @@ -8347,19 +8347,19 @@ static void ipw_rx(struct ipw_priv *priv) .freq = (pkt->u.frame. control & (1 << 0)) ? 
- IEEE80211_24GHZ_BAND : - IEEE80211_52GHZ_BAND, + LIBIPW_24GHZ_BAND : + LIBIPW_52GHZ_BAND, .len = le16_to_cpu(pkt->u.frame.length), }; if (stats.rssi != 0) - stats.mask |= IEEE80211_STATMASK_RSSI; + stats.mask |= LIBIPW_STATMASK_RSSI; if (stats.signal != 0) - stats.mask |= IEEE80211_STATMASK_SIGNAL; + stats.mask |= LIBIPW_STATMASK_SIGNAL; if (stats.noise != 0) - stats.mask |= IEEE80211_STATMASK_NOISE; + stats.mask |= LIBIPW_STATMASK_NOISE; if (stats.rate != 0) - stats.mask |= IEEE80211_STATMASK_RATE; + stats.mask |= LIBIPW_STATMASK_RATE; priv->rx_packets++; @@ -8384,7 +8384,7 @@ static void ipw_rx(struct ipw_priv *priv) #endif header = - (struct ieee80211_hdr_4addr *)(rxb->skb-> + (struct libipw_hdr_4addr *)(rxb->skb-> data + IPW_RX_FRAME_SIZE); /* TODO: Check Ad-Hoc dest/source and make sure @@ -8407,7 +8407,7 @@ static void ipw_rx(struct ipw_priv *priv) le16_to_cpu(pkt->u.frame.length)); if (le16_to_cpu(pkt->u.frame.length) < - ieee80211_get_hdrlen(le16_to_cpu( + libipw_get_hdrlen(le16_to_cpu( header->frame_ctl))) { IPW_DEBUG_DROP ("Received packet is too small. " @@ -8592,9 +8592,9 @@ static int ipw_sw_reset(struct ipw_priv *priv, int option) ": Detected Intel PRO/Wireless 2915ABG Network " "Connection\n"); priv->ieee->abg_true = 1; - band = IEEE80211_52GHZ_BAND | IEEE80211_24GHZ_BAND; - modulation = IEEE80211_OFDM_MODULATION | - IEEE80211_CCK_MODULATION; + band = LIBIPW_52GHZ_BAND | LIBIPW_24GHZ_BAND; + modulation = LIBIPW_OFDM_MODULATION | + LIBIPW_CCK_MODULATION; priv->adapter = IPW_2915ABG; priv->ieee->mode = IEEE_A | IEEE_G | IEEE_B; } else { @@ -8604,9 +8604,9 @@ static int ipw_sw_reset(struct ipw_priv *priv, int option) "Connection\n"); priv->ieee->abg_true = 0; - band = IEEE80211_24GHZ_BAND; - modulation = IEEE80211_OFDM_MODULATION | - IEEE80211_CCK_MODULATION; + band = LIBIPW_24GHZ_BAND; + modulation = LIBIPW_OFDM_MODULATION | + LIBIPW_CCK_MODULATION; priv->adapter = IPW_2200BG; priv->ieee->mode = IEEE_G | IEEE_B; } @@ -8614,7 +8614,7 @@ static int ipw_sw_reset(struct ipw_priv *priv, int option) priv->ieee->freq_band = band; priv->ieee->modulation = modulation; - priv->rates_mask = IEEE80211_DEFAULT_RATES_MASK; + priv->rates_mask = LIBIPW_DEFAULT_RATES_MASK; priv->disassociate_threshold = IPW_MB_DISASSOCIATE_THRESHOLD_DEFAULT; priv->roaming_threshold = IPW_MB_ROAMING_THRESHOLD_DEFAULT; @@ -8644,7 +8644,7 @@ static int ipw_wx_get_name(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); if (priv->status & STATUS_RF_KILL_MASK) strcpy(wrqu->name, "radio off"); @@ -8714,8 +8714,8 @@ static int ipw_wx_set_freq(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); - const struct ieee80211_geo *geo = ieee80211_get_geo(priv->ieee); + struct ipw_priv *priv = libipw_priv(dev); + const struct libipw_geo *geo = libipw_get_geo(priv->ieee); struct iw_freq *fwrq = &wrqu->freq; int ret = 0, i; u8 channel, flags; @@ -8730,23 +8730,23 @@ static int ipw_wx_set_freq(struct net_device *dev, } /* if setting by freq convert to channel */ if (fwrq->e == 1) { - channel = ieee80211_freq_to_channel(priv->ieee, fwrq->m); + channel = libipw_freq_to_channel(priv->ieee, fwrq->m); if (channel == 0) return -EINVAL; } else channel = fwrq->m; - if (!(band = ieee80211_is_valid_channel(priv->ieee, channel))) + if (!(band = libipw_is_valid_channel(priv->ieee, channel))) 
return -EINVAL; if (priv->ieee->iw_mode == IW_MODE_ADHOC) { - i = ieee80211_channel_to_index(priv->ieee, channel); + i = libipw_channel_to_index(priv->ieee, channel); if (i == -1) return -EINVAL; - flags = (band == IEEE80211_24GHZ_BAND) ? + flags = (band == LIBIPW_24GHZ_BAND) ? geo->bg[i].flags : geo->a[i].flags; - if (flags & IEEE80211_CH_PASSIVE_ONLY) { + if (flags & LIBIPW_CH_PASSIVE_ONLY) { IPW_DEBUG_WX("Invalid Ad-Hoc channel for 802.11a\n"); return -EINVAL; } @@ -8763,7 +8763,7 @@ static int ipw_wx_get_freq(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); wrqu->freq.e = 0; @@ -8774,16 +8774,16 @@ static int ipw_wx_get_freq(struct net_device *dev, priv->status & (STATUS_ASSOCIATING | STATUS_ASSOCIATED)) { int i; - i = ieee80211_channel_to_index(priv->ieee, priv->channel); + i = libipw_channel_to_index(priv->ieee, priv->channel); BUG_ON(i == -1); wrqu->freq.e = 1; - switch (ieee80211_is_valid_channel(priv->ieee, priv->channel)) { - case IEEE80211_52GHZ_BAND: + switch (libipw_is_valid_channel(priv->ieee, priv->channel)) { + case LIBIPW_52GHZ_BAND: wrqu->freq.m = priv->ieee->geo.a[i].freq * 100000; break; - case IEEE80211_24GHZ_BAND: + case LIBIPW_24GHZ_BAND: wrqu->freq.m = priv->ieee->geo.bg[i].freq * 100000; break; @@ -8802,7 +8802,7 @@ static int ipw_wx_set_mode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int err = 0; IPW_DEBUG_WX("Set MODE: %d\n", wrqu->mode); @@ -8854,7 +8854,7 @@ static int ipw_wx_get_mode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); wrqu->mode = priv->ieee->iw_mode; IPW_DEBUG_WX("Get MODE -> %d\n", wrqu->mode); @@ -8883,9 +8883,9 @@ static int ipw_wx_get_range(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); struct iw_range *range = (struct iw_range *)extra; - const struct ieee80211_geo *geo = ieee80211_get_geo(priv->ieee); + const struct libipw_geo *geo = libipw_get_geo(priv->ieee); int i = 0, j; wrqu->data.length = sizeof(*range); @@ -8929,7 +8929,7 @@ static int ipw_wx_get_range(struct net_device *dev, if (priv->ieee->mode & (IEEE_B | IEEE_G)) { for (j = 0; j < geo->bg_channels && i < IW_MAX_FREQUENCIES; j++) { if ((priv->ieee->iw_mode == IW_MODE_ADHOC) && - (geo->bg[j].flags & IEEE80211_CH_PASSIVE_ONLY)) + (geo->bg[j].flags & LIBIPW_CH_PASSIVE_ONLY)) continue; range->freq[i].i = geo->bg[j].channel; @@ -8942,7 +8942,7 @@ static int ipw_wx_get_range(struct net_device *dev, if (priv->ieee->mode & IEEE_A) { for (j = 0; j < geo->a_channels && i < IW_MAX_FREQUENCIES; j++) { if ((priv->ieee->iw_mode == IW_MODE_ADHOC) && - (geo->a[j].flags & IEEE80211_CH_PASSIVE_ONLY)) + (geo->a[j].flags & LIBIPW_CH_PASSIVE_ONLY)) continue; range->freq[i].i = geo->a[j].channel; @@ -8977,7 +8977,7 @@ static int ipw_wx_set_wap(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); static const unsigned char any[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff @@ -9026,7 +9026,7 @@ static int 
ipw_wx_get_wap(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); /* If we are associated, trying to associate, or have a statically * configured BSSID then return that; otherwise return ANY */ @@ -9048,7 +9048,7 @@ static int ipw_wx_set_essid(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int length; DECLARE_SSID_BUF(ssid); @@ -9094,7 +9094,7 @@ static int ipw_wx_get_essid(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); DECLARE_SSID_BUF(ssid); /* If we are associated, trying to associate, or have a statically @@ -9120,7 +9120,7 @@ static int ipw_wx_set_nick(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); IPW_DEBUG_WX("Setting nick to '%s'\n", extra); if (wrqu->data.length > IW_ESSID_MAX_SIZE) @@ -9139,7 +9139,7 @@ static int ipw_wx_get_nick(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); IPW_DEBUG_WX("Getting nick\n"); mutex_lock(&priv->mutex); wrqu->data.length = strlen(priv->nick); @@ -9153,7 +9153,7 @@ static int ipw_wx_set_sens(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int err = 0; IPW_DEBUG_WX("Setting roaming threshold to %d\n", wrqu->sens.value); @@ -9183,7 +9183,7 @@ static int ipw_wx_get_sens(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); wrqu->sens.fixed = 1; wrqu->sens.value = priv->roaming_threshold; @@ -9200,7 +9200,7 @@ static int ipw_wx_set_rate(struct net_device *dev, union iwreq_data *wrqu, char *extra) { /* TODO: We should use semaphores or locks for access to priv */ - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); u32 target_rate = wrqu->bitrate.value; u32 fixed, mask; @@ -9210,7 +9210,7 @@ static int ipw_wx_set_rate(struct net_device *dev, if (target_rate == -1) { fixed = 0; - mask = IEEE80211_DEFAULT_RATES_MASK; + mask = LIBIPW_DEFAULT_RATES_MASK; /* Now we should reassociate */ goto apply; } @@ -9219,62 +9219,62 @@ static int ipw_wx_set_rate(struct net_device *dev, fixed = wrqu->bitrate.fixed; if (target_rate == 1000000 || !fixed) - mask |= IEEE80211_CCK_RATE_1MB_MASK; + mask |= LIBIPW_CCK_RATE_1MB_MASK; if (target_rate == 1000000) goto apply; if (target_rate == 2000000 || !fixed) - mask |= IEEE80211_CCK_RATE_2MB_MASK; + mask |= LIBIPW_CCK_RATE_2MB_MASK; if (target_rate == 2000000) goto apply; if (target_rate == 5500000 || !fixed) - mask |= IEEE80211_CCK_RATE_5MB_MASK; + mask |= LIBIPW_CCK_RATE_5MB_MASK; if (target_rate == 5500000) goto apply; if (target_rate == 6000000 || !fixed) - mask |= IEEE80211_OFDM_RATE_6MB_MASK; + mask |= LIBIPW_OFDM_RATE_6MB_MASK; if (target_rate == 6000000) goto apply; if (target_rate == 9000000 || !fixed) - mask |= 
IEEE80211_OFDM_RATE_9MB_MASK; + mask |= LIBIPW_OFDM_RATE_9MB_MASK; if (target_rate == 9000000) goto apply; if (target_rate == 11000000 || !fixed) - mask |= IEEE80211_CCK_RATE_11MB_MASK; + mask |= LIBIPW_CCK_RATE_11MB_MASK; if (target_rate == 11000000) goto apply; if (target_rate == 12000000 || !fixed) - mask |= IEEE80211_OFDM_RATE_12MB_MASK; + mask |= LIBIPW_OFDM_RATE_12MB_MASK; if (target_rate == 12000000) goto apply; if (target_rate == 18000000 || !fixed) - mask |= IEEE80211_OFDM_RATE_18MB_MASK; + mask |= LIBIPW_OFDM_RATE_18MB_MASK; if (target_rate == 18000000) goto apply; if (target_rate == 24000000 || !fixed) - mask |= IEEE80211_OFDM_RATE_24MB_MASK; + mask |= LIBIPW_OFDM_RATE_24MB_MASK; if (target_rate == 24000000) goto apply; if (target_rate == 36000000 || !fixed) - mask |= IEEE80211_OFDM_RATE_36MB_MASK; + mask |= LIBIPW_OFDM_RATE_36MB_MASK; if (target_rate == 36000000) goto apply; if (target_rate == 48000000 || !fixed) - mask |= IEEE80211_OFDM_RATE_48MB_MASK; + mask |= LIBIPW_OFDM_RATE_48MB_MASK; if (target_rate == 48000000) goto apply; if (target_rate == 54000000 || !fixed) - mask |= IEEE80211_OFDM_RATE_54MB_MASK; + mask |= LIBIPW_OFDM_RATE_54MB_MASK; if (target_rate == 54000000) goto apply; @@ -9285,7 +9285,7 @@ static int ipw_wx_set_rate(struct net_device *dev, IPW_DEBUG_WX("Setting rate mask to 0x%08X [%s]\n", mask, fixed ? "fixed" : "sub-rates"); mutex_lock(&priv->mutex); - if (mask == IEEE80211_DEFAULT_RATES_MASK) { + if (mask == LIBIPW_DEFAULT_RATES_MASK) { priv->config &= ~CFG_FIXED_RATE; ipw_set_fixed_rate(priv, priv->ieee->mode); } else @@ -9312,7 +9312,7 @@ static int ipw_wx_get_rate(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); wrqu->bitrate.value = priv->last_rate; wrqu->bitrate.fixed = (priv->config & CFG_FIXED_RATE) ? 
1 : 0; @@ -9325,7 +9325,7 @@ static int ipw_wx_set_rts(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); if (wrqu->rts.disabled || !wrqu->rts.fixed) priv->rts_threshold = DEFAULT_RTS_THRESHOLD; @@ -9348,7 +9348,7 @@ static int ipw_wx_get_rts(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); wrqu->rts.value = priv->rts_threshold; wrqu->rts.fixed = 0; /* no auto select */ @@ -9362,7 +9362,7 @@ static int ipw_wx_set_txpow(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int err = 0; mutex_lock(&priv->mutex); @@ -9396,7 +9396,7 @@ static int ipw_wx_get_txpow(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); wrqu->power.value = priv->tx_power; wrqu->power.fixed = 1; @@ -9414,7 +9414,7 @@ static int ipw_wx_set_frag(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); if (wrqu->frag.disabled || !wrqu->frag.fixed) priv->ieee->fts = DEFAULT_FTS; @@ -9438,7 +9438,7 @@ static int ipw_wx_get_frag(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); wrqu->frag.value = priv->ieee->fts; wrqu->frag.fixed = 0; /* no auto select */ @@ -9453,7 +9453,7 @@ static int ipw_wx_set_retry(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); if (wrqu->retry.flags & IW_RETRY_LIFETIME || wrqu->retry.disabled) return -EINVAL; @@ -9486,7 +9486,7 @@ static int ipw_wx_get_retry(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); wrqu->retry.disabled = 0; @@ -9517,7 +9517,7 @@ static int ipw_wx_set_scan(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); struct iw_scan_req *req = (struct iw_scan_req *)extra; struct delayed_work *work = NULL; @@ -9553,20 +9553,20 @@ static int ipw_wx_get_scan(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); - return ieee80211_wx_get_scan(priv->ieee, info, wrqu, extra); + struct ipw_priv *priv = libipw_priv(dev); + return libipw_wx_get_scan(priv->ieee, info, wrqu, extra); } static int ipw_wx_set_encode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *key) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int ret; u32 cap = priv->capability; mutex_lock(&priv->mutex); - ret = 
ieee80211_wx_set_encode(priv->ieee, info, wrqu, key); + ret = libipw_wx_set_encode(priv->ieee, info, wrqu, key); /* In IBSS mode, we need to notify the firmware to update * the beacon info after we changed the capability. */ @@ -9583,15 +9583,15 @@ static int ipw_wx_get_encode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *key) { - struct ipw_priv *priv = ieee80211_priv(dev); - return ieee80211_wx_get_encode(priv->ieee, info, wrqu, key); + struct ipw_priv *priv = libipw_priv(dev); + return libipw_wx_get_encode(priv->ieee, info, wrqu, key); } static int ipw_wx_set_power(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int err; mutex_lock(&priv->mutex); if (wrqu->power.disabled) { @@ -9642,7 +9642,7 @@ static int ipw_wx_get_power(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); if (!(priv->power_mode & IPW_POWER_ENABLED)) wrqu->power.disabled = 1; @@ -9659,7 +9659,7 @@ static int ipw_wx_set_powermode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int mode = *(int *)extra; int err; @@ -9685,7 +9685,7 @@ static int ipw_wx_get_powermode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int level = IPW_POWER_LEVEL(priv->power_mode); char *p = extra; @@ -9717,7 +9717,7 @@ static int ipw_wx_set_wireless_mode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int mode = *(int *)extra; u8 band = 0, modulation = 0; @@ -9729,8 +9729,8 @@ static int ipw_wx_set_wireless_mode(struct net_device *dev, if (priv->adapter == IPW_2915ABG) { priv->ieee->abg_true = 1; if (mode & IEEE_A) { - band |= IEEE80211_52GHZ_BAND; - modulation |= IEEE80211_OFDM_MODULATION; + band |= LIBIPW_52GHZ_BAND; + modulation |= LIBIPW_OFDM_MODULATION; } else priv->ieee->abg_true = 0; } else { @@ -9745,14 +9745,14 @@ static int ipw_wx_set_wireless_mode(struct net_device *dev, } if (mode & IEEE_B) { - band |= IEEE80211_24GHZ_BAND; - modulation |= IEEE80211_CCK_MODULATION; + band |= LIBIPW_24GHZ_BAND; + modulation |= LIBIPW_CCK_MODULATION; } else priv->ieee->abg_true = 0; if (mode & IEEE_G) { - band |= IEEE80211_24GHZ_BAND; - modulation |= IEEE80211_OFDM_MODULATION; + band |= LIBIPW_24GHZ_BAND; + modulation |= LIBIPW_OFDM_MODULATION; } else priv->ieee->abg_true = 0; @@ -9782,7 +9782,7 @@ static int ipw_wx_get_wireless_mode(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); switch (priv->ieee->mode) { case IEEE_A: @@ -9823,7 +9823,7 @@ static int ipw_wx_set_preamble(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int mode = *(int *)extra; mutex_lock(&priv->mutex); /* Switching from SHORT -> LONG requires a disassociation */ 
@@ -9856,7 +9856,7 @@ static int ipw_wx_get_preamble(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); if (priv->config & CFG_PREAMBLE_LONG) snprintf(wrqu->name, IFNAMSIZ, "long (1)"); @@ -9871,7 +9871,7 @@ static int ipw_wx_set_monitor(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int *parms = (int *)extra; int enable = (parms[0] > 0); mutex_lock(&priv->mutex); @@ -9905,7 +9905,7 @@ static int ipw_wx_reset(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); IPW_DEBUG_WX("RESET\n"); queue_work(priv->workqueue, &priv->adapter_restart); return 0; @@ -9915,7 +9915,7 @@ static int ipw_wx_sw_reset(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); union iwreq_data wrqu_sec = { .encoding = { .flags = IW_ENCODE_DISABLED, @@ -9938,7 +9938,7 @@ static int ipw_wx_sw_reset(struct net_device *dev, ipw_radio_kill_sw(priv, priv->status & STATUS_RF_KILL_SW); mutex_unlock(&priv->mutex); - ieee80211_wx_set_encode(priv->ieee, info, &wrqu_sec, NULL); + libipw_wx_set_encode(priv->ieee, info, &wrqu_sec, NULL); mutex_lock(&priv->mutex); if (!(priv->status & STATUS_RF_KILL_MASK)) { @@ -10083,7 +10083,7 @@ static struct iw_handler_def ipw_wx_handler_def = { */ static struct iw_statistics *ipw_get_wireless_stats(struct net_device *dev) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); struct iw_statistics *wstats; wstats = &priv->wstats; @@ -10164,13 +10164,13 @@ static int ipw_net_stop(struct net_device *dev) todo: modify to send one tfd per fragment instead of using chunking. otherwise -we need to heavily modify the ieee80211_skb_to_txb. +we need to heavily modify the libipw_skb_to_txb. 
*/ -static int ipw_tx_skb(struct ipw_priv *priv, struct ieee80211_txb *txb, +static int ipw_tx_skb(struct ipw_priv *priv, struct libipw_txb *txb, int pri) { - struct ieee80211_hdr_3addrqos *hdr = (struct ieee80211_hdr_3addrqos *) + struct libipw_hdr_3addrqos *hdr = (struct libipw_hdr_3addrqos *) txb->fragments[0]->data; int i = 0; struct tfd_frame *tfd; @@ -10187,7 +10187,7 @@ static int ipw_tx_skb(struct ipw_priv *priv, struct ieee80211_txb *txb, if (!(priv->status & STATUS_ASSOCIATED)) goto drop; - hdr_len = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); + hdr_len = libipw_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); switch (priv->ieee->iw_mode) { case IW_MODE_ADHOC: unicast = !is_multicast_ether_addr(hdr->addr1); @@ -10356,13 +10356,13 @@ static int ipw_tx_skb(struct ipw_priv *priv, struct ieee80211_txb *txb, drop: IPW_DEBUG_DROP("Silently dropping Tx packet.\n"); - ieee80211_txb_free(txb); + libipw_txb_free(txb); return NETDEV_TX_OK; } static int ipw_net_is_queue_full(struct net_device *dev, int pri) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); #ifdef CONFIG_IPW2200_QOS int tx_id = ipw_get_tx_queue_number(priv, pri); struct clx2_tx_queue *txq = &priv->txq[tx_id]; @@ -10378,9 +10378,9 @@ static int ipw_net_is_queue_full(struct net_device *dev, int pri) #ifdef CONFIG_IPW2200_PROMISCUOUS static void ipw_handle_promiscuous_tx(struct ipw_priv *priv, - struct ieee80211_txb *txb) + struct libipw_txb *txb) { - struct ieee80211_rx_stats dummystats; + struct libipw_rx_stats dummystats; struct ieee80211_hdr *hdr; u8 n; u16 filter = priv->prom_priv->filter; @@ -10393,17 +10393,17 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv, /* Filtering of fragment chains is done agains the first fragment */ hdr = (void *)txb->fragments[0]->data; - if (ieee80211_is_management(le16_to_cpu(hdr->frame_control))) { + if (libipw_is_management(le16_to_cpu(hdr->frame_control))) { if (filter & IPW_PROM_NO_MGMT) return; if (filter & IPW_PROM_MGMT_HEADER_ONLY) hdr_only = 1; - } else if (ieee80211_is_control(le16_to_cpu(hdr->frame_control))) { + } else if (libipw_is_control(le16_to_cpu(hdr->frame_control))) { if (filter & IPW_PROM_NO_CTL) return; if (filter & IPW_PROM_CTL_HEADER_ONLY) hdr_only = 1; - } else if (ieee80211_is_data(le16_to_cpu(hdr->frame_control))) { + } else if (libipw_is_data(le16_to_cpu(hdr->frame_control))) { if (filter & IPW_PROM_NO_DATA) return; if (filter & IPW_PROM_DATA_HEADER_ONLY) @@ -10418,7 +10418,7 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv, if (hdr_only) { hdr = (void *)src->data; - len = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_control)); + len = libipw_get_hdrlen(le16_to_cpu(hdr->frame_control)); } else len = src->len; @@ -10452,16 +10452,16 @@ static void ipw_handle_promiscuous_tx(struct ipw_priv *priv, skb_copy_from_linear_data(src, skb_put(dst, len), len); - if (!ieee80211_rx(priv->prom_priv->ieee, dst, &dummystats)) + if (!libipw_rx(priv->prom_priv->ieee, dst, &dummystats)) dev_kfree_skb_any(dst); } } #endif -static int ipw_net_hard_start_xmit(struct ieee80211_txb *txb, +static int ipw_net_hard_start_xmit(struct libipw_txb *txb, struct net_device *dev, int pri) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); unsigned long flags; int ret; @@ -10488,7 +10488,7 @@ static void ipw_net_set_multicast_list(struct net_device *dev) static int ipw_net_set_mac_address(struct net_device *dev, void *p) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv 
*priv = libipw_priv(dev); struct sockaddr *addr = p; if (!is_valid_ether_addr(addr->sa_data)) @@ -10506,7 +10506,7 @@ static int ipw_net_set_mac_address(struct net_device *dev, void *p) static void ipw_ethtool_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - struct ipw_priv *p = ieee80211_priv(dev); + struct ipw_priv *p = libipw_priv(dev); char vers[64]; char date[32]; u32 len; @@ -10527,7 +10527,7 @@ static void ipw_ethtool_get_drvinfo(struct net_device *dev, static u32 ipw_ethtool_get_link(struct net_device *dev) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); return (priv->status & STATUS_ASSOCIATED) != 0; } @@ -10539,7 +10539,7 @@ static int ipw_ethtool_get_eeprom_len(struct net_device *dev) static int ipw_ethtool_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 * bytes) { - struct ipw_priv *p = ieee80211_priv(dev); + struct ipw_priv *p = libipw_priv(dev); if (eeprom->offset + eeprom->len > IPW_EEPROM_IMAGE_SIZE) return -EINVAL; @@ -10552,7 +10552,7 @@ static int ipw_ethtool_get_eeprom(struct net_device *dev, static int ipw_ethtool_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 * bytes) { - struct ipw_priv *p = ieee80211_priv(dev); + struct ipw_priv *p = libipw_priv(dev); int i; if (eeprom->offset + eeprom->len > IPW_EEPROM_IMAGE_SIZE) @@ -10768,9 +10768,9 @@ static int __devinit ipw_setup_deferred_work(struct ipw_priv *priv) } static void shim__set_security(struct net_device *dev, - struct ieee80211_security *sec) + struct libipw_security *sec) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); int i; for (i = 0; i < 4; i++) { if (sec->flags & (1 << i)) { @@ -10855,21 +10855,21 @@ static int init_supported_rates(struct ipw_priv *priv, memset(rates, 0, sizeof(*rates)); /* configure supported rates */ switch (priv->ieee->freq_band) { - case IEEE80211_52GHZ_BAND: + case LIBIPW_52GHZ_BAND: rates->ieee_mode = IPW_A_MODE; rates->purpose = IPW_RATE_CAPABILITIES; - ipw_add_ofdm_scan_rates(rates, IEEE80211_CCK_MODULATION, - IEEE80211_OFDM_DEFAULT_RATES_MASK); + ipw_add_ofdm_scan_rates(rates, LIBIPW_CCK_MODULATION, + LIBIPW_OFDM_DEFAULT_RATES_MASK); break; default: /* Mixed or 2.4Ghz */ rates->ieee_mode = IPW_G_MODE; rates->purpose = IPW_RATE_CAPABILITIES; - ipw_add_cck_scan_rates(rates, IEEE80211_CCK_MODULATION, - IEEE80211_CCK_DEFAULT_RATES_MASK); - if (priv->ieee->modulation & IEEE80211_OFDM_MODULATION) { - ipw_add_ofdm_scan_rates(rates, IEEE80211_CCK_MODULATION, - IEEE80211_OFDM_DEFAULT_RATES_MASK); + ipw_add_cck_scan_rates(rates, LIBIPW_CCK_MODULATION, + LIBIPW_CCK_DEFAULT_RATES_MASK); + if (priv->ieee->modulation & LIBIPW_OFDM_MODULATION) { + ipw_add_ofdm_scan_rates(rates, LIBIPW_CCK_MODULATION, + LIBIPW_OFDM_DEFAULT_RATES_MASK); } break; } @@ -10975,7 +10975,7 @@ static int ipw_config(struct ipw_priv *priv) * table. 
* */ -static const struct ieee80211_geo ipw_geos[] = { +static const struct libipw_geo ipw_geos[] = { { /* Restricted */ "---", .bg_channels = 11, @@ -10997,10 +10997,10 @@ static const struct ieee80211_geo ipw_geos[] = { {5200, 40}, {5220, 44}, {5240, 48}, - {5260, 52, IEEE80211_CH_PASSIVE_ONLY}, - {5280, 56, IEEE80211_CH_PASSIVE_ONLY}, - {5300, 60, IEEE80211_CH_PASSIVE_ONLY}, - {5320, 64, IEEE80211_CH_PASSIVE_ONLY}}, + {5260, 52, LIBIPW_CH_PASSIVE_ONLY}, + {5280, 56, LIBIPW_CH_PASSIVE_ONLY}, + {5300, 60, LIBIPW_CH_PASSIVE_ONLY}, + {5320, 64, LIBIPW_CH_PASSIVE_ONLY}}, }, { /* Rest of World */ @@ -11025,10 +11025,10 @@ static const struct ieee80211_geo ipw_geos[] = { {5200, 40}, {5220, 44}, {5240, 48}, - {5260, 52, IEEE80211_CH_PASSIVE_ONLY}, - {5280, 56, IEEE80211_CH_PASSIVE_ONLY}, - {5300, 60, IEEE80211_CH_PASSIVE_ONLY}, - {5320, 64, IEEE80211_CH_PASSIVE_ONLY}, + {5260, 52, LIBIPW_CH_PASSIVE_ONLY}, + {5280, 56, LIBIPW_CH_PASSIVE_ONLY}, + {5300, 60, LIBIPW_CH_PASSIVE_ONLY}, + {5320, 64, LIBIPW_CH_PASSIVE_ONLY}, {5745, 149}, {5765, 153}, {5785, 157}, @@ -11048,15 +11048,15 @@ static const struct ieee80211_geo ipw_geos[] = { {5200, 40}, {5220, 44}, {5240, 48}, - {5260, 52, IEEE80211_CH_PASSIVE_ONLY}, - {5280, 56, IEEE80211_CH_PASSIVE_ONLY}, - {5300, 60, IEEE80211_CH_PASSIVE_ONLY}, - {5320, 64, IEEE80211_CH_PASSIVE_ONLY}, - {5745, 149, IEEE80211_CH_PASSIVE_ONLY}, - {5765, 153, IEEE80211_CH_PASSIVE_ONLY}, - {5785, 157, IEEE80211_CH_PASSIVE_ONLY}, - {5805, 161, IEEE80211_CH_PASSIVE_ONLY}, - {5825, 165, IEEE80211_CH_PASSIVE_ONLY}}, + {5260, 52, LIBIPW_CH_PASSIVE_ONLY}, + {5280, 56, LIBIPW_CH_PASSIVE_ONLY}, + {5300, 60, LIBIPW_CH_PASSIVE_ONLY}, + {5320, 64, LIBIPW_CH_PASSIVE_ONLY}, + {5745, 149, LIBIPW_CH_PASSIVE_ONLY}, + {5765, 153, LIBIPW_CH_PASSIVE_ONLY}, + {5785, 157, LIBIPW_CH_PASSIVE_ONLY}, + {5805, 161, LIBIPW_CH_PASSIVE_ONLY}, + {5825, 165, LIBIPW_CH_PASSIVE_ONLY}}, }, { /* Custom Japan */ @@ -11093,21 +11093,21 @@ static const struct ieee80211_geo ipw_geos[] = { {5200, 40}, {5220, 44}, {5240, 48}, - {5260, 52, IEEE80211_CH_PASSIVE_ONLY}, - {5280, 56, IEEE80211_CH_PASSIVE_ONLY}, - {5300, 60, IEEE80211_CH_PASSIVE_ONLY}, - {5320, 64, IEEE80211_CH_PASSIVE_ONLY}, - {5500, 100, IEEE80211_CH_PASSIVE_ONLY}, - {5520, 104, IEEE80211_CH_PASSIVE_ONLY}, - {5540, 108, IEEE80211_CH_PASSIVE_ONLY}, - {5560, 112, IEEE80211_CH_PASSIVE_ONLY}, - {5580, 116, IEEE80211_CH_PASSIVE_ONLY}, - {5600, 120, IEEE80211_CH_PASSIVE_ONLY}, - {5620, 124, IEEE80211_CH_PASSIVE_ONLY}, - {5640, 128, IEEE80211_CH_PASSIVE_ONLY}, - {5660, 132, IEEE80211_CH_PASSIVE_ONLY}, - {5680, 136, IEEE80211_CH_PASSIVE_ONLY}, - {5700, 140, IEEE80211_CH_PASSIVE_ONLY}}, + {5260, 52, LIBIPW_CH_PASSIVE_ONLY}, + {5280, 56, LIBIPW_CH_PASSIVE_ONLY}, + {5300, 60, LIBIPW_CH_PASSIVE_ONLY}, + {5320, 64, LIBIPW_CH_PASSIVE_ONLY}, + {5500, 100, LIBIPW_CH_PASSIVE_ONLY}, + {5520, 104, LIBIPW_CH_PASSIVE_ONLY}, + {5540, 108, LIBIPW_CH_PASSIVE_ONLY}, + {5560, 112, LIBIPW_CH_PASSIVE_ONLY}, + {5580, 116, LIBIPW_CH_PASSIVE_ONLY}, + {5600, 120, LIBIPW_CH_PASSIVE_ONLY}, + {5620, 124, LIBIPW_CH_PASSIVE_ONLY}, + {5640, 128, LIBIPW_CH_PASSIVE_ONLY}, + {5660, 132, LIBIPW_CH_PASSIVE_ONLY}, + {5680, 136, LIBIPW_CH_PASSIVE_ONLY}, + {5700, 140, LIBIPW_CH_PASSIVE_ONLY}}, }, { /* Custom Japan */ @@ -11117,7 +11117,7 @@ static const struct ieee80211_geo ipw_geos[] = { {2427, 4}, {2432, 5}, {2437, 6}, {2442, 7}, {2447, 8}, {2452, 9}, {2457, 10}, {2462, 11}, {2467, 12}, - {2472, 13}, {2484, 14, IEEE80211_CH_B_ONLY}}, + {2472, 13}, {2484, 14, LIBIPW_CH_B_ONLY}}, .a_channels = 4, 
.a = {{5170, 34}, {5190, 38}, {5210, 42}, {5230, 46}}, @@ -11130,8 +11130,8 @@ static const struct ieee80211_geo ipw_geos[] = { {2427, 4}, {2432, 5}, {2437, 6}, {2442, 7}, {2447, 8}, {2452, 9}, {2457, 10}, {2462, 11}, {2467, 12}, - {2472, 13}, {2484, 14, IEEE80211_CH_B_ONLY | - IEEE80211_CH_PASSIVE_ONLY}}, + {2472, 13}, {2484, 14, LIBIPW_CH_B_ONLY | + LIBIPW_CH_PASSIVE_ONLY}}, }, { /* High Band */ @@ -11141,8 +11141,8 @@ static const struct ieee80211_geo ipw_geos[] = { {2427, 4}, {2432, 5}, {2437, 6}, {2442, 7}, {2447, 8}, {2452, 9}, {2457, 10}, {2462, 11}, - {2467, 12, IEEE80211_CH_PASSIVE_ONLY}, - {2472, 13, IEEE80211_CH_PASSIVE_ONLY}}, + {2467, 12, LIBIPW_CH_PASSIVE_ONLY}, + {2472, 13, LIBIPW_CH_PASSIVE_ONLY}}, .a_channels = 4, .a = {{5745, 149}, {5765, 153}, {5785, 157}, {5805, 161}}, @@ -11168,33 +11168,33 @@ static const struct ieee80211_geo ipw_geos[] = { {2427, 4}, {2432, 5}, {2437, 6}, {2442, 7}, {2447, 8}, {2452, 9}, {2457, 10}, {2462, 11}, - {2467, 12, IEEE80211_CH_PASSIVE_ONLY}, - {2472, 13, IEEE80211_CH_PASSIVE_ONLY}}, + {2467, 12, LIBIPW_CH_PASSIVE_ONLY}, + {2472, 13, LIBIPW_CH_PASSIVE_ONLY}}, .a_channels = 24, - .a = {{5180, 36, IEEE80211_CH_PASSIVE_ONLY}, - {5200, 40, IEEE80211_CH_PASSIVE_ONLY}, - {5220, 44, IEEE80211_CH_PASSIVE_ONLY}, - {5240, 48, IEEE80211_CH_PASSIVE_ONLY}, - {5260, 52, IEEE80211_CH_PASSIVE_ONLY}, - {5280, 56, IEEE80211_CH_PASSIVE_ONLY}, - {5300, 60, IEEE80211_CH_PASSIVE_ONLY}, - {5320, 64, IEEE80211_CH_PASSIVE_ONLY}, - {5500, 100, IEEE80211_CH_PASSIVE_ONLY}, - {5520, 104, IEEE80211_CH_PASSIVE_ONLY}, - {5540, 108, IEEE80211_CH_PASSIVE_ONLY}, - {5560, 112, IEEE80211_CH_PASSIVE_ONLY}, - {5580, 116, IEEE80211_CH_PASSIVE_ONLY}, - {5600, 120, IEEE80211_CH_PASSIVE_ONLY}, - {5620, 124, IEEE80211_CH_PASSIVE_ONLY}, - {5640, 128, IEEE80211_CH_PASSIVE_ONLY}, - {5660, 132, IEEE80211_CH_PASSIVE_ONLY}, - {5680, 136, IEEE80211_CH_PASSIVE_ONLY}, - {5700, 140, IEEE80211_CH_PASSIVE_ONLY}, - {5745, 149, IEEE80211_CH_PASSIVE_ONLY}, - {5765, 153, IEEE80211_CH_PASSIVE_ONLY}, - {5785, 157, IEEE80211_CH_PASSIVE_ONLY}, - {5805, 161, IEEE80211_CH_PASSIVE_ONLY}, - {5825, 165, IEEE80211_CH_PASSIVE_ONLY}}, + .a = {{5180, 36, LIBIPW_CH_PASSIVE_ONLY}, + {5200, 40, LIBIPW_CH_PASSIVE_ONLY}, + {5220, 44, LIBIPW_CH_PASSIVE_ONLY}, + {5240, 48, LIBIPW_CH_PASSIVE_ONLY}, + {5260, 52, LIBIPW_CH_PASSIVE_ONLY}, + {5280, 56, LIBIPW_CH_PASSIVE_ONLY}, + {5300, 60, LIBIPW_CH_PASSIVE_ONLY}, + {5320, 64, LIBIPW_CH_PASSIVE_ONLY}, + {5500, 100, LIBIPW_CH_PASSIVE_ONLY}, + {5520, 104, LIBIPW_CH_PASSIVE_ONLY}, + {5540, 108, LIBIPW_CH_PASSIVE_ONLY}, + {5560, 112, LIBIPW_CH_PASSIVE_ONLY}, + {5580, 116, LIBIPW_CH_PASSIVE_ONLY}, + {5600, 120, LIBIPW_CH_PASSIVE_ONLY}, + {5620, 124, LIBIPW_CH_PASSIVE_ONLY}, + {5640, 128, LIBIPW_CH_PASSIVE_ONLY}, + {5660, 132, LIBIPW_CH_PASSIVE_ONLY}, + {5680, 136, LIBIPW_CH_PASSIVE_ONLY}, + {5700, 140, LIBIPW_CH_PASSIVE_ONLY}, + {5745, 149, LIBIPW_CH_PASSIVE_ONLY}, + {5765, 153, LIBIPW_CH_PASSIVE_ONLY}, + {5785, 157, LIBIPW_CH_PASSIVE_ONLY}, + {5805, 161, LIBIPW_CH_PASSIVE_ONLY}, + {5825, 165, LIBIPW_CH_PASSIVE_ONLY}}, }, { /* Europe */ @@ -11205,19 +11205,19 @@ static const struct ieee80211_geo ipw_geos[] = { {2442, 7}, {2447, 8}, {2452, 9}, {2457, 10}, {2462, 11}}, .a_channels = 13, - .a = {{5180, 36, IEEE80211_CH_PASSIVE_ONLY}, - {5200, 40, IEEE80211_CH_PASSIVE_ONLY}, - {5220, 44, IEEE80211_CH_PASSIVE_ONLY}, - {5240, 48, IEEE80211_CH_PASSIVE_ONLY}, - {5260, 52, IEEE80211_CH_PASSIVE_ONLY}, - {5280, 56, IEEE80211_CH_PASSIVE_ONLY}, - {5300, 60, IEEE80211_CH_PASSIVE_ONLY}, - 
{5320, 64, IEEE80211_CH_PASSIVE_ONLY}, - {5745, 149, IEEE80211_CH_PASSIVE_ONLY}, - {5765, 153, IEEE80211_CH_PASSIVE_ONLY}, - {5785, 157, IEEE80211_CH_PASSIVE_ONLY}, - {5805, 161, IEEE80211_CH_PASSIVE_ONLY}, - {5825, 165, IEEE80211_CH_PASSIVE_ONLY}}, + .a = {{5180, 36, LIBIPW_CH_PASSIVE_ONLY}, + {5200, 40, LIBIPW_CH_PASSIVE_ONLY}, + {5220, 44, LIBIPW_CH_PASSIVE_ONLY}, + {5240, 48, LIBIPW_CH_PASSIVE_ONLY}, + {5260, 52, LIBIPW_CH_PASSIVE_ONLY}, + {5280, 56, LIBIPW_CH_PASSIVE_ONLY}, + {5300, 60, LIBIPW_CH_PASSIVE_ONLY}, + {5320, 64, LIBIPW_CH_PASSIVE_ONLY}, + {5745, 149, LIBIPW_CH_PASSIVE_ONLY}, + {5765, 153, LIBIPW_CH_PASSIVE_ONLY}, + {5785, 157, LIBIPW_CH_PASSIVE_ONLY}, + {5805, 161, LIBIPW_CH_PASSIVE_ONLY}, + {5825, 165, LIBIPW_CH_PASSIVE_ONLY}}, } }; @@ -11228,7 +11228,7 @@ static int ipw_up(struct ipw_priv *priv) /* Age scan list entries found before suspend */ if (priv->suspend_time) { - ieee80211_networks_age(priv->ieee, priv->suspend_time); + libipw_networks_age(priv->ieee, priv->suspend_time); priv->suspend_time = 0; } @@ -11273,7 +11273,7 @@ static int ipw_up(struct ipw_priv *priv) priv->eeprom[EEPROM_COUNTRY_CODE + 2]); j = 0; } - if (ieee80211_set_geo(priv->ieee, &ipw_geos[j])) { + if (libipw_set_geo(priv->ieee, &ipw_geos[j])) { IPW_WARNING("Could not set geography."); return 0; } @@ -11401,7 +11401,7 @@ static void ipw_bg_down(struct work_struct *work) /* Called by register_netdev() */ static int ipw_net_init(struct net_device *dev) { - struct ipw_priv *priv = ieee80211_priv(dev); + struct ipw_priv *priv = libipw_priv(dev); mutex_lock(&priv->mutex); if (ipw_up(priv)) { @@ -11480,7 +11480,7 @@ static struct attribute_group ipw_attribute_group = { #ifdef CONFIG_IPW2200_PROMISCUOUS static int ipw_prom_open(struct net_device *dev) { - struct ipw_prom_priv *prom_priv = ieee80211_priv(dev); + struct ipw_prom_priv *prom_priv = libipw_priv(dev); struct ipw_priv *priv = prom_priv->priv; IPW_DEBUG_INFO("prom dev->open\n"); @@ -11500,7 +11500,7 @@ static int ipw_prom_open(struct net_device *dev) static int ipw_prom_stop(struct net_device *dev) { - struct ipw_prom_priv *prom_priv = ieee80211_priv(dev); + struct ipw_prom_priv *prom_priv = libipw_priv(dev); struct ipw_priv *priv = prom_priv->priv; IPW_DEBUG_INFO("prom dev->stop\n"); @@ -11528,7 +11528,7 @@ static const struct net_device_ops ipw_prom_netdev_ops = { .ndo_open = ipw_prom_open, .ndo_stop = ipw_prom_stop, .ndo_start_xmit = ipw_prom_hard_start_xmit, - .ndo_change_mtu = ieee80211_change_mtu, + .ndo_change_mtu = libipw_change_mtu, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, }; @@ -11544,7 +11544,7 @@ static int ipw_prom_alloc(struct ipw_priv *priv) if (priv->prom_net_dev == NULL) return -ENOMEM; - priv->prom_priv = ieee80211_priv(priv->prom_net_dev); + priv->prom_priv = libipw_priv(priv->prom_net_dev); priv->prom_priv->ieee = netdev_priv(priv->prom_net_dev); priv->prom_priv->priv = priv; @@ -11586,8 +11586,8 @@ static const struct net_device_ops ipw_netdev_ops = { .ndo_stop = ipw_net_stop, .ndo_set_multicast_list = ipw_net_set_multicast_list, .ndo_set_mac_address = ipw_net_set_mac_address, - .ndo_start_xmit = ieee80211_xmit, - .ndo_change_mtu = ieee80211_change_mtu, + .ndo_start_xmit = libipw_xmit, + .ndo_change_mtu = libipw_change_mtu, .ndo_validate_addr = eth_validate_addr, }; @@ -11607,7 +11607,7 @@ static int __devinit ipw_pci_probe(struct pci_dev *pdev, goto out; } - priv = ieee80211_priv(net_dev); + priv = libipw_priv(net_dev); priv->ieee = netdev_priv(net_dev); priv->net_dev = net_dev; diff --git 
a/drivers/net/wireless/ipw2x00/ipw2200.h b/drivers/net/wireless/ipw2x00/ipw2200.h index 05e8ccf01c5f..4448bad51b8a 100644 --- a/drivers/net/wireless/ipw2x00/ipw2200.h +++ b/drivers/net/wireless/ipw2x00/ipw2200.h @@ -55,7 +55,7 @@ #include -#include "ieee80211.h" +#include "libipw.h" /* Authentication and Association States */ enum connection_manager_assoc_states { @@ -365,8 +365,8 @@ enum connection_manager_assoc_states { /* QoS sturctures */ struct ipw_qos_info { int qos_enable; - struct ieee80211_qos_parameters *def_qos_parm_OFDM; - struct ieee80211_qos_parameters *def_qos_parm_CCK; + struct libipw_qos_parameters *def_qos_parm_OFDM; + struct libipw_qos_parameters *def_qos_parm_CCK; u32 burst_duration_CCK; u32 burst_duration_OFDM; u16 qos_no_ack_mask; @@ -534,7 +534,7 @@ typedef void destructor_func(const void *); struct clx2_tx_queue { struct clx2_queue q; struct tfd_frame *bd; - struct ieee80211_txb **txb; + struct libipw_txb **txb; }; /* @@ -1144,7 +1144,7 @@ enum ipw_prom_filter { struct ipw_priv; struct ipw_prom_priv { struct ipw_priv *priv; - struct ieee80211_device *ieee; + struct libipw_device *ieee; enum ipw_prom_filter filter; int tx_packets; int rx_packets; @@ -1175,7 +1175,7 @@ struct ipw_rt_hdr { struct ipw_priv { /* ieee device used by generic ieee processing code */ - struct ieee80211_device *ieee; + struct libipw_device *ieee; spinlock_t lock; spinlock_t irq_lock; @@ -1222,7 +1222,7 @@ struct ipw_priv { u32 roaming_threshold; struct ipw_associate assoc_request; - struct ieee80211_network *assoc_network; + struct libipw_network *assoc_network; unsigned long ts_scan_abort; struct ipw_supported_rates rates; diff --git a/drivers/net/wireless/ipw2x00/libipw.h b/drivers/net/wireless/ipw2x00/libipw.h new file mode 100644 index 000000000000..cefb94262cc2 --- /dev/null +++ b/drivers/net/wireless/ipw2x00/libipw.h @@ -0,0 +1,1087 @@ +/* + * Merged with mainline ieee80211.h in Aug 2004. Original ieee802_11 + * remains copyright by the original authors + * + * Portions of the merged code are based on Host AP (software wireless + * LAN access point) driver for Intersil Prism2/2.5/3. + * + * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen + * + * Copyright (c) 2002-2003, Jouni Malinen + * + * Adaption to a generic IEEE 802.11 stack by James Ketrenos + * + * Copyright (c) 2004-2005, Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. See README and COPYING for + * more details. + * + * API Version History + * 1.0.x -- Initial version + * 1.1.x -- Added radiotap, QoS, TIM, libipw_geo APIs, + * various structure changes, and crypto API init method + */ +#ifndef LIBIPW_H +#define LIBIPW_H +#include /* ETH_ALEN */ +#include /* ARRAY_SIZE */ +#include +#include + +#include + +#define LIBIPW_VERSION "git-1.1.13" + +#define LIBIPW_DATA_LEN 2304 +/* Maximum size for the MA-UNITDATA primitive, 802.11 standard section + 6.2.1.1.2. + + The figure in section 7.1.2 suggests a body size of up to 2312 + bytes is allowed, which is a bit confusing, I suspect this + represents the 2304 bytes of real data, plus a possible 8 bytes of + WEP IV and ICV. 
(this interpretation suggested by Ramiro Barreiro) */ + +#define LIBIPW_1ADDR_LEN 10 +#define LIBIPW_2ADDR_LEN 16 +#define LIBIPW_3ADDR_LEN 24 +#define LIBIPW_4ADDR_LEN 30 +#define LIBIPW_FCS_LEN 4 +#define LIBIPW_HLEN (LIBIPW_4ADDR_LEN) +#define LIBIPW_FRAME_LEN (LIBIPW_DATA_LEN + LIBIPW_HLEN) + +#define MIN_FRAG_THRESHOLD 256U +#define MAX_FRAG_THRESHOLD 2346U + +/* QOS control */ +#define LIBIPW_QCTL_TID 0x000F + +/* debug macros */ + +#ifdef CONFIG_LIBIPW_DEBUG +extern u32 libipw_debug_level; +#define LIBIPW_DEBUG(level, fmt, args...) \ +do { if (libipw_debug_level & (level)) \ + printk(KERN_DEBUG "ieee80211: %c %s " fmt, \ + in_interrupt() ? 'I' : 'U', __func__ , ## args); } while (0) +static inline bool libipw_ratelimit_debug(u32 level) +{ + return (libipw_debug_level & level) && net_ratelimit(); +} +#else +#define LIBIPW_DEBUG(level, fmt, args...) do {} while (0) +static inline bool libipw_ratelimit_debug(u32 level) +{ + return false; +} +#endif /* CONFIG_LIBIPW_DEBUG */ + +/* + * To use the debug system: + * + * If you are defining a new debug classification, simply add it to the #define + * list here in the form of: + * + * #define LIBIPW_DL_xxxx VALUE + * + * shifting value to the left one bit from the previous entry. xxxx should be + * the name of the classification (for example, WEP) + * + * You then need to either add a LIBIPW_xxxx_DEBUG() macro definition for your + * classification, or use LIBIPW_DEBUG(LIBIPW_DL_xxxx, ...) whenever you want + * to send output to that classification. + * + * To add your debug level to the list of levels seen when you perform + * + * % cat /proc/net/ieee80211/debug_level + * + * you simply need to add your entry to the libipw_debug_level array. + * + * If you do not see debug_level in /proc/net/ieee80211 then you do not have + * CONFIG_LIBIPW_DEBUG defined in your kernel configuration + * + */ + +#define LIBIPW_DL_INFO (1<<0) +#define LIBIPW_DL_WX (1<<1) +#define LIBIPW_DL_SCAN (1<<2) +#define LIBIPW_DL_STATE (1<<3) +#define LIBIPW_DL_MGMT (1<<4) +#define LIBIPW_DL_FRAG (1<<5) +#define LIBIPW_DL_DROP (1<<7) + +#define LIBIPW_DL_TX (1<<8) +#define LIBIPW_DL_RX (1<<9) +#define LIBIPW_DL_QOS (1<<31) + +#define LIBIPW_ERROR(f, a...) printk(KERN_ERR "ieee80211: " f, ## a) +#define LIBIPW_WARNING(f, a...) printk(KERN_WARNING "ieee80211: " f, ## a) +#define LIBIPW_DEBUG_INFO(f, a...) LIBIPW_DEBUG(LIBIPW_DL_INFO, f, ## a) + +#define LIBIPW_DEBUG_WX(f, a...) LIBIPW_DEBUG(LIBIPW_DL_WX, f, ## a) +#define LIBIPW_DEBUG_SCAN(f, a...) LIBIPW_DEBUG(LIBIPW_DL_SCAN, f, ## a) +#define LIBIPW_DEBUG_STATE(f, a...) LIBIPW_DEBUG(LIBIPW_DL_STATE, f, ## a) +#define LIBIPW_DEBUG_MGMT(f, a...) LIBIPW_DEBUG(LIBIPW_DL_MGMT, f, ## a) +#define LIBIPW_DEBUG_FRAG(f, a...) LIBIPW_DEBUG(LIBIPW_DL_FRAG, f, ## a) +#define LIBIPW_DEBUG_DROP(f, a...) LIBIPW_DEBUG(LIBIPW_DL_DROP, f, ## a) +#define LIBIPW_DEBUG_TX(f, a...) LIBIPW_DEBUG(LIBIPW_DL_TX, f, ## a) +#define LIBIPW_DEBUG_RX(f, a...) LIBIPW_DEBUG(LIBIPW_DL_RX, f, ## a) +#define LIBIPW_DEBUG_QOS(f, a...) 
LIBIPW_DEBUG(LIBIPW_DL_QOS, f, ## a) +#include +#include /* ARPHRD_ETHER */ + +#ifndef WIRELESS_SPY +#define WIRELESS_SPY /* enable iwspy support */ +#endif +#include /* new driver API */ + +#define ETH_P_PREAUTH 0x88C7 /* IEEE 802.11i pre-authentication */ + +#ifndef ETH_P_80211_RAW +#define ETH_P_80211_RAW (ETH_P_ECONET + 1) +#endif + +/* IEEE 802.11 defines */ + +#define P80211_OUI_LEN 3 + +struct libipw_snap_hdr { + + u8 dsap; /* always 0xAA */ + u8 ssap; /* always 0xAA */ + u8 ctrl; /* always 0x03 */ + u8 oui[P80211_OUI_LEN]; /* organizational universal id */ + +} __attribute__ ((packed)); + +#define SNAP_SIZE sizeof(struct libipw_snap_hdr) + +#define WLAN_FC_GET_VERS(fc) ((fc) & IEEE80211_FCTL_VERS) +#define WLAN_FC_GET_TYPE(fc) ((fc) & IEEE80211_FCTL_FTYPE) +#define WLAN_FC_GET_STYPE(fc) ((fc) & IEEE80211_FCTL_STYPE) + +#define WLAN_GET_SEQ_FRAG(seq) ((seq) & IEEE80211_SCTL_FRAG) +#define WLAN_GET_SEQ_SEQ(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) + +#define LIBIPW_STATMASK_SIGNAL (1<<0) +#define LIBIPW_STATMASK_RSSI (1<<1) +#define LIBIPW_STATMASK_NOISE (1<<2) +#define LIBIPW_STATMASK_RATE (1<<3) +#define LIBIPW_STATMASK_WEMASK 0x7 + +#define LIBIPW_CCK_MODULATION (1<<0) +#define LIBIPW_OFDM_MODULATION (1<<1) + +#define LIBIPW_24GHZ_BAND (1<<0) +#define LIBIPW_52GHZ_BAND (1<<1) + +#define LIBIPW_CCK_RATE_1MB 0x02 +#define LIBIPW_CCK_RATE_2MB 0x04 +#define LIBIPW_CCK_RATE_5MB 0x0B +#define LIBIPW_CCK_RATE_11MB 0x16 +#define LIBIPW_OFDM_RATE_6MB 0x0C +#define LIBIPW_OFDM_RATE_9MB 0x12 +#define LIBIPW_OFDM_RATE_12MB 0x18 +#define LIBIPW_OFDM_RATE_18MB 0x24 +#define LIBIPW_OFDM_RATE_24MB 0x30 +#define LIBIPW_OFDM_RATE_36MB 0x48 +#define LIBIPW_OFDM_RATE_48MB 0x60 +#define LIBIPW_OFDM_RATE_54MB 0x6C +#define LIBIPW_BASIC_RATE_MASK 0x80 + +#define LIBIPW_CCK_RATE_1MB_MASK (1<<0) +#define LIBIPW_CCK_RATE_2MB_MASK (1<<1) +#define LIBIPW_CCK_RATE_5MB_MASK (1<<2) +#define LIBIPW_CCK_RATE_11MB_MASK (1<<3) +#define LIBIPW_OFDM_RATE_6MB_MASK (1<<4) +#define LIBIPW_OFDM_RATE_9MB_MASK (1<<5) +#define LIBIPW_OFDM_RATE_12MB_MASK (1<<6) +#define LIBIPW_OFDM_RATE_18MB_MASK (1<<7) +#define LIBIPW_OFDM_RATE_24MB_MASK (1<<8) +#define LIBIPW_OFDM_RATE_36MB_MASK (1<<9) +#define LIBIPW_OFDM_RATE_48MB_MASK (1<<10) +#define LIBIPW_OFDM_RATE_54MB_MASK (1<<11) + +#define LIBIPW_CCK_RATES_MASK 0x0000000F +#define LIBIPW_CCK_BASIC_RATES_MASK (LIBIPW_CCK_RATE_1MB_MASK | \ + LIBIPW_CCK_RATE_2MB_MASK) +#define LIBIPW_CCK_DEFAULT_RATES_MASK (LIBIPW_CCK_BASIC_RATES_MASK | \ + LIBIPW_CCK_RATE_5MB_MASK | \ + LIBIPW_CCK_RATE_11MB_MASK) + +#define LIBIPW_OFDM_RATES_MASK 0x00000FF0 +#define LIBIPW_OFDM_BASIC_RATES_MASK (LIBIPW_OFDM_RATE_6MB_MASK | \ + LIBIPW_OFDM_RATE_12MB_MASK | \ + LIBIPW_OFDM_RATE_24MB_MASK) +#define LIBIPW_OFDM_DEFAULT_RATES_MASK (LIBIPW_OFDM_BASIC_RATES_MASK | \ + LIBIPW_OFDM_RATE_9MB_MASK | \ + LIBIPW_OFDM_RATE_18MB_MASK | \ + LIBIPW_OFDM_RATE_36MB_MASK | \ + LIBIPW_OFDM_RATE_48MB_MASK | \ + LIBIPW_OFDM_RATE_54MB_MASK) +#define LIBIPW_DEFAULT_RATES_MASK (LIBIPW_OFDM_DEFAULT_RATES_MASK | \ + LIBIPW_CCK_DEFAULT_RATES_MASK) + +#define LIBIPW_NUM_OFDM_RATES 8 +#define LIBIPW_NUM_CCK_RATES 4 +#define LIBIPW_OFDM_SHIFT_MASK_A 4 + +/* NOTE: This data is for statistical purposes; not all hardware provides this + * information for frames received. + * For libipw_rx_mgt, you need to set at least the 'len' parameter. 
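The libipw.h comment above walks through adding a new debug classification; a minimal sketch of that pattern follows, assuming a hypothetical WEP classification (LIBIPW_DL_WEP, LIBIPW_DEBUG_WEP and the call site are illustrative names only, with (1<<10) chosen as a bit the header leaves unused):

	#include "libipw.h"

	/* hypothetical classification bit, next free after LIBIPW_DL_RX */
	#define LIBIPW_DL_WEP		(1<<10)

	/* per-classification convenience macro, as described above */
	#define LIBIPW_DEBUG_WEP(f, a...) LIBIPW_DEBUG(LIBIPW_DL_WEP, f, ## a)

	/* Example call site: libipw_ratelimit_debug() both checks that the
	 * classification is enabled in libipw_debug_level and rate-limits
	 * the output via net_ratelimit(). */
	static void example_report_bad_key(int keyidx)
	{
		if (libipw_ratelimit_debug(LIBIPW_DL_WEP))
			LIBIPW_DEBUG_WEP("rejected WEP key index %d\n", keyidx);
	}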
+ */ +struct libipw_rx_stats { + u32 mac_time; + s8 rssi; + u8 signal; + u8 noise; + u16 rate; /* in 100 kbps */ + u8 received_channel; + u8 control; + u8 mask; + u8 freq; + u16 len; + u64 tsf; + u32 beacon_time; +}; + +/* IEEE 802.11 requires that STA supports concurrent reception of at least + * three fragmented frames. This define can be increased to support more + * concurrent frames, but it should be noted that each entry can consume about + * 2 kB of RAM and increasing cache size will slow down frame reassembly. */ +#define LIBIPW_FRAG_CACHE_LEN 4 + +struct libipw_frag_entry { + unsigned long first_frag_time; + unsigned int seq; + unsigned int last_frag; + struct sk_buff *skb; + u8 src_addr[ETH_ALEN]; + u8 dst_addr[ETH_ALEN]; +}; + +struct libipw_stats { + unsigned int tx_unicast_frames; + unsigned int tx_multicast_frames; + unsigned int tx_fragments; + unsigned int tx_unicast_octets; + unsigned int tx_multicast_octets; + unsigned int tx_deferred_transmissions; + unsigned int tx_single_retry_frames; + unsigned int tx_multiple_retry_frames; + unsigned int tx_retry_limit_exceeded; + unsigned int tx_discards; + unsigned int rx_unicast_frames; + unsigned int rx_multicast_frames; + unsigned int rx_fragments; + unsigned int rx_unicast_octets; + unsigned int rx_multicast_octets; + unsigned int rx_fcs_errors; + unsigned int rx_discards_no_buffer; + unsigned int tx_discards_wrong_sa; + unsigned int rx_discards_undecryptable; + unsigned int rx_message_in_msg_fragments; + unsigned int rx_message_in_bad_msg_fragments; +}; + +struct libipw_device; + +#define SEC_KEY_1 (1<<0) +#define SEC_KEY_2 (1<<1) +#define SEC_KEY_3 (1<<2) +#define SEC_KEY_4 (1<<3) +#define SEC_ACTIVE_KEY (1<<4) +#define SEC_AUTH_MODE (1<<5) +#define SEC_UNICAST_GROUP (1<<6) +#define SEC_LEVEL (1<<7) +#define SEC_ENABLED (1<<8) +#define SEC_ENCRYPT (1<<9) + +#define SEC_LEVEL_0 0 /* None */ +#define SEC_LEVEL_1 1 /* WEP 40 and 104 bit */ +#define SEC_LEVEL_2 2 /* Level 1 + TKIP */ +#define SEC_LEVEL_2_CKIP 3 /* Level 1 + CKIP */ +#define SEC_LEVEL_3 4 /* Level 2 + CCMP */ + +#define SEC_ALG_NONE 0 +#define SEC_ALG_WEP 1 +#define SEC_ALG_TKIP 2 +#define SEC_ALG_CCMP 3 + +#define WEP_KEYS 4 +#define WEP_KEY_LEN 13 +#define SCM_KEY_LEN 32 +#define SCM_TEMPORAL_KEY_LENGTH 16 + +struct libipw_security { + u16 active_key:2, enabled:1, unicast_uses_group:1, encrypt:1; + u8 auth_mode; + u8 encode_alg[WEP_KEYS]; + u8 key_sizes[WEP_KEYS]; + u8 keys[WEP_KEYS][SCM_KEY_LEN]; + u8 level; + u16 flags; +} __attribute__ ((packed)); + +/* + + 802.11 data frame from AP + + ,-------------------------------------------------------------------. +Bytes | 2 | 2 | 6 | 6 | 6 | 2 | 0..2312 | 4 | + |------|------|---------|---------|---------|------|---------|------| +Desc. 
| ctrl | dura | DA/RA | TA | SA | Sequ | frame | fcs | + | | tion | (BSSID) | | | ence | data | | + `-------------------------------------------------------------------' + +Total: 28-2340 bytes + +*/ + +#define BEACON_PROBE_SSID_ID_POSITION 12 + +struct libipw_hdr_1addr { + __le16 frame_ctl; + __le16 duration_id; + u8 addr1[ETH_ALEN]; + u8 payload[0]; +} __attribute__ ((packed)); + +struct libipw_hdr_2addr { + __le16 frame_ctl; + __le16 duration_id; + u8 addr1[ETH_ALEN]; + u8 addr2[ETH_ALEN]; + u8 payload[0]; +} __attribute__ ((packed)); + +struct libipw_hdr_3addr { + __le16 frame_ctl; + __le16 duration_id; + u8 addr1[ETH_ALEN]; + u8 addr2[ETH_ALEN]; + u8 addr3[ETH_ALEN]; + __le16 seq_ctl; + u8 payload[0]; +} __attribute__ ((packed)); + +struct libipw_hdr_4addr { + __le16 frame_ctl; + __le16 duration_id; + u8 addr1[ETH_ALEN]; + u8 addr2[ETH_ALEN]; + u8 addr3[ETH_ALEN]; + __le16 seq_ctl; + u8 addr4[ETH_ALEN]; + u8 payload[0]; +} __attribute__ ((packed)); + +struct libipw_hdr_3addrqos { + __le16 frame_ctl; + __le16 duration_id; + u8 addr1[ETH_ALEN]; + u8 addr2[ETH_ALEN]; + u8 addr3[ETH_ALEN]; + __le16 seq_ctl; + u8 payload[0]; + __le16 qos_ctl; +} __attribute__ ((packed)); + +struct libipw_info_element { + u8 id; + u8 len; + u8 data[0]; +} __attribute__ ((packed)); + +/* + * These are the data types that can make up management packets + * + u16 auth_algorithm; + u16 auth_sequence; + u16 beacon_interval; + u16 capability; + u8 current_ap[ETH_ALEN]; + u16 listen_interval; + struct { + u16 association_id:14, reserved:2; + } __attribute__ ((packed)); + u32 time_stamp[2]; + u16 reason; + u16 status; +*/ + +struct libipw_auth { + struct libipw_hdr_3addr header; + __le16 algorithm; + __le16 transaction; + __le16 status; + /* challenge */ + struct libipw_info_element info_element[0]; +} __attribute__ ((packed)); + +struct libipw_channel_switch { + u8 id; + u8 len; + u8 mode; + u8 channel; + u8 count; +} __attribute__ ((packed)); + +struct libipw_action { + struct libipw_hdr_3addr header; + u8 category; + u8 action; + union { + struct libipw_action_exchange { + u8 token; + struct libipw_info_element info_element[0]; + } exchange; + struct libipw_channel_switch channel_switch; + + } format; +} __attribute__ ((packed)); + +struct libipw_disassoc { + struct libipw_hdr_3addr header; + __le16 reason; +} __attribute__ ((packed)); + +/* Alias deauth for disassoc */ +#define libipw_deauth libipw_disassoc + +struct libipw_probe_request { + struct libipw_hdr_3addr header; + /* SSID, supported rates */ + struct libipw_info_element info_element[0]; +} __attribute__ ((packed)); + +struct libipw_probe_response { + struct libipw_hdr_3addr header; + __le32 time_stamp[2]; + __le16 beacon_interval; + __le16 capability; + /* SSID, supported rates, FH params, DS params, + * CF params, IBSS params, TIM (if beacon), RSN */ + struct libipw_info_element info_element[0]; +} __attribute__ ((packed)); + +/* Alias beacon for probe_response */ +#define libipw_beacon libipw_probe_response + +struct libipw_assoc_request { + struct libipw_hdr_3addr header; + __le16 capability; + __le16 listen_interval; + /* SSID, supported rates, RSN */ + struct libipw_info_element info_element[0]; +} __attribute__ ((packed)); + +struct libipw_reassoc_request { + struct libipw_hdr_3addr header; + __le16 capability; + __le16 listen_interval; + u8 current_ap[ETH_ALEN]; + struct libipw_info_element info_element[0]; +} __attribute__ ((packed)); + +struct libipw_assoc_response { + struct libipw_hdr_3addr header; + __le16 capability; + __le16 status; + 
__le16 aid; + /* supported rates */ + struct libipw_info_element info_element[0]; +} __attribute__ ((packed)); + +struct libipw_txb { + u8 nr_frags; + u8 encrypted; + u8 rts_included; + u8 reserved; + u16 frag_size; + u16 payload_size; + struct sk_buff *fragments[0]; +}; + +/* SWEEP TABLE ENTRIES NUMBER */ +#define MAX_SWEEP_TAB_ENTRIES 42 +#define MAX_SWEEP_TAB_ENTRIES_PER_PACKET 7 +/* MAX_RATES_LENGTH needs to be 12. The spec says 8, and many APs + * only use 8, and then use extended rates for the remaining supported + * rates. Other APs, however, stick all of their supported rates on the + * main rates information element... */ +#define MAX_RATES_LENGTH ((u8)12) +#define MAX_RATES_EX_LENGTH ((u8)16) +#define MAX_NETWORK_COUNT 128 + +#define CRC_LENGTH 4U + +#define MAX_WPA_IE_LEN 64 + +#define NETWORK_HAS_OFDM (1<<1) +#define NETWORK_HAS_CCK (1<<2) + +/* QoS structure */ +#define NETWORK_HAS_QOS_PARAMETERS (1<<3) +#define NETWORK_HAS_QOS_INFORMATION (1<<4) +#define NETWORK_HAS_QOS_MASK (NETWORK_HAS_QOS_PARAMETERS | \ + NETWORK_HAS_QOS_INFORMATION) + +/* 802.11h */ +#define NETWORK_HAS_POWER_CONSTRAINT (1<<5) +#define NETWORK_HAS_CSA (1<<6) +#define NETWORK_HAS_QUIET (1<<7) +#define NETWORK_HAS_IBSS_DFS (1<<8) +#define NETWORK_HAS_TPC_REPORT (1<<9) + +#define NETWORK_HAS_ERP_VALUE (1<<10) + +#define QOS_QUEUE_NUM 4 +#define QOS_OUI_LEN 3 +#define QOS_OUI_TYPE 2 +#define QOS_ELEMENT_ID 221 +#define QOS_OUI_INFO_SUB_TYPE 0 +#define QOS_OUI_PARAM_SUB_TYPE 1 +#define QOS_VERSION_1 1 +#define QOS_AIFSN_MIN_VALUE 2 + +struct libipw_qos_information_element { + u8 elementID; + u8 length; + u8 qui[QOS_OUI_LEN]; + u8 qui_type; + u8 qui_subtype; + u8 version; + u8 ac_info; +} __attribute__ ((packed)); + +struct libipw_qos_ac_parameter { + u8 aci_aifsn; + u8 ecw_min_max; + __le16 tx_op_limit; +} __attribute__ ((packed)); + +struct libipw_qos_parameter_info { + struct libipw_qos_information_element info_element; + u8 reserved; + struct libipw_qos_ac_parameter ac_params_record[QOS_QUEUE_NUM]; +} __attribute__ ((packed)); + +struct libipw_qos_parameters { + __le16 cw_min[QOS_QUEUE_NUM]; + __le16 cw_max[QOS_QUEUE_NUM]; + u8 aifs[QOS_QUEUE_NUM]; + u8 flag[QOS_QUEUE_NUM]; + __le16 tx_op_limit[QOS_QUEUE_NUM]; +} __attribute__ ((packed)); + +struct libipw_qos_data { + struct libipw_qos_parameters parameters; + int active; + int supported; + u8 param_count; + u8 old_param_count; +}; + +struct libipw_tim_parameters { + u8 tim_count; + u8 tim_period; +} __attribute__ ((packed)); + +/*******************************************************/ + +enum { /* libipw_basic_report.map */ + LIBIPW_BASIC_MAP_BSS = (1 << 0), + LIBIPW_BASIC_MAP_OFDM = (1 << 1), + LIBIPW_BASIC_MAP_UNIDENTIFIED = (1 << 2), + LIBIPW_BASIC_MAP_RADAR = (1 << 3), + LIBIPW_BASIC_MAP_UNMEASURED = (1 << 4), + /* Bits 5-7 are reserved */ + +}; +struct libipw_basic_report { + u8 channel; + __le64 start_time; + __le16 duration; + u8 map; +} __attribute__ ((packed)); + +enum { /* libipw_measurement_request.mode */ + /* Bit 0 is reserved */ + LIBIPW_MEASUREMENT_ENABLE = (1 << 1), + LIBIPW_MEASUREMENT_REQUEST = (1 << 2), + LIBIPW_MEASUREMENT_REPORT = (1 << 3), + /* Bits 4-7 are reserved */ +}; + +enum { + LIBIPW_REPORT_BASIC = 0, /* required */ + LIBIPW_REPORT_CCA = 1, /* optional */ + LIBIPW_REPORT_RPI = 2, /* optional */ + /* 3-255 reserved */ +}; + +struct libipw_measurement_params { + u8 channel; + __le64 start_time; + __le16 duration; +} __attribute__ ((packed)); + +struct libipw_measurement_request { + struct libipw_info_element ie; + u8 token; + u8 
mode; + u8 type; + struct libipw_measurement_params params[0]; +} __attribute__ ((packed)); + +struct libipw_measurement_report { + struct libipw_info_element ie; + u8 token; + u8 mode; + u8 type; + union { + struct libipw_basic_report basic[0]; + } u; +} __attribute__ ((packed)); + +struct libipw_tpc_report { + u8 transmit_power; + u8 link_margin; +} __attribute__ ((packed)); + +struct libipw_channel_map { + u8 channel; + u8 map; +} __attribute__ ((packed)); + +struct libipw_ibss_dfs { + struct libipw_info_element ie; + u8 owner[ETH_ALEN]; + u8 recovery_interval; + struct libipw_channel_map channel_map[0]; +}; + +struct libipw_csa { + u8 mode; + u8 channel; + u8 count; +} __attribute__ ((packed)); + +struct libipw_quiet { + u8 count; + u8 period; + u8 duration; + u8 offset; +} __attribute__ ((packed)); + +struct libipw_network { + /* These entries are used to identify a unique network */ + u8 bssid[ETH_ALEN]; + u8 channel; + /* Ensure null-terminated for any debug msgs */ + u8 ssid[IW_ESSID_MAX_SIZE + 1]; + u8 ssid_len; + + struct libipw_qos_data qos_data; + + /* These are network statistics */ + struct libipw_rx_stats stats; + u16 capability; + u8 rates[MAX_RATES_LENGTH]; + u8 rates_len; + u8 rates_ex[MAX_RATES_EX_LENGTH]; + u8 rates_ex_len; + unsigned long last_scanned; + u8 mode; + u32 flags; + u32 last_associate; + u32 time_stamp[2]; + u16 beacon_interval; + u16 listen_interval; + u16 atim_window; + u8 erp_value; + u8 wpa_ie[MAX_WPA_IE_LEN]; + size_t wpa_ie_len; + u8 rsn_ie[MAX_WPA_IE_LEN]; + size_t rsn_ie_len; + struct libipw_tim_parameters tim; + + /* 802.11h info */ + + /* Power Constraint - mandatory if spctrm mgmt required */ + u8 power_constraint; + + /* TPC Report - mandatory if spctrm mgmt required */ + struct libipw_tpc_report tpc_report; + + /* IBSS DFS - mandatory if spctrm mgmt required and IBSS + * NOTE: This is variable length and so must be allocated dynamically */ + struct libipw_ibss_dfs *ibss_dfs; + + /* Channel Switch Announcement - optional if spctrm mgmt required */ + struct libipw_csa csa; + + /* Quiet - optional if spctrm mgmt required */ + struct libipw_quiet quiet; + + struct list_head list; +}; + +enum libipw_state { + LIBIPW_UNINITIALIZED = 0, + LIBIPW_INITIALIZED, + LIBIPW_ASSOCIATING, + LIBIPW_ASSOCIATED, + LIBIPW_AUTHENTICATING, + LIBIPW_AUTHENTICATED, + LIBIPW_SHUTDOWN +}; + +#define DEFAULT_MAX_SCAN_AGE (15 * HZ) +#define DEFAULT_FTS 2346 + +#define CFG_LIBIPW_RESERVE_FCS (1<<0) +#define CFG_LIBIPW_COMPUTE_FCS (1<<1) +#define CFG_LIBIPW_RTS (1<<2) + +#define LIBIPW_24GHZ_MIN_CHANNEL 1 +#define LIBIPW_24GHZ_MAX_CHANNEL 14 +#define LIBIPW_24GHZ_CHANNELS (LIBIPW_24GHZ_MAX_CHANNEL - \ + LIBIPW_24GHZ_MIN_CHANNEL + 1) + +#define LIBIPW_52GHZ_MIN_CHANNEL 34 +#define LIBIPW_52GHZ_MAX_CHANNEL 165 +#define LIBIPW_52GHZ_CHANNELS (LIBIPW_52GHZ_MAX_CHANNEL - \ + LIBIPW_52GHZ_MIN_CHANNEL + 1) + +enum { + LIBIPW_CH_PASSIVE_ONLY = (1 << 0), + LIBIPW_CH_80211H_RULES = (1 << 1), + LIBIPW_CH_B_ONLY = (1 << 2), + LIBIPW_CH_NO_IBSS = (1 << 3), + LIBIPW_CH_UNIFORM_SPREADING = (1 << 4), + LIBIPW_CH_RADAR_DETECT = (1 << 5), + LIBIPW_CH_INVALID = (1 << 6), +}; + +struct libipw_channel { + u32 freq; /* in MHz */ + u8 channel; + u8 flags; + u8 max_power; /* in dBm */ +}; + +struct libipw_geo { + u8 name[4]; + u8 bg_channels; + u8 a_channels; + struct libipw_channel bg[LIBIPW_24GHZ_CHANNELS]; + struct libipw_channel a[LIBIPW_52GHZ_CHANNELS]; +}; + +struct libipw_device { + struct net_device *dev; + struct libipw_security sec; + + /* Bookkeeping structures */ + struct libipw_stats 
ieee_stats; + + struct libipw_geo geo; + + /* Probe / Beacon management */ + struct list_head network_free_list; + struct list_head network_list; + struct libipw_network *networks; + int scans; + int scan_age; + + int iw_mode; /* operating mode (IW_MODE_*) */ + struct iw_spy_data spy_data; /* iwspy support */ + + spinlock_t lock; + + int tx_headroom; /* Set to size of any additional room needed at front + * of allocated Tx SKBs */ + u32 config; + + /* WEP and other encryption related settings at the device level */ + int open_wep; /* Set to 1 to allow unencrypted frames */ + + int reset_on_keychange; /* Set to 1 if the HW needs to be reset on + * WEP key changes */ + + /* If the host performs {en,de}cryption, then set to 1 */ + int host_encrypt; + int host_encrypt_msdu; + int host_decrypt; + /* host performs multicast decryption */ + int host_mc_decrypt; + + /* host should strip IV and ICV from protected frames */ + /* meaningful only when hardware decryption is being used */ + int host_strip_iv_icv; + + int host_open_frag; + int host_build_iv; + int ieee802_1x; /* is IEEE 802.1X used */ + + /* WPA data */ + int wpa_enabled; + int drop_unencrypted; + int privacy_invoked; + size_t wpa_ie_len; + u8 *wpa_ie; + + struct lib80211_crypt_info crypt_info; + + int bcrx_sta_key; /* use individual keys to override default keys even + * with RX of broad/multicast frames */ + + /* Fragmentation structures */ + struct libipw_frag_entry frag_cache[LIBIPW_FRAG_CACHE_LEN]; + unsigned int frag_next_idx; + u16 fts; /* Fragmentation Threshold */ + u16 rts; /* RTS threshold */ + + /* Association info */ + u8 bssid[ETH_ALEN]; + + enum libipw_state state; + + int mode; /* A, B, G */ + int modulation; /* CCK, OFDM */ + int freq_band; /* 2.4Ghz, 5.2Ghz, Mixed */ + int abg_true; /* ABG flag */ + + int perfect_rssi; + int worst_rssi; + + u16 prev_seq_ctl; /* used to drop duplicate frames */ + + /* Callback functions */ + void (*set_security) (struct net_device * dev, + struct libipw_security * sec); + int (*hard_start_xmit) (struct libipw_txb * txb, + struct net_device * dev, int pri); + int (*reset_port) (struct net_device * dev); + int (*is_queue_full) (struct net_device * dev, int pri); + + int (*handle_management) (struct net_device * dev, + struct libipw_network * network, u16 type); + int (*is_qos_active) (struct net_device *dev, struct sk_buff *skb); + + /* Typical STA methods */ + int (*handle_auth) (struct net_device * dev, + struct libipw_auth * auth); + int (*handle_deauth) (struct net_device * dev, + struct libipw_deauth * auth); + int (*handle_action) (struct net_device * dev, + struct libipw_action * action, + struct libipw_rx_stats * stats); + int (*handle_disassoc) (struct net_device * dev, + struct libipw_disassoc * assoc); + int (*handle_beacon) (struct net_device * dev, + struct libipw_beacon * beacon, + struct libipw_network * network); + int (*handle_probe_response) (struct net_device * dev, + struct libipw_probe_response * resp, + struct libipw_network * network); + int (*handle_probe_request) (struct net_device * dev, + struct libipw_probe_request * req, + struct libipw_rx_stats * stats); + int (*handle_assoc_response) (struct net_device * dev, + struct libipw_assoc_response * resp, + struct libipw_network * network); + + /* Typical AP methods */ + int (*handle_assoc_request) (struct net_device * dev); + int (*handle_reassoc_request) (struct net_device * dev, + struct libipw_reassoc_request * req); + + /* This must be the last item so that it points to the data + * allocated beyond this 
structure by alloc_ieee80211 */ + u8 priv[0]; +}; + +#define IEEE_A (1<<0) +#define IEEE_B (1<<1) +#define IEEE_G (1<<2) +#define IEEE_MODE_MASK (IEEE_A|IEEE_B|IEEE_G) + +static inline void *libipw_priv(struct net_device *dev) +{ + return ((struct libipw_device *)netdev_priv(dev))->priv; +} + +static inline int libipw_is_valid_mode(struct libipw_device *ieee, + int mode) +{ + /* + * It is possible for both access points and our device to support + * combinations of modes, so as long as there is one valid combination + * of ap/device supported modes, then return success + * + */ + if ((mode & IEEE_A) && + (ieee->modulation & LIBIPW_OFDM_MODULATION) && + (ieee->freq_band & LIBIPW_52GHZ_BAND)) + return 1; + + if ((mode & IEEE_G) && + (ieee->modulation & LIBIPW_OFDM_MODULATION) && + (ieee->freq_band & LIBIPW_24GHZ_BAND)) + return 1; + + if ((mode & IEEE_B) && + (ieee->modulation & LIBIPW_CCK_MODULATION) && + (ieee->freq_band & LIBIPW_24GHZ_BAND)) + return 1; + + return 0; +} + +static inline int libipw_get_hdrlen(u16 fc) +{ + int hdrlen = LIBIPW_3ADDR_LEN; + u16 stype = WLAN_FC_GET_STYPE(fc); + + switch (WLAN_FC_GET_TYPE(fc)) { + case IEEE80211_FTYPE_DATA: + if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) + hdrlen = LIBIPW_4ADDR_LEN; + if (stype & IEEE80211_STYPE_QOS_DATA) + hdrlen += 2; + break; + case IEEE80211_FTYPE_CTL: + switch (WLAN_FC_GET_STYPE(fc)) { + case IEEE80211_STYPE_CTS: + case IEEE80211_STYPE_ACK: + hdrlen = LIBIPW_1ADDR_LEN; + break; + default: + hdrlen = LIBIPW_2ADDR_LEN; + break; + } + break; + } + + return hdrlen; +} + +static inline u8 *libipw_get_payload(struct ieee80211_hdr *hdr) +{ + switch (libipw_get_hdrlen(le16_to_cpu(hdr->frame_control))) { + case LIBIPW_1ADDR_LEN: + return ((struct libipw_hdr_1addr *)hdr)->payload; + case LIBIPW_2ADDR_LEN: + return ((struct libipw_hdr_2addr *)hdr)->payload; + case LIBIPW_3ADDR_LEN: + return ((struct libipw_hdr_3addr *)hdr)->payload; + case LIBIPW_4ADDR_LEN: + return ((struct libipw_hdr_4addr *)hdr)->payload; + } + return NULL; +} + +static inline int libipw_is_ofdm_rate(u8 rate) +{ + switch (rate & ~LIBIPW_BASIC_RATE_MASK) { + case LIBIPW_OFDM_RATE_6MB: + case LIBIPW_OFDM_RATE_9MB: + case LIBIPW_OFDM_RATE_12MB: + case LIBIPW_OFDM_RATE_18MB: + case LIBIPW_OFDM_RATE_24MB: + case LIBIPW_OFDM_RATE_36MB: + case LIBIPW_OFDM_RATE_48MB: + case LIBIPW_OFDM_RATE_54MB: + return 1; + } + return 0; +} + +static inline int libipw_is_cck_rate(u8 rate) +{ + switch (rate & ~LIBIPW_BASIC_RATE_MASK) { + case LIBIPW_CCK_RATE_1MB: + case LIBIPW_CCK_RATE_2MB: + case LIBIPW_CCK_RATE_5MB: + case LIBIPW_CCK_RATE_11MB: + return 1; + } + return 0; +} + +/* ieee80211.c */ +extern void free_ieee80211(struct net_device *dev); +extern struct net_device *alloc_ieee80211(int sizeof_priv); +extern int libipw_change_mtu(struct net_device *dev, int new_mtu); + +extern void libipw_networks_age(struct libipw_device *ieee, + unsigned long age_secs); + +extern int libipw_set_encryption(struct libipw_device *ieee); + +/* libipw_tx.c */ +extern int libipw_xmit(struct sk_buff *skb, struct net_device *dev); +extern void libipw_txb_free(struct libipw_txb *); + +/* libipw_rx.c */ +extern void libipw_rx_any(struct libipw_device *ieee, + struct sk_buff *skb, struct libipw_rx_stats *stats); +extern int libipw_rx(struct libipw_device *ieee, struct sk_buff *skb, + struct libipw_rx_stats *rx_stats); +/* make sure to set stats->len */ +extern void libipw_rx_mgt(struct libipw_device *ieee, + struct libipw_hdr_4addr *header, + struct libipw_rx_stats *stats); +extern 
void libipw_network_reset(struct libipw_network *network); + +/* libipw_geo.c */ +extern const struct libipw_geo *libipw_get_geo(struct libipw_device + *ieee); +extern int libipw_set_geo(struct libipw_device *ieee, + const struct libipw_geo *geo); + +extern int libipw_is_valid_channel(struct libipw_device *ieee, + u8 channel); +extern int libipw_channel_to_index(struct libipw_device *ieee, + u8 channel); +extern u8 libipw_freq_to_channel(struct libipw_device *ieee, u32 freq); +extern u8 libipw_get_channel_flags(struct libipw_device *ieee, + u8 channel); +extern const struct libipw_channel *libipw_get_channel(struct + libipw_device + *ieee, u8 channel); +extern u32 libipw_channel_to_freq(struct libipw_device * ieee, + u8 channel); + +/* libipw_wx.c */ +extern int libipw_wx_get_scan(struct libipw_device *ieee, + struct iw_request_info *info, + union iwreq_data *wrqu, char *key); +extern int libipw_wx_set_encode(struct libipw_device *ieee, + struct iw_request_info *info, + union iwreq_data *wrqu, char *key); +extern int libipw_wx_get_encode(struct libipw_device *ieee, + struct iw_request_info *info, + union iwreq_data *wrqu, char *key); +extern int libipw_wx_set_encodeext(struct libipw_device *ieee, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra); +extern int libipw_wx_get_encodeext(struct libipw_device *ieee, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra); + +static inline void libipw_increment_scans(struct libipw_device *ieee) +{ + ieee->scans++; +} + +static inline int libipw_get_scans(struct libipw_device *ieee) +{ + return ieee->scans; +} + +#endif /* LIBIPW_H */ diff --git a/drivers/net/wireless/ipw2x00/libipw_geo.c b/drivers/net/wireless/ipw2x00/libipw_geo.c index 9dfbb8760f67..d04979ba2a5b 100644 --- a/drivers/net/wireless/ipw2x00/libipw_geo.c +++ b/drivers/net/wireless/ipw2x00/libipw_geo.c @@ -41,9 +41,9 @@ #include #include -#include "ieee80211.h" +#include "libipw.h" -int ieee80211_is_valid_channel(struct ieee80211_device *ieee, u8 channel) +int libipw_is_valid_channel(struct libipw_device *ieee, u8 channel) { int i; @@ -52,27 +52,27 @@ int ieee80211_is_valid_channel(struct ieee80211_device *ieee, u8 channel) if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) return 0; - if (ieee->freq_band & IEEE80211_24GHZ_BAND) + if (ieee->freq_band & LIBIPW_24GHZ_BAND) for (i = 0; i < ieee->geo.bg_channels; i++) /* NOTE: If G mode is currently supported but * this is a B only channel, we don't see it * as valid. 
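A usage sketch for the libipw_geo helpers declared above, assuming a driver-defined table and a driver that has already selected its band (example_geo, example_register_geo and the single-channel map are illustrative; ipw2200 registers its full ipw_geos tables as shown earlier in this patch):

	#include <linux/errno.h>
	#include "libipw.h"

	/* Hypothetical one-channel geography; real drivers supply full
	 * 2.4 GHz and 5.2 GHz maps. */
	static const struct libipw_geo example_geo = {
		.name = "ZZ ",
		.bg_channels = 1,
		.bg = { { .freq = 2412, .channel = 1, .flags = 0 } },
	};

	static int example_register_geo(struct libipw_device *ieee)
	{
		/* The driver is assumed to have picked band/modulation first,
		 * much as ipw2200 does from its EEPROM country code. */
		ieee->freq_band = LIBIPW_24GHZ_BAND;

		if (libipw_set_geo(ieee, &example_geo))
			return -EINVAL;

		/* Channel look-ups now resolve against the registered map. */
		return libipw_is_valid_channel(ieee, 1) ? 0 : -EINVAL;
	}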
*/ if ((ieee->geo.bg[i].channel == channel) && - !(ieee->geo.bg[i].flags & IEEE80211_CH_INVALID) && + !(ieee->geo.bg[i].flags & LIBIPW_CH_INVALID) && (!(ieee->mode & IEEE_G) || - !(ieee->geo.bg[i].flags & IEEE80211_CH_B_ONLY))) - return IEEE80211_24GHZ_BAND; + !(ieee->geo.bg[i].flags & LIBIPW_CH_B_ONLY))) + return LIBIPW_24GHZ_BAND; - if (ieee->freq_band & IEEE80211_52GHZ_BAND) + if (ieee->freq_band & LIBIPW_52GHZ_BAND) for (i = 0; i < ieee->geo.a_channels; i++) if ((ieee->geo.a[i].channel == channel) && - !(ieee->geo.a[i].flags & IEEE80211_CH_INVALID)) - return IEEE80211_52GHZ_BAND; + !(ieee->geo.a[i].flags & LIBIPW_CH_INVALID)) + return LIBIPW_52GHZ_BAND; return 0; } -int ieee80211_channel_to_index(struct ieee80211_device *ieee, u8 channel) +int libipw_channel_to_index(struct libipw_device *ieee, u8 channel) { int i; @@ -81,12 +81,12 @@ int ieee80211_channel_to_index(struct ieee80211_device *ieee, u8 channel) if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) return -1; - if (ieee->freq_band & IEEE80211_24GHZ_BAND) + if (ieee->freq_band & LIBIPW_24GHZ_BAND) for (i = 0; i < ieee->geo.bg_channels; i++) if (ieee->geo.bg[i].channel == channel) return i; - if (ieee->freq_band & IEEE80211_52GHZ_BAND) + if (ieee->freq_band & LIBIPW_52GHZ_BAND) for (i = 0; i < ieee->geo.a_channels; i++) if (ieee->geo.a[i].channel == channel) return i; @@ -94,22 +94,22 @@ int ieee80211_channel_to_index(struct ieee80211_device *ieee, u8 channel) return -1; } -u32 ieee80211_channel_to_freq(struct ieee80211_device * ieee, u8 channel) +u32 libipw_channel_to_freq(struct libipw_device * ieee, u8 channel) { - const struct ieee80211_channel * ch; + const struct libipw_channel * ch; /* Driver needs to initialize the geography map before using * these helper functions */ if (ieee->geo.bg_channels == 0 && ieee->geo.a_channels == 0) return 0; - ch = ieee80211_get_channel(ieee, channel); + ch = libipw_get_channel(ieee, channel); if (!ch->channel) return 0; return ch->freq; } -u8 ieee80211_freq_to_channel(struct ieee80211_device * ieee, u32 freq) +u8 libipw_freq_to_channel(struct libipw_device * ieee, u32 freq) { int i; @@ -120,12 +120,12 @@ u8 ieee80211_freq_to_channel(struct ieee80211_device * ieee, u32 freq) freq /= 100000; - if (ieee->freq_band & IEEE80211_24GHZ_BAND) + if (ieee->freq_band & LIBIPW_24GHZ_BAND) for (i = 0; i < ieee->geo.bg_channels; i++) if (ieee->geo.bg[i].freq == freq) return ieee->geo.bg[i].channel; - if (ieee->freq_band & IEEE80211_52GHZ_BAND) + if (ieee->freq_band & LIBIPW_52GHZ_BAND) for (i = 0; i < ieee->geo.a_channels; i++) if (ieee->geo.a[i].freq == freq) return ieee->geo.a[i].channel; @@ -133,63 +133,63 @@ u8 ieee80211_freq_to_channel(struct ieee80211_device * ieee, u32 freq) return 0; } -int ieee80211_set_geo(struct ieee80211_device *ieee, - const struct ieee80211_geo *geo) +int libipw_set_geo(struct libipw_device *ieee, + const struct libipw_geo *geo) { memcpy(ieee->geo.name, geo->name, 3); ieee->geo.name[3] = '\0'; ieee->geo.bg_channels = geo->bg_channels; ieee->geo.a_channels = geo->a_channels; memcpy(ieee->geo.bg, geo->bg, geo->bg_channels * - sizeof(struct ieee80211_channel)); + sizeof(struct libipw_channel)); memcpy(ieee->geo.a, geo->a, ieee->geo.a_channels * - sizeof(struct ieee80211_channel)); + sizeof(struct libipw_channel)); return 0; } -const struct ieee80211_geo *ieee80211_get_geo(struct ieee80211_device *ieee) +const struct libipw_geo *libipw_get_geo(struct libipw_device *ieee) { return &ieee->geo; } -u8 ieee80211_get_channel_flags(struct ieee80211_device * ieee, u8 
channel) +u8 libipw_get_channel_flags(struct libipw_device * ieee, u8 channel) { - int index = ieee80211_channel_to_index(ieee, channel); + int index = libipw_channel_to_index(ieee, channel); if (index == -1) - return IEEE80211_CH_INVALID; + return LIBIPW_CH_INVALID; - if (channel <= IEEE80211_24GHZ_CHANNELS) + if (channel <= LIBIPW_24GHZ_CHANNELS) return ieee->geo.bg[index].flags; return ieee->geo.a[index].flags; } -static const struct ieee80211_channel bad_channel = { +static const struct libipw_channel bad_channel = { .channel = 0, - .flags = IEEE80211_CH_INVALID, + .flags = LIBIPW_CH_INVALID, .max_power = 0, }; -const struct ieee80211_channel *ieee80211_get_channel(struct ieee80211_device +const struct libipw_channel *libipw_get_channel(struct libipw_device *ieee, u8 channel) { - int index = ieee80211_channel_to_index(ieee, channel); + int index = libipw_channel_to_index(ieee, channel); if (index == -1) return &bad_channel; - if (channel <= IEEE80211_24GHZ_CHANNELS) + if (channel <= LIBIPW_24GHZ_CHANNELS) return &ieee->geo.bg[index]; return &ieee->geo.a[index]; } -EXPORT_SYMBOL(ieee80211_get_channel); -EXPORT_SYMBOL(ieee80211_get_channel_flags); -EXPORT_SYMBOL(ieee80211_is_valid_channel); -EXPORT_SYMBOL(ieee80211_freq_to_channel); -EXPORT_SYMBOL(ieee80211_channel_to_freq); -EXPORT_SYMBOL(ieee80211_channel_to_index); -EXPORT_SYMBOL(ieee80211_set_geo); -EXPORT_SYMBOL(ieee80211_get_geo); +EXPORT_SYMBOL(libipw_get_channel); +EXPORT_SYMBOL(libipw_get_channel_flags); +EXPORT_SYMBOL(libipw_is_valid_channel); +EXPORT_SYMBOL(libipw_freq_to_channel); +EXPORT_SYMBOL(libipw_channel_to_freq); +EXPORT_SYMBOL(libipw_channel_to_index); +EXPORT_SYMBOL(libipw_set_geo); +EXPORT_SYMBOL(libipw_get_geo); diff --git a/drivers/net/wireless/ipw2x00/libipw_module.c b/drivers/net/wireless/ipw2x00/libipw_module.c index 8ce6e961c5da..00c3640b0f8a 100644 --- a/drivers/net/wireless/ipw2x00/libipw_module.c +++ b/drivers/net/wireless/ipw2x00/libipw_module.c @@ -50,11 +50,11 @@ #include #include -#include "ieee80211.h" +#include "libipw.h" #define DRV_DESCRIPTION "802.11 data/management/control stack" #define DRV_NAME "ieee80211" -#define DRV_VERSION IEEE80211_VERSION +#define DRV_VERSION LIBIPW_VERSION #define DRV_COPYRIGHT "Copyright (C) 2004-2005 Intel Corporation " MODULE_VERSION(DRV_VERSION); @@ -62,13 +62,13 @@ MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_AUTHOR(DRV_COPYRIGHT); MODULE_LICENSE("GPL"); -static int ieee80211_networks_allocate(struct ieee80211_device *ieee) +static int libipw_networks_allocate(struct libipw_device *ieee) { if (ieee->networks) return 0; ieee->networks = - kzalloc(MAX_NETWORK_COUNT * sizeof(struct ieee80211_network), + kzalloc(MAX_NETWORK_COUNT * sizeof(struct libipw_network), GFP_KERNEL); if (!ieee->networks) { printk(KERN_WARNING "%s: Out of memory allocating beacons\n", @@ -79,7 +79,7 @@ static int ieee80211_networks_allocate(struct ieee80211_device *ieee) return 0; } -void ieee80211_network_reset(struct ieee80211_network *network) +void libipw_network_reset(struct libipw_network *network) { if (!network) return; @@ -90,7 +90,7 @@ void ieee80211_network_reset(struct ieee80211_network *network) } } -static inline void ieee80211_networks_free(struct ieee80211_device *ieee) +static inline void libipw_networks_free(struct libipw_device *ieee) { int i; @@ -105,10 +105,10 @@ static inline void ieee80211_networks_free(struct ieee80211_device *ieee) ieee->networks = NULL; } -void ieee80211_networks_age(struct ieee80211_device *ieee, +void libipw_networks_age(struct libipw_device *ieee, 
unsigned long age_secs) { - struct ieee80211_network *network = NULL; + struct libipw_network *network = NULL; unsigned long flags; unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC); @@ -118,9 +118,9 @@ void ieee80211_networks_age(struct ieee80211_device *ieee, } spin_unlock_irqrestore(&ieee->lock, flags); } -EXPORT_SYMBOL(ieee80211_networks_age); +EXPORT_SYMBOL(libipw_networks_age); -static void ieee80211_networks_initialize(struct ieee80211_device *ieee) +static void libipw_networks_initialize(struct libipw_device *ieee) { int i; @@ -131,38 +131,38 @@ static void ieee80211_networks_initialize(struct ieee80211_device *ieee) &ieee->network_free_list); } -int ieee80211_change_mtu(struct net_device *dev, int new_mtu) +int libipw_change_mtu(struct net_device *dev, int new_mtu) { - if ((new_mtu < 68) || (new_mtu > IEEE80211_DATA_LEN)) + if ((new_mtu < 68) || (new_mtu > LIBIPW_DATA_LEN)) return -EINVAL; dev->mtu = new_mtu; return 0; } -EXPORT_SYMBOL(ieee80211_change_mtu); +EXPORT_SYMBOL(libipw_change_mtu); struct net_device *alloc_ieee80211(int sizeof_priv) { - struct ieee80211_device *ieee; + struct libipw_device *ieee; struct net_device *dev; int err; - IEEE80211_DEBUG_INFO("Initializing...\n"); + LIBIPW_DEBUG_INFO("Initializing...\n"); - dev = alloc_etherdev(sizeof(struct ieee80211_device) + sizeof_priv); + dev = alloc_etherdev(sizeof(struct libipw_device) + sizeof_priv); if (!dev) { - IEEE80211_ERROR("Unable to allocate network device.\n"); + LIBIPW_ERROR("Unable to allocate network device.\n"); goto failed; } ieee = netdev_priv(dev); ieee->dev = dev; - err = ieee80211_networks_allocate(ieee); + err = libipw_networks_allocate(ieee); if (err) { - IEEE80211_ERROR("Unable to allocate beacon storage: %d\n", err); + LIBIPW_ERROR("Unable to allocate beacon storage: %d\n", err); goto failed_free_netdev; } - ieee80211_networks_initialize(ieee); + libipw_networks_initialize(ieee); /* Default fragmentation threshold is maximum payload size */ ieee->fts = DEFAULT_FTS; @@ -201,25 +201,25 @@ failed: void free_ieee80211(struct net_device *dev) { - struct ieee80211_device *ieee = netdev_priv(dev); + struct libipw_device *ieee = netdev_priv(dev); lib80211_crypt_info_free(&ieee->crypt_info); - ieee80211_networks_free(ieee); + libipw_networks_free(ieee); free_netdev(dev); } #ifdef CONFIG_LIBIPW_DEBUG static int debug = 0; -u32 ieee80211_debug_level = 0; -EXPORT_SYMBOL_GPL(ieee80211_debug_level); -static struct proc_dir_entry *ieee80211_proc = NULL; +u32 libipw_debug_level = 0; +EXPORT_SYMBOL_GPL(libipw_debug_level); +static struct proc_dir_entry *libipw_proc = NULL; static int show_debug_level(char *page, char **start, off_t offset, int count, int *eof, void *data) { - return snprintf(page, count, "0x%08X\n", ieee80211_debug_level); + return snprintf(page, count, "0x%08X\n", libipw_debug_level); } static int store_debug_level(struct file *file, const char __user * buffer, @@ -236,29 +236,29 @@ static int store_debug_level(struct file *file, const char __user * buffer, printk(KERN_INFO DRV_NAME ": %s is not in hex or decimal form.\n", buf); else - ieee80211_debug_level = val; + libipw_debug_level = val; return strnlen(buf, len); } #endif /* CONFIG_LIBIPW_DEBUG */ -static int __init ieee80211_init(void) +static int __init libipw_init(void) { #ifdef CONFIG_LIBIPW_DEBUG struct proc_dir_entry *e; - ieee80211_debug_level = debug; - ieee80211_proc = proc_mkdir(DRV_NAME, init_net.proc_net); - if (ieee80211_proc == NULL) { - IEEE80211_ERROR("Unable to create " DRV_NAME + libipw_debug_level = 
debug; + libipw_proc = proc_mkdir(DRV_NAME, init_net.proc_net); + if (libipw_proc == NULL) { + LIBIPW_ERROR("Unable to create " DRV_NAME " proc directory\n"); return -EIO; } e = create_proc_entry("debug_level", S_IFREG | S_IRUGO | S_IWUSR, - ieee80211_proc); + libipw_proc); if (!e) { remove_proc_entry(DRV_NAME, init_net.proc_net); - ieee80211_proc = NULL; + libipw_proc = NULL; return -EIO; } e->read_proc = show_debug_level; @@ -272,13 +272,13 @@ static int __init ieee80211_init(void) return 0; } -static void __exit ieee80211_exit(void) +static void __exit libipw_exit(void) { #ifdef CONFIG_LIBIPW_DEBUG - if (ieee80211_proc) { - remove_proc_entry("debug_level", ieee80211_proc); + if (libipw_proc) { + remove_proc_entry("debug_level", libipw_proc); remove_proc_entry(DRV_NAME, init_net.proc_net); - ieee80211_proc = NULL; + libipw_proc = NULL; } #endif /* CONFIG_LIBIPW_DEBUG */ } @@ -289,8 +289,8 @@ module_param(debug, int, 0444); MODULE_PARM_DESC(debug, "debug output mask"); #endif /* CONFIG_LIBIPW_DEBUG */ -module_exit(ieee80211_exit); -module_init(ieee80211_init); +module_exit(libipw_exit); +module_init(libipw_init); EXPORT_SYMBOL(alloc_ieee80211); EXPORT_SYMBOL(free_ieee80211); diff --git a/drivers/net/wireless/ipw2x00/libipw_rx.c b/drivers/net/wireless/ipw2x00/libipw_rx.c index dae4b8e4d8e9..282b1f7ff1e9 100644 --- a/drivers/net/wireless/ipw2x00/libipw_rx.c +++ b/drivers/net/wireless/ipw2x00/libipw_rx.c @@ -34,18 +34,18 @@ #include -#include "ieee80211.h" +#include "libipw.h" -static void ieee80211_monitor_rx(struct ieee80211_device *ieee, +static void libipw_monitor_rx(struct libipw_device *ieee, struct sk_buff *skb, - struct ieee80211_rx_stats *rx_stats) + struct libipw_rx_stats *rx_stats) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; u16 fc = le16_to_cpu(hdr->frame_control); skb->dev = ieee->dev; skb_reset_mac_header(skb); - skb_pull(skb, ieee80211_get_hdrlen(fc)); + skb_pull(skb, libipw_get_hdrlen(fc)); skb->pkt_type = PACKET_OTHERHOST; skb->protocol = htons(ETH_P_80211_RAW); memset(skb->cb, 0, sizeof(skb->cb)); @@ -53,22 +53,22 @@ static void ieee80211_monitor_rx(struct ieee80211_device *ieee, } /* Called only as a tasklet (software IRQ) */ -static struct ieee80211_frag_entry *ieee80211_frag_cache_find(struct - ieee80211_device +static struct libipw_frag_entry *libipw_frag_cache_find(struct + libipw_device *ieee, unsigned int seq, unsigned int frag, u8 * src, u8 * dst) { - struct ieee80211_frag_entry *entry; + struct libipw_frag_entry *entry; int i; - for (i = 0; i < IEEE80211_FRAG_CACHE_LEN; i++) { + for (i = 0; i < LIBIPW_FRAG_CACHE_LEN; i++) { entry = &ieee->frag_cache[i]; if (entry->skb != NULL && time_after(jiffies, entry->first_frag_time + 2 * HZ)) { - IEEE80211_DEBUG_FRAG("expiring fragment cache entry " + LIBIPW_DEBUG_FRAG("expiring fragment cache entry " "seq=%u last_frag=%u\n", entry->seq, entry->last_frag); dev_kfree_skb_any(entry->skb); @@ -86,13 +86,13 @@ static struct ieee80211_frag_entry *ieee80211_frag_cache_find(struct } /* Called only as a tasklet (software IRQ) */ -static struct sk_buff *ieee80211_frag_cache_get(struct ieee80211_device *ieee, - struct ieee80211_hdr_4addr *hdr) +static struct sk_buff *libipw_frag_cache_get(struct libipw_device *ieee, + struct libipw_hdr_4addr *hdr) { struct sk_buff *skb = NULL; u16 sc; unsigned int frag, seq; - struct ieee80211_frag_entry *entry; + struct libipw_frag_entry *entry; sc = le16_to_cpu(hdr->seq_ctl); frag = WLAN_GET_SEQ_FRAG(sc); @@ -101,7 +101,7 @@ static struct sk_buff *ieee80211_frag_cache_get(struct 
ieee80211_device *ieee, if (frag == 0) { /* Reserve enough space to fit maximum frame length */ skb = dev_alloc_skb(ieee->dev->mtu + - sizeof(struct ieee80211_hdr_4addr) + + sizeof(struct libipw_hdr_4addr) + 8 /* LLC */ + 2 /* alignment */ + 8 /* WEP */ + ETH_ALEN /* WDS */ ); @@ -110,7 +110,7 @@ static struct sk_buff *ieee80211_frag_cache_get(struct ieee80211_device *ieee, entry = &ieee->frag_cache[ieee->frag_next_idx]; ieee->frag_next_idx++; - if (ieee->frag_next_idx >= IEEE80211_FRAG_CACHE_LEN) + if (ieee->frag_next_idx >= LIBIPW_FRAG_CACHE_LEN) ieee->frag_next_idx = 0; if (entry->skb != NULL) @@ -125,7 +125,7 @@ static struct sk_buff *ieee80211_frag_cache_get(struct ieee80211_device *ieee, } else { /* received a fragment of a frame for which the head fragment * should have already been received */ - entry = ieee80211_frag_cache_find(ieee, seq, frag, hdr->addr2, + entry = libipw_frag_cache_find(ieee, seq, frag, hdr->addr2, hdr->addr1); if (entry != NULL) { entry->last_frag = frag; @@ -137,21 +137,21 @@ static struct sk_buff *ieee80211_frag_cache_get(struct ieee80211_device *ieee, } /* Called only as a tasklet (software IRQ) */ -static int ieee80211_frag_cache_invalidate(struct ieee80211_device *ieee, - struct ieee80211_hdr_4addr *hdr) +static int libipw_frag_cache_invalidate(struct libipw_device *ieee, + struct libipw_hdr_4addr *hdr) { u16 sc; unsigned int seq; - struct ieee80211_frag_entry *entry; + struct libipw_frag_entry *entry; sc = le16_to_cpu(hdr->seq_ctl); seq = WLAN_GET_SEQ_SEQ(sc); - entry = ieee80211_frag_cache_find(ieee, seq, -1, hdr->addr2, + entry = libipw_frag_cache_find(ieee, seq, -1, hdr->addr2, hdr->addr1); if (entry == NULL) { - IEEE80211_DEBUG_FRAG("could not invalidate fragment cache " + LIBIPW_DEBUG_FRAG("could not invalidate fragment cache " "entry (seq=%u)\n", seq); return -1; } @@ -161,14 +161,14 @@ static int ieee80211_frag_cache_invalidate(struct ieee80211_device *ieee, } #ifdef NOT_YET -/* ieee80211_rx_frame_mgtmt +/* libipw_rx_frame_mgtmt * * Responsible for handling management control frames * - * Called by ieee80211_rx */ + * Called by libipw_rx */ static int -ieee80211_rx_frame_mgmt(struct ieee80211_device *ieee, struct sk_buff *skb, - struct ieee80211_rx_stats *rx_stats, u16 type, +libipw_rx_frame_mgmt(struct libipw_device *ieee, struct sk_buff *skb, + struct libipw_rx_stats *rx_stats, u16 type, u16 stype) { if (ieee->iw_mode == IW_MODE_MASTER) { @@ -176,7 +176,7 @@ ieee80211_rx_frame_mgmt(struct ieee80211_device *ieee, struct sk_buff *skb, ieee->dev->name); return 0; /* - hostap_update_sta_ps(ieee, (struct hostap_ieee80211_hdr_4addr *) + hostap_update_sta_ps(ieee, (struct hostap_libipw_hdr_4addr *) skb->data);*/ } @@ -219,26 +219,27 @@ ieee80211_rx_frame_mgmt(struct ieee80211_device *ieee, struct sk_buff *skb, /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ -static unsigned char rfc1042_header[] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; +static unsigned char libipw_rfc1042_header[] = + { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 }; /* Bridge-Tunnel header (for EtherTypes ETH_P_AARP and ETH_P_IPX) */ -static unsigned char bridge_tunnel_header[] = +static unsigned char libipw_bridge_tunnel_header[] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; /* No encapsulation header if EtherType < 0x600 (=length) */ -/* Called by ieee80211_rx_frame_decrypt */ -static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee, +/* Called by libipw_rx_frame_decrypt */ +static int 
libipw_is_eapol_frame(struct libipw_device *ieee, struct sk_buff *skb) { struct net_device *dev = ieee->dev; u16 fc, ethertype; - struct ieee80211_hdr_3addr *hdr; + struct libipw_hdr_3addr *hdr; u8 *pos; if (skb->len < 24) return 0; - hdr = (struct ieee80211_hdr_3addr *)skb->data; + hdr = (struct libipw_hdr_3addr *)skb->data; fc = le16_to_cpu(hdr->frame_ctl); /* check that the frame is unicast frame to us */ @@ -266,28 +267,28 @@ static int ieee80211_is_eapol_frame(struct ieee80211_device *ieee, return 0; } -/* Called only as a tasklet (software IRQ), by ieee80211_rx */ +/* Called only as a tasklet (software IRQ), by libipw_rx */ static int -ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, +libipw_rx_frame_decrypt(struct libipw_device *ieee, struct sk_buff *skb, struct lib80211_crypt_data *crypt) { - struct ieee80211_hdr_3addr *hdr; + struct libipw_hdr_3addr *hdr; int res, hdrlen; if (crypt == NULL || crypt->ops->decrypt_mpdu == NULL) return 0; - hdr = (struct ieee80211_hdr_3addr *)skb->data; - hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); + hdr = (struct libipw_hdr_3addr *)skb->data; + hdrlen = libipw_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); atomic_inc(&crypt->refcnt); res = crypt->ops->decrypt_mpdu(skb, hdrlen, crypt->priv); atomic_dec(&crypt->refcnt); if (res < 0) { - IEEE80211_DEBUG_DROP("decryption failed (SA=%pM) res=%d\n", + LIBIPW_DEBUG_DROP("decryption failed (SA=%pM) res=%d\n", hdr->addr2, res); if (res == -2) - IEEE80211_DEBUG_DROP("Decryption failed ICV " + LIBIPW_DEBUG_DROP("Decryption failed ICV " "mismatch (key %d)\n", skb->data[hdrlen + 3] >> 6); ieee->ieee_stats.rx_discards_undecryptable++; @@ -297,20 +298,20 @@ ieee80211_rx_frame_decrypt(struct ieee80211_device *ieee, struct sk_buff *skb, return res; } -/* Called only as a tasklet (software IRQ), by ieee80211_rx */ +/* Called only as a tasklet (software IRQ), by libipw_rx */ static int -ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, +libipw_rx_frame_decrypt_msdu(struct libipw_device *ieee, struct sk_buff *skb, int keyidx, struct lib80211_crypt_data *crypt) { - struct ieee80211_hdr_3addr *hdr; + struct libipw_hdr_3addr *hdr; int res, hdrlen; if (crypt == NULL || crypt->ops->decrypt_msdu == NULL) return 0; - hdr = (struct ieee80211_hdr_3addr *)skb->data; - hdrlen = ieee80211_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); + hdr = (struct libipw_hdr_3addr *)skb->data; + hdrlen = libipw_get_hdrlen(le16_to_cpu(hdr->frame_ctl)); atomic_inc(&crypt->refcnt); res = crypt->ops->decrypt_msdu(skb, keyidx, hdrlen, crypt->priv); @@ -328,11 +329,11 @@ ieee80211_rx_frame_decrypt_msdu(struct ieee80211_device *ieee, /* All received frames are sent to this function. @skb contains the frame in * IEEE 802.11 format, i.e., in the format it was sent over air. * This function is called only as a tasklet (software IRQ). 
*/ -int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, - struct ieee80211_rx_stats *rx_stats) +int libipw_rx(struct libipw_device *ieee, struct sk_buff *skb, + struct libipw_rx_stats *rx_stats) { struct net_device *dev = ieee->dev; - struct ieee80211_hdr_4addr *hdr; + struct libipw_hdr_4addr *hdr; size_t hdrlen; u16 fc, type, stype, sc; unsigned int frag; @@ -352,7 +353,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, int keyidx = 0; int can_be_decrypted = 0; - hdr = (struct ieee80211_hdr_4addr *)skb->data; + hdr = (struct libipw_hdr_4addr *)skb->data; if (skb->len < 10) { printk(KERN_INFO "%s: SKB length < 10\n", dev->name); goto rx_dropped; @@ -363,7 +364,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, stype = WLAN_FC_GET_STYPE(fc); sc = le16_to_cpu(hdr->seq_ctl); frag = WLAN_GET_SEQ_FRAG(sc); - hdrlen = ieee80211_get_hdrlen(fc); + hdrlen = libipw_get_hdrlen(fc); if (skb->len < hdrlen) { printk(KERN_INFO "%s: invalid SKB length %d\n", @@ -380,19 +381,19 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, struct iw_quality wstats; wstats.updated = 0; - if (rx_stats->mask & IEEE80211_STATMASK_RSSI) { + if (rx_stats->mask & LIBIPW_STATMASK_RSSI) { wstats.level = rx_stats->signal; wstats.updated |= IW_QUAL_LEVEL_UPDATED; } else wstats.updated |= IW_QUAL_LEVEL_INVALID; - if (rx_stats->mask & IEEE80211_STATMASK_NOISE) { + if (rx_stats->mask & LIBIPW_STATMASK_NOISE) { wstats.noise = rx_stats->noise; wstats.updated |= IW_QUAL_NOISE_UPDATED; } else wstats.updated |= IW_QUAL_NOISE_INVALID; - if (rx_stats->mask & IEEE80211_STATMASK_SIGNAL) { + if (rx_stats->mask & LIBIPW_STATMASK_SIGNAL) { wstats.qual = rx_stats->signal; wstats.updated |= IW_QUAL_QUAL_UPDATED; } else @@ -411,7 +412,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, if (ieee->iw_mode == IW_MODE_MONITOR) { dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; - ieee80211_monitor_rx(ieee, skb, rx_stats); + libipw_monitor_rx(ieee, skb, rx_stats); return 1; } @@ -457,7 +458,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, * frames from other than current BSS, so just drop the * frames silently instead of filling system log with * these reports. 
*/ - IEEE80211_DEBUG_DROP("Decryption failed (not set)" + LIBIPW_DEBUG_DROP("Decryption failed (not set)" " (SA=%pM)\n", hdr->addr2); ieee->ieee_stats.rx_discards_undecryptable++; goto rx_dropped; @@ -475,7 +476,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, goto rx_dropped; } - if (ieee80211_rx_frame_mgmt(ieee, skb, rx_stats, type, stype)) + if (libipw_rx_frame_mgmt(ieee, skb, rx_stats, type, stype)) goto rx_dropped; else goto rx_exit; @@ -488,7 +489,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, ieee->prev_seq_ctl = sc; /* Data frame - extract src/dst addresses */ - if (skb->len < IEEE80211_3ADDR_LEN) + if (skb->len < LIBIPW_3ADDR_LEN) goto rx_dropped; switch (fc & (IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS)) { @@ -501,7 +502,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, memcpy(src, hdr->addr2, ETH_ALEN); break; case IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS: - if (skb->len < IEEE80211_4ADDR_LEN) + if (skb->len < LIBIPW_4ADDR_LEN) goto rx_dropped; memcpy(dst, hdr->addr3, ETH_ALEN); memcpy(src, hdr->addr4, ETH_ALEN); @@ -560,7 +561,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, stype != IEEE80211_STYPE_DATA_CFPOLL && stype != IEEE80211_STYPE_DATA_CFACKPOLL) { if (stype != IEEE80211_STYPE_NULLFUNC) - IEEE80211_DEBUG_DROP("RX: dropped data frame " + LIBIPW_DEBUG_DROP("RX: dropped data frame " "with no data (type=0x%02x, " "subtype=0x%02x, len=%d)\n", type, stype, skb->len); @@ -570,21 +571,21 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, /* skb: hdr + (possibly fragmented, possibly encrypted) payload */ if ((fc & IEEE80211_FCTL_PROTECTED) && can_be_decrypted && - (keyidx = ieee80211_rx_frame_decrypt(ieee, skb, crypt)) < 0) + (keyidx = libipw_rx_frame_decrypt(ieee, skb, crypt)) < 0) goto rx_dropped; - hdr = (struct ieee80211_hdr_4addr *)skb->data; + hdr = (struct libipw_hdr_4addr *)skb->data; /* skb: hdr + (possibly fragmented) plaintext payload */ // PR: FIXME: hostap has additional conditions in the "if" below: // ieee->host_decrypt && (fc & IEEE80211_FCTL_PROTECTED) && if ((frag != 0) || (fc & IEEE80211_FCTL_MOREFRAGS)) { int flen; - struct sk_buff *frag_skb = ieee80211_frag_cache_get(ieee, hdr); - IEEE80211_DEBUG_FRAG("Rx Fragment received (%u)\n", frag); + struct sk_buff *frag_skb = libipw_frag_cache_get(ieee, hdr); + LIBIPW_DEBUG_FRAG("Rx Fragment received (%u)\n", frag); if (!frag_skb) { - IEEE80211_DEBUG(IEEE80211_DL_RX | IEEE80211_DL_FRAG, + LIBIPW_DEBUG(LIBIPW_DL_RX | LIBIPW_DL_FRAG, "Rx cannot get skb from fragment " "cache (morefrag=%d seq=%u frag=%u)\n", (fc & IEEE80211_FCTL_MOREFRAGS) != 0, @@ -600,7 +601,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, printk(KERN_WARNING "%s: host decrypted and " "reassembled frame did not fit skb\n", dev->name); - ieee80211_frag_cache_invalidate(ieee, hdr); + libipw_frag_cache_invalidate(ieee, hdr); goto rx_dropped; } @@ -627,24 +628,24 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, /* this was the last fragment and the frame will be * delivered, so remove skb from fragment cache */ skb = frag_skb; - hdr = (struct ieee80211_hdr_4addr *)skb->data; - ieee80211_frag_cache_invalidate(ieee, hdr); + hdr = (struct libipw_hdr_4addr *)skb->data; + libipw_frag_cache_invalidate(ieee, hdr); } /* skb: hdr + (possible reassembled) full MSDU payload; possibly still * encrypted/authenticated */ if ((fc & IEEE80211_FCTL_PROTECTED) && can_be_decrypted && - 
ieee80211_rx_frame_decrypt_msdu(ieee, skb, keyidx, crypt)) + libipw_rx_frame_decrypt_msdu(ieee, skb, keyidx, crypt)) goto rx_dropped; - hdr = (struct ieee80211_hdr_4addr *)skb->data; + hdr = (struct libipw_hdr_4addr *)skb->data; if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep) { if ( /*ieee->ieee802_1x && */ - ieee80211_is_eapol_frame(ieee, skb)) { + libipw_is_eapol_frame(ieee, skb)) { /* pass unencrypted EAPOL frames even if encryption is * configured */ } else { - IEEE80211_DEBUG_DROP("encryption configured, but RX " + LIBIPW_DEBUG_DROP("encryption configured, but RX " "frame not encrypted (SA=%pM)\n", hdr->addr2); goto rx_dropped; @@ -652,8 +653,8 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, } if (crypt && !(fc & IEEE80211_FCTL_PROTECTED) && !ieee->open_wep && - !ieee80211_is_eapol_frame(ieee, skb)) { - IEEE80211_DEBUG_DROP("dropped unencrypted RX data " + !libipw_is_eapol_frame(ieee, skb)) { + LIBIPW_DEBUG_DROP("dropped unencrypted RX data " "frame from %pM (drop_unencrypted=1)\n", hdr->addr2); goto rx_dropped; @@ -736,9 +737,9 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, /* convert hdr + possible LLC headers into Ethernet header */ if (skb->len - hdrlen >= 8 && - ((memcmp(payload, rfc1042_header, SNAP_SIZE) == 0 && + ((memcmp(payload, libipw_rfc1042_header, SNAP_SIZE) == 0 && ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) || - memcmp(payload, bridge_tunnel_header, SNAP_SIZE) == 0)) { + memcmp(payload, libipw_bridge_tunnel_header, SNAP_SIZE) == 0)) { /* remove RFC1042 or Bridge-Tunnel encapsulation and * replace EtherType */ skb_pull(skb, hdrlen + SNAP_SIZE); @@ -807,7 +808,7 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, /* netif_rx always succeeds, but it might drop * the packet. If it drops the packet, we log that * in our stats. */ - IEEE80211_DEBUG_DROP + LIBIPW_DEBUG_DROP ("RX: netif_rx dropped the packet\n"); dev->stats.rx_dropped++; } @@ -829,18 +830,18 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, return 0; } -/* Filter out unrelated packets, call ieee80211_rx[_mgt] +/* Filter out unrelated packets, call libipw_rx[_mgt] * This function takes over the skb, it should not be used again after calling * this function. 
*/ -void ieee80211_rx_any(struct ieee80211_device *ieee, - struct sk_buff *skb, struct ieee80211_rx_stats *stats) +void libipw_rx_any(struct libipw_device *ieee, + struct sk_buff *skb, struct libipw_rx_stats *stats) { - struct ieee80211_hdr_4addr *hdr; + struct libipw_hdr_4addr *hdr; int is_packet_for_us; u16 fc; if (ieee->iw_mode == IW_MODE_MONITOR) { - if (!ieee80211_rx(ieee, skb, stats)) + if (!libipw_rx(ieee, skb, stats)) dev_kfree_skb_irq(skb); return; } @@ -848,7 +849,7 @@ void ieee80211_rx_any(struct ieee80211_device *ieee, if (skb->len < sizeof(struct ieee80211_hdr)) goto drop_free; - hdr = (struct ieee80211_hdr_4addr *)skb->data; + hdr = (struct libipw_hdr_4addr *)skb->data; fc = le16_to_cpu(hdr->frame_ctl); if ((fc & IEEE80211_FCTL_VERS) != 0) @@ -856,9 +857,9 @@ void ieee80211_rx_any(struct ieee80211_device *ieee, switch (fc & IEEE80211_FCTL_FTYPE) { case IEEE80211_FTYPE_MGMT: - if (skb->len < sizeof(struct ieee80211_hdr_3addr)) + if (skb->len < sizeof(struct libipw_hdr_3addr)) goto drop_free; - ieee80211_rx_mgt(ieee, hdr, stats); + libipw_rx_mgt(ieee, hdr, stats); dev_kfree_skb_irq(skb); return; case IEEE80211_FTYPE_DATA: @@ -910,7 +911,7 @@ void ieee80211_rx_any(struct ieee80211_device *ieee, } if (is_packet_for_us) - if (!ieee80211_rx(ieee, skb, stats)) + if (!libipw_rx(ieee, skb, stats)) dev_kfree_skb_irq(skb); return; @@ -928,7 +929,7 @@ static u8 qos_oui[QOS_OUI_LEN] = { 0x00, 0x50, 0xF2 }; * Make ther structure we read from the beacon packet has * the right values */ -static int ieee80211_verify_qos_info(struct ieee80211_qos_information_element +static int libipw_verify_qos_info(struct libipw_qos_information_element *info_element, int sub_type) { @@ -947,12 +948,12 @@ static int ieee80211_verify_qos_info(struct ieee80211_qos_information_element /* * Parse a QoS parameter element */ -static int ieee80211_read_qos_param_element(struct ieee80211_qos_parameter_info - *element_param, struct ieee80211_info_element +static int libipw_read_qos_param_element(struct libipw_qos_parameter_info + *element_param, struct libipw_info_element *info_element) { int ret = 0; - u16 size = sizeof(struct ieee80211_qos_parameter_info) - 2; + u16 size = sizeof(struct libipw_qos_parameter_info) - 2; if ((info_element == NULL) || (element_param == NULL)) return -1; @@ -965,7 +966,7 @@ static int ieee80211_read_qos_param_element(struct ieee80211_qos_parameter_info } else ret = -1; if (ret == 0) - ret = ieee80211_verify_qos_info(&element_param->info_element, + ret = libipw_verify_qos_info(&element_param->info_element, QOS_OUI_PARAM_SUB_TYPE); return ret; } @@ -973,13 +974,13 @@ static int ieee80211_read_qos_param_element(struct ieee80211_qos_parameter_info /* * Parse a QoS information element */ -static int ieee80211_read_qos_info_element(struct - ieee80211_qos_information_element - *element_info, struct ieee80211_info_element +static int libipw_read_qos_info_element(struct + libipw_qos_information_element + *element_info, struct libipw_info_element *info_element) { int ret = 0; - u16 size = sizeof(struct ieee80211_qos_information_element) - 2; + u16 size = sizeof(struct libipw_qos_information_element) - 2; if (element_info == NULL) return -1; @@ -995,7 +996,7 @@ static int ieee80211_read_qos_info_element(struct ret = -1; if (ret == 0) - ret = ieee80211_verify_qos_info(element_info, + ret = libipw_verify_qos_info(element_info, QOS_OUI_INFO_SUB_TYPE); return ret; } @@ -1003,15 +1004,15 @@ static int ieee80211_read_qos_info_element(struct /* * Write QoS parameters from the ac parameters. 
*/ -static int ieee80211_qos_convert_ac_to_parameters(struct - ieee80211_qos_parameter_info +static int libipw_qos_convert_ac_to_parameters(struct + libipw_qos_parameter_info *param_elm, struct - ieee80211_qos_parameters + libipw_qos_parameters *qos_param) { int rc = 0; int i; - struct ieee80211_qos_ac_parameter *ac_params; + struct libipw_qos_ac_parameter *ac_params; u32 txop; u8 cw_min; u8 cw_max; @@ -1042,27 +1043,27 @@ static int ieee80211_qos_convert_ac_to_parameters(struct * parameters element. check the information element length to decide * which type to read */ -static int ieee80211_parse_qos_info_param_IE(struct ieee80211_info_element +static int libipw_parse_qos_info_param_IE(struct libipw_info_element *info_element, - struct ieee80211_network *network) + struct libipw_network *network) { int rc = 0; - struct ieee80211_qos_parameters *qos_param = NULL; - struct ieee80211_qos_information_element qos_info_element; + struct libipw_qos_parameters *qos_param = NULL; + struct libipw_qos_information_element qos_info_element; - rc = ieee80211_read_qos_info_element(&qos_info_element, info_element); + rc = libipw_read_qos_info_element(&qos_info_element, info_element); if (rc == 0) { network->qos_data.param_count = qos_info_element.ac_info & 0x0F; network->flags |= NETWORK_HAS_QOS_INFORMATION; } else { - struct ieee80211_qos_parameter_info param_element; + struct libipw_qos_parameter_info param_element; - rc = ieee80211_read_qos_param_element(¶m_element, + rc = libipw_read_qos_param_element(¶m_element, info_element); if (rc == 0) { qos_param = &(network->qos_data.parameters); - ieee80211_qos_convert_ac_to_parameters(¶m_element, + libipw_qos_convert_ac_to_parameters(¶m_element, qos_param); network->flags |= NETWORK_HAS_QOS_PARAMETERS; network->qos_data.param_count = @@ -1071,7 +1072,7 @@ static int ieee80211_parse_qos_info_param_IE(struct ieee80211_info_element } if (rc == 0) { - IEEE80211_DEBUG_QOS("QoS is supported\n"); + LIBIPW_DEBUG_QOS("QoS is supported\n"); network->qos_data.supported = 1; } return rc; @@ -1116,9 +1117,9 @@ static const char *get_info_element_string(u16 id) } #endif -static int ieee80211_parse_info_param(struct ieee80211_info_element +static int libipw_parse_info_param(struct libipw_info_element *info_element, u16 length, - struct ieee80211_network *network) + struct libipw_network *network) { DECLARE_SSID_BUF(ssid); u8 i; @@ -1129,7 +1130,7 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element while (length >= sizeof(*info_element)) { if (sizeof(*info_element) + info_element->len > length) { - IEEE80211_DEBUG_MGMT("Info elem: parse failed: " + LIBIPW_DEBUG_MGMT("Info elem: parse failed: " "info_element->len + 2 > left : " "info_element->len+2=%zd left=%d, id=%d.\n", info_element->len + @@ -1151,7 +1152,7 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element memset(network->ssid + network->ssid_len, 0, IW_ESSID_MAX_SIZE - network->ssid_len); - IEEE80211_DEBUG_MGMT("WLAN_EID_SSID: '%s' len=%d.\n", + LIBIPW_DEBUG_MGMT("WLAN_EID_SSID: '%s' len=%d.\n", print_ssid(ssid, network->ssid, network->ssid_len), network->ssid_len); @@ -1170,17 +1171,17 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element (p - rates_str), "%02X ", network->rates[i]); #endif - if (ieee80211_is_ofdm_rate + if (libipw_is_ofdm_rate (info_element->data[i])) { network->flags |= NETWORK_HAS_OFDM; if (info_element->data[i] & - IEEE80211_BASIC_RATE_MASK) + LIBIPW_BASIC_RATE_MASK) network->flags &= ~NETWORK_HAS_CCK; } } - 
IEEE80211_DEBUG_MGMT("WLAN_EID_SUPP_RATES: '%s' (%d)\n", + LIBIPW_DEBUG_MGMT("WLAN_EID_SUPP_RATES: '%s' (%d)\n", rates_str, network->rates_len); break; @@ -1197,61 +1198,61 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element (p - rates_str), "%02X ", network->rates[i]); #endif - if (ieee80211_is_ofdm_rate + if (libipw_is_ofdm_rate (info_element->data[i])) { network->flags |= NETWORK_HAS_OFDM; if (info_element->data[i] & - IEEE80211_BASIC_RATE_MASK) + LIBIPW_BASIC_RATE_MASK) network->flags &= ~NETWORK_HAS_CCK; } } - IEEE80211_DEBUG_MGMT("WLAN_EID_EXT_SUPP_RATES: '%s' (%d)\n", + LIBIPW_DEBUG_MGMT("WLAN_EID_EXT_SUPP_RATES: '%s' (%d)\n", rates_str, network->rates_ex_len); break; case WLAN_EID_DS_PARAMS: - IEEE80211_DEBUG_MGMT("WLAN_EID_DS_PARAMS: %d\n", + LIBIPW_DEBUG_MGMT("WLAN_EID_DS_PARAMS: %d\n", info_element->data[0]); network->channel = info_element->data[0]; break; case WLAN_EID_FH_PARAMS: - IEEE80211_DEBUG_MGMT("WLAN_EID_FH_PARAMS: ignored\n"); + LIBIPW_DEBUG_MGMT("WLAN_EID_FH_PARAMS: ignored\n"); break; case WLAN_EID_CF_PARAMS: - IEEE80211_DEBUG_MGMT("WLAN_EID_CF_PARAMS: ignored\n"); + LIBIPW_DEBUG_MGMT("WLAN_EID_CF_PARAMS: ignored\n"); break; case WLAN_EID_TIM: network->tim.tim_count = info_element->data[0]; network->tim.tim_period = info_element->data[1]; - IEEE80211_DEBUG_MGMT("WLAN_EID_TIM: partially ignored\n"); + LIBIPW_DEBUG_MGMT("WLAN_EID_TIM: partially ignored\n"); break; case WLAN_EID_ERP_INFO: network->erp_value = info_element->data[0]; network->flags |= NETWORK_HAS_ERP_VALUE; - IEEE80211_DEBUG_MGMT("MFIE_TYPE_ERP_SET: %d\n", + LIBIPW_DEBUG_MGMT("MFIE_TYPE_ERP_SET: %d\n", network->erp_value); break; case WLAN_EID_IBSS_PARAMS: network->atim_window = info_element->data[0]; - IEEE80211_DEBUG_MGMT("WLAN_EID_IBSS_PARAMS: %d\n", + LIBIPW_DEBUG_MGMT("WLAN_EID_IBSS_PARAMS: %d\n", network->atim_window); break; case WLAN_EID_CHALLENGE: - IEEE80211_DEBUG_MGMT("WLAN_EID_CHALLENGE: ignored\n"); + LIBIPW_DEBUG_MGMT("WLAN_EID_CHALLENGE: ignored\n"); break; case WLAN_EID_GENERIC: - IEEE80211_DEBUG_MGMT("WLAN_EID_GENERIC: %d bytes\n", + LIBIPW_DEBUG_MGMT("WLAN_EID_GENERIC: %d bytes\n", info_element->len); - if (!ieee80211_parse_qos_info_param_IE(info_element, + if (!libipw_parse_qos_info_param_IE(info_element, network)) break; @@ -1268,7 +1269,7 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element break; case WLAN_EID_RSN: - IEEE80211_DEBUG_MGMT("WLAN_EID_RSN: %d bytes\n", + LIBIPW_DEBUG_MGMT("WLAN_EID_RSN: %d bytes\n", info_element->len); network->rsn_ie_len = min(info_element->len + 2, MAX_WPA_IE_LEN); @@ -1318,7 +1319,7 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element break; default: - IEEE80211_DEBUG_MGMT + LIBIPW_DEBUG_MGMT ("Unsupported info element: %s (%d)\n", get_info_element_string(info_element->id), info_element->id); @@ -1327,20 +1328,20 @@ static int ieee80211_parse_info_param(struct ieee80211_info_element length -= sizeof(*info_element) + info_element->len; info_element = - (struct ieee80211_info_element *)&info_element-> + (struct libipw_info_element *)&info_element-> data[info_element->len]; } return 0; } -static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct ieee80211_assoc_response - *frame, struct ieee80211_rx_stats *stats) +static int libipw_handle_assoc_resp(struct libipw_device *ieee, struct libipw_assoc_response + *frame, struct libipw_rx_stats *stats) { - struct ieee80211_network network_resp = { + struct libipw_network network_resp = { .ibss_dfs = NULL, }; - struct ieee80211_network 
*network = &network_resp; + struct libipw_network *network = &network_resp; struct net_device *dev = ieee->dev; network->flags = 0; @@ -1361,7 +1362,7 @@ static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct iee network->erp_value = (network->capability & WLAN_CAPABILITY_IBSS) ? 0x3 : 0x0; - if (stats->freq == IEEE80211_52GHZ_BAND) { + if (stats->freq == LIBIPW_52GHZ_BAND) { /* for A band (No DS info) */ network->channel = stats->received_channel; } else @@ -1370,12 +1371,12 @@ static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct iee network->wpa_ie_len = 0; network->rsn_ie_len = 0; - if (ieee80211_parse_info_param + if (libipw_parse_info_param (frame->info_element, stats->len - sizeof(*frame), network)) return 1; network->mode = 0; - if (stats->freq == IEEE80211_52GHZ_BAND) + if (stats->freq == LIBIPW_52GHZ_BAND) network->mode = IEEE_A; else { if (network->flags & NETWORK_HAS_OFDM) @@ -1394,10 +1395,10 @@ static int ieee80211_handle_assoc_resp(struct ieee80211_device *ieee, struct iee /***************************************************/ -static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee80211_probe_response +static int libipw_network_init(struct libipw_device *ieee, struct libipw_probe_response *beacon, - struct ieee80211_network *network, - struct ieee80211_rx_stats *stats) + struct libipw_network *network, + struct libipw_rx_stats *stats) { DECLARE_SSID_BUF(ssid); @@ -1423,7 +1424,7 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021 network->erp_value = (network->capability & WLAN_CAPABILITY_IBSS) ? 0x3 : 0x0; - if (stats->freq == IEEE80211_52GHZ_BAND) { + if (stats->freq == LIBIPW_52GHZ_BAND) { /* for A band (No DS info) */ network->channel = stats->received_channel; } else @@ -1432,12 +1433,12 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021 network->wpa_ie_len = 0; network->rsn_ie_len = 0; - if (ieee80211_parse_info_param + if (libipw_parse_info_param (beacon->info_element, stats->len - sizeof(*beacon), network)) return 1; network->mode = 0; - if (stats->freq == IEEE80211_52GHZ_BAND) + if (stats->freq == LIBIPW_52GHZ_BAND) network->mode = IEEE_A; else { if (network->flags & NETWORK_HAS_OFDM) @@ -1447,7 +1448,7 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021 } if (network->mode == 0) { - IEEE80211_DEBUG_SCAN("Filtered out '%s (%pM)' " + LIBIPW_DEBUG_SCAN("Filtered out '%s (%pM)' " "network.\n", print_ssid(ssid, network->ssid, network->ssid_len), @@ -1460,8 +1461,8 @@ static int ieee80211_network_init(struct ieee80211_device *ieee, struct ieee8021 return 0; } -static inline int is_same_network(struct ieee80211_network *src, - struct ieee80211_network *dst) +static inline int is_same_network(struct libipw_network *src, + struct libipw_network *dst) { /* A network is only a duplicate if the channel, BSSID, and ESSID * all match. 
We treat all with the same BSSID and channel @@ -1472,13 +1473,13 @@ static inline int is_same_network(struct ieee80211_network *src, !memcmp(src->ssid, dst->ssid, src->ssid_len)); } -static void update_network(struct ieee80211_network *dst, - struct ieee80211_network *src) +static void update_network(struct libipw_network *dst, + struct libipw_network *src) { int qos_active; u8 old_param; - ieee80211_network_reset(dst); + libipw_network_reset(dst); dst->ibss_dfs = src->ibss_dfs; /* We only update the statistics if they were created by receiving @@ -1488,9 +1489,9 @@ static void update_network(struct ieee80211_network *dst, * down the signal level of an AP. */ if (dst->channel == src->stats.received_channel) memcpy(&dst->stats, &src->stats, - sizeof(struct ieee80211_rx_stats)); + sizeof(struct libipw_rx_stats)); else - IEEE80211_DEBUG_SCAN("Network %pM info received " + LIBIPW_DEBUG_SCAN("Network %pM info received " "off channel (%d vs. %d)\n", src->bssid, dst->channel, src->stats.received_channel); @@ -1521,7 +1522,7 @@ static void update_network(struct ieee80211_network *dst, old_param = dst->qos_data.old_param_count; if (dst->flags & NETWORK_HAS_QOS_MASK) memcpy(&dst->qos_data, &src->qos_data, - sizeof(struct ieee80211_qos_data)); + sizeof(struct libipw_qos_data)); else { dst->qos_data.supported = src->qos_data.supported; dst->qos_data.param_count = src->qos_data.param_count; @@ -1529,11 +1530,11 @@ static void update_network(struct ieee80211_network *dst, if (dst->qos_data.supported == 1) { if (dst->ssid_len) - IEEE80211_DEBUG_QOS + LIBIPW_DEBUG_QOS ("QoS the network %s is QoS supported\n", dst->ssid); else - IEEE80211_DEBUG_QOS + LIBIPW_DEBUG_QOS ("QoS the network is QoS supported\n"); } dst->qos_data.active = qos_active; @@ -1547,25 +1548,25 @@ static inline int is_beacon(__le16 fc) return (WLAN_FC_GET_STYPE(le16_to_cpu(fc)) == IEEE80211_STYPE_BEACON); } -static void ieee80211_process_probe_response(struct ieee80211_device +static void libipw_process_probe_response(struct libipw_device *ieee, struct - ieee80211_probe_response - *beacon, struct ieee80211_rx_stats + libipw_probe_response + *beacon, struct libipw_rx_stats *stats) { struct net_device *dev = ieee->dev; - struct ieee80211_network network = { + struct libipw_network network = { .ibss_dfs = NULL, }; - struct ieee80211_network *target; - struct ieee80211_network *oldest = NULL; + struct libipw_network *target; + struct libipw_network *oldest = NULL; #ifdef CONFIG_LIBIPW_DEBUG - struct ieee80211_info_element *info_element = beacon->info_element; + struct libipw_info_element *info_element = beacon->info_element; #endif unsigned long flags; DECLARE_SSID_BUF(ssid); - IEEE80211_DEBUG_SCAN("'%s' (%pM" + LIBIPW_DEBUG_SCAN("'%s' (%pM" "): %c%c%c%c %c%c%c%c-%c%c%c%c %c%c%c%c\n", print_ssid(ssid, info_element->data, info_element->len), beacon->header.addr3, @@ -1586,8 +1587,8 @@ static void ieee80211_process_probe_response(struct ieee80211_device (beacon->capability & cpu_to_le16(1 << 0x1)) ? '1' : '0', (beacon->capability & cpu_to_le16(1 << 0x0)) ? 
'1' : '0'); - if (ieee80211_network_init(ieee, beacon, &network, stats)) { - IEEE80211_DEBUG_SCAN("Dropped '%s' (%pM) via %s.\n", + if (libipw_network_init(ieee, beacon, &network, stats)) { + LIBIPW_DEBUG_SCAN("Dropped '%s' (%pM) via %s.\n", print_ssid(ssid, info_element->data, info_element->len), beacon->header.addr3, @@ -1624,21 +1625,21 @@ static void ieee80211_process_probe_response(struct ieee80211_device /* If there are no more slots, expire the oldest */ list_del(&oldest->list); target = oldest; - IEEE80211_DEBUG_SCAN("Expired '%s' (%pM) from " + LIBIPW_DEBUG_SCAN("Expired '%s' (%pM) from " "network list.\n", print_ssid(ssid, target->ssid, target->ssid_len), target->bssid); - ieee80211_network_reset(target); + libipw_network_reset(target); } else { /* Otherwise just pull from the free list */ target = list_entry(ieee->network_free_list.next, - struct ieee80211_network, list); + struct libipw_network, list); list_del(ieee->network_free_list.next); } #ifdef CONFIG_LIBIPW_DEBUG - IEEE80211_DEBUG_SCAN("Adding '%s' (%pM) via %s.\n", + LIBIPW_DEBUG_SCAN("Adding '%s' (%pM) via %s.\n", print_ssid(ssid, network.ssid, network.ssid_len), network.bssid, @@ -1649,7 +1650,7 @@ static void ieee80211_process_probe_response(struct ieee80211_device network.ibss_dfs = NULL; list_add_tail(&target->list, &ieee->network_list); } else { - IEEE80211_DEBUG_SCAN("Updating '%s' (%pM) via %s.\n", + LIBIPW_DEBUG_SCAN("Updating '%s' (%pM) via %s.\n", print_ssid(ssid, target->ssid, target->ssid_len), target->bssid, @@ -1670,121 +1671,121 @@ static void ieee80211_process_probe_response(struct ieee80211_device } } -void ieee80211_rx_mgt(struct ieee80211_device *ieee, - struct ieee80211_hdr_4addr *header, - struct ieee80211_rx_stats *stats) +void libipw_rx_mgt(struct libipw_device *ieee, + struct libipw_hdr_4addr *header, + struct libipw_rx_stats *stats) { switch (WLAN_FC_GET_STYPE(le16_to_cpu(header->frame_ctl))) { case IEEE80211_STYPE_ASSOC_RESP: - IEEE80211_DEBUG_MGMT("received ASSOCIATION RESPONSE (%d)\n", + LIBIPW_DEBUG_MGMT("received ASSOCIATION RESPONSE (%d)\n", WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); - ieee80211_handle_assoc_resp(ieee, - (struct ieee80211_assoc_response *) + libipw_handle_assoc_resp(ieee, + (struct libipw_assoc_response *) header, stats); break; case IEEE80211_STYPE_REASSOC_RESP: - IEEE80211_DEBUG_MGMT("received REASSOCIATION RESPONSE (%d)\n", + LIBIPW_DEBUG_MGMT("received REASSOCIATION RESPONSE (%d)\n", WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); break; case IEEE80211_STYPE_PROBE_REQ: - IEEE80211_DEBUG_MGMT("received auth (%d)\n", + LIBIPW_DEBUG_MGMT("received auth (%d)\n", WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); if (ieee->handle_probe_request != NULL) ieee->handle_probe_request(ieee->dev, (struct - ieee80211_probe_request *) + libipw_probe_request *) header, stats); break; case IEEE80211_STYPE_PROBE_RESP: - IEEE80211_DEBUG_MGMT("received PROBE RESPONSE (%d)\n", + LIBIPW_DEBUG_MGMT("received PROBE RESPONSE (%d)\n", WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); - IEEE80211_DEBUG_SCAN("Probe response\n"); - ieee80211_process_probe_response(ieee, + LIBIPW_DEBUG_SCAN("Probe response\n"); + libipw_process_probe_response(ieee, (struct - ieee80211_probe_response *) + libipw_probe_response *) header, stats); break; case IEEE80211_STYPE_BEACON: - IEEE80211_DEBUG_MGMT("received BEACON (%d)\n", + LIBIPW_DEBUG_MGMT("received BEACON (%d)\n", WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); - IEEE80211_DEBUG_SCAN("Beacon\n"); - 
ieee80211_process_probe_response(ieee, + LIBIPW_DEBUG_SCAN("Beacon\n"); + libipw_process_probe_response(ieee, (struct - ieee80211_probe_response *) + libipw_probe_response *) header, stats); break; case IEEE80211_STYPE_AUTH: - IEEE80211_DEBUG_MGMT("received auth (%d)\n", + LIBIPW_DEBUG_MGMT("received auth (%d)\n", WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); if (ieee->handle_auth != NULL) ieee->handle_auth(ieee->dev, - (struct ieee80211_auth *)header); + (struct libipw_auth *)header); break; case IEEE80211_STYPE_DISASSOC: if (ieee->handle_disassoc != NULL) ieee->handle_disassoc(ieee->dev, - (struct ieee80211_disassoc *) + (struct libipw_disassoc *) header); break; case IEEE80211_STYPE_ACTION: - IEEE80211_DEBUG_MGMT("ACTION\n"); + LIBIPW_DEBUG_MGMT("ACTION\n"); if (ieee->handle_action) ieee->handle_action(ieee->dev, - (struct ieee80211_action *) + (struct libipw_action *) header, stats); break; case IEEE80211_STYPE_REASSOC_REQ: - IEEE80211_DEBUG_MGMT("received reassoc (%d)\n", + LIBIPW_DEBUG_MGMT("received reassoc (%d)\n", WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); - IEEE80211_DEBUG_MGMT("%s: IEEE80211_REASSOC_REQ received\n", + LIBIPW_DEBUG_MGMT("%s: LIBIPW_REASSOC_REQ received\n", ieee->dev->name); if (ieee->handle_reassoc_request != NULL) ieee->handle_reassoc_request(ieee->dev, - (struct ieee80211_reassoc_request *) + (struct libipw_reassoc_request *) header); break; case IEEE80211_STYPE_ASSOC_REQ: - IEEE80211_DEBUG_MGMT("received assoc (%d)\n", + LIBIPW_DEBUG_MGMT("received assoc (%d)\n", WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); - IEEE80211_DEBUG_MGMT("%s: IEEE80211_ASSOC_REQ received\n", + LIBIPW_DEBUG_MGMT("%s: LIBIPW_ASSOC_REQ received\n", ieee->dev->name); if (ieee->handle_assoc_request != NULL) ieee->handle_assoc_request(ieee->dev); break; case IEEE80211_STYPE_DEAUTH: - IEEE80211_DEBUG_MGMT("DEAUTH\n"); + LIBIPW_DEBUG_MGMT("DEAUTH\n"); if (ieee->handle_deauth != NULL) ieee->handle_deauth(ieee->dev, - (struct ieee80211_deauth *) + (struct libipw_deauth *) header); break; default: - IEEE80211_DEBUG_MGMT("received UNKNOWN (%d)\n", + LIBIPW_DEBUG_MGMT("received UNKNOWN (%d)\n", WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); - IEEE80211_DEBUG_MGMT("%s: Unknown management packet: %d\n", + LIBIPW_DEBUG_MGMT("%s: Unknown management packet: %d\n", ieee->dev->name, WLAN_FC_GET_STYPE(le16_to_cpu (header->frame_ctl))); @@ -1792,6 +1793,6 @@ void ieee80211_rx_mgt(struct ieee80211_device *ieee, } } -EXPORT_SYMBOL_GPL(ieee80211_rx_any); -EXPORT_SYMBOL(ieee80211_rx_mgt); -EXPORT_SYMBOL(ieee80211_rx); +EXPORT_SYMBOL_GPL(libipw_rx_any); +EXPORT_SYMBOL(libipw_rx_mgt); +EXPORT_SYMBOL(libipw_rx); diff --git a/drivers/net/wireless/ipw2x00/libipw_tx.c b/drivers/net/wireless/ipw2x00/libipw_tx.c index 2e8f84fb29fa..ce1855f32b7d 100644 --- a/drivers/net/wireless/ipw2x00/libipw_tx.c +++ b/drivers/net/wireless/ipw2x00/libipw_tx.c @@ -41,7 +41,7 @@ #include #include -#include "ieee80211.h" +#include "libipw.h" /* @@ -126,12 +126,12 @@ payload of each frame is reduced to 492 bytes. 
static u8 P802_1H_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0xf8 }; static u8 RFC1042_OUI[P80211_OUI_LEN] = { 0x00, 0x00, 0x00 }; -static int ieee80211_copy_snap(u8 * data, __be16 h_proto) +static int libipw_copy_snap(u8 * data, __be16 h_proto) { - struct ieee80211_snap_hdr *snap; + struct libipw_snap_hdr *snap; u8 *oui; - snap = (struct ieee80211_snap_hdr *)data; + snap = (struct libipw_snap_hdr *)data; snap->dsap = 0xaa; snap->ssap = 0xaa; snap->ctrl = 0x03; @@ -149,7 +149,7 @@ static int ieee80211_copy_snap(u8 * data, __be16 h_proto) return SNAP_SIZE + sizeof(u16); } -static int ieee80211_encrypt_fragment(struct ieee80211_device *ieee, +static int libipw_encrypt_fragment(struct libipw_device *ieee, struct sk_buff *frag, int hdr_len) { struct lib80211_crypt_data *crypt = @@ -177,7 +177,7 @@ static int ieee80211_encrypt_fragment(struct ieee80211_device *ieee, return 0; } -void ieee80211_txb_free(struct ieee80211_txb *txb) +void libipw_txb_free(struct libipw_txb *txb) { int i; if (unlikely(!txb)) @@ -188,17 +188,17 @@ void ieee80211_txb_free(struct ieee80211_txb *txb) kfree(txb); } -static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, +static struct libipw_txb *libipw_alloc_txb(int nr_frags, int txb_size, int headroom, gfp_t gfp_mask) { - struct ieee80211_txb *txb; + struct libipw_txb *txb; int i; - txb = kmalloc(sizeof(struct ieee80211_txb) + (sizeof(u8 *) * nr_frags), + txb = kmalloc(sizeof(struct libipw_txb) + (sizeof(u8 *) * nr_frags), gfp_mask); if (!txb) return NULL; - memset(txb, 0, sizeof(struct ieee80211_txb)); + memset(txb, 0, sizeof(struct libipw_txb)); txb->nr_frags = nr_frags; txb->frag_size = txb_size; @@ -220,7 +220,7 @@ static struct ieee80211_txb *ieee80211_alloc_txb(int nr_frags, int txb_size, return txb; } -static int ieee80211_classify(struct sk_buff *skb) +static int libipw_classify(struct sk_buff *skb) { struct ethhdr *eth; struct iphdr *ip; @@ -252,11 +252,11 @@ static int ieee80211_classify(struct sk_buff *skb) /* Incoming skb is converted to a txb which consists of * a block of 802.11 fragment packets (stored as skbs) */ -int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) +int libipw_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ieee80211_device *ieee = netdev_priv(dev); - struct ieee80211_txb *txb = NULL; - struct ieee80211_hdr_3addrqos *frag_hdr; + struct libipw_device *ieee = netdev_priv(dev); + struct libipw_txb *txb = NULL; + struct libipw_hdr_3addrqos *frag_hdr; int i, bytes_per_frag, nr_frags, bytes_last_frag, frag_size, rts_required; unsigned long flags; @@ -264,7 +264,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) __be16 ether_type; int bytes, fc, hdr_len; struct sk_buff *skb_frag; - struct ieee80211_hdr_3addrqos header = {/* Ensure zero initialized */ + struct libipw_hdr_3addrqos header = {/* Ensure zero initialized */ .duration_id = 0, .seq_ctl = 0, .qos_ctl = 0 @@ -331,14 +331,14 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) memcpy(header.addr2, src, ETH_ALEN); memcpy(header.addr3, ieee->bssid, ETH_ALEN); } - hdr_len = IEEE80211_3ADDR_LEN; + hdr_len = LIBIPW_3ADDR_LEN; if (ieee->is_qos_active && ieee->is_qos_active(dev, skb)) { fc |= IEEE80211_STYPE_QOS_DATA; hdr_len += 2; - skb->priority = ieee80211_classify(skb); - header.qos_ctl |= cpu_to_le16(skb->priority & IEEE80211_QCTL_TID); + skb->priority = libipw_classify(skb); + header.qos_ctl |= cpu_to_le16(skb->priority & LIBIPW_QCTL_TID); } header.frame_ctl = cpu_to_le16(fc); @@ -362,12 +362,12 @@ int 
ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) skb_reserve(skb_new, crypt->ops->extra_msdu_prefix_len); memcpy(skb_put(skb_new, hdr_len), &header, hdr_len); snapped = 1; - ieee80211_copy_snap(skb_put(skb_new, SNAP_SIZE + sizeof(u16)), + libipw_copy_snap(skb_put(skb_new, SNAP_SIZE + sizeof(u16)), ether_type); skb_copy_from_linear_data(skb, skb_put(skb_new, skb->len), skb->len); res = crypt->ops->encrypt_msdu(skb_new, hdr_len, crypt->priv); if (res < 0) { - IEEE80211_ERROR("msdu encryption failed\n"); + LIBIPW_ERROR("msdu encryption failed\n"); dev_kfree_skb_any(skb_new); goto failed; } @@ -393,8 +393,8 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) * for it when determining the amount of payload space. */ bytes_per_frag = frag_size - hdr_len; if (ieee->config & - (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) - bytes_per_frag -= IEEE80211_FCS_LEN; + (CFG_LIBIPW_COMPUTE_FCS | CFG_LIBIPW_RESERVE_FCS)) + bytes_per_frag -= LIBIPW_FCS_LEN; /* Each fragment may need to have room for encryptiong * pre/postfix */ @@ -417,14 +417,14 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) } rts_required = (frag_size > ieee->rts - && ieee->config & CFG_IEEE80211_RTS); + && ieee->config & CFG_LIBIPW_RTS); if (rts_required) nr_frags++; /* When we allocate the TXB we allocate enough space for the reserve * and full fragment bytes (bytes_per_frag doesn't include prefix, * postfix, header, FCS, etc.) */ - txb = ieee80211_alloc_txb(nr_frags, frag_size, + txb = libipw_alloc_txb(nr_frags, frag_size, ieee->tx_headroom, GFP_ATOMIC); if (unlikely(!txb)) { printk(KERN_WARNING "%s: Could not allocate TXB\n", @@ -441,7 +441,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) if (rts_required) { skb_frag = txb->fragments[0]; frag_hdr = - (struct ieee80211_hdr_3addrqos *)skb_put(skb_frag, hdr_len); + (struct libipw_hdr_3addrqos *)skb_put(skb_frag, hdr_len); /* * Set header frame_ctl to the RTS. 
@@ -456,7 +456,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) header.frame_ctl = cpu_to_le16(fc); if (ieee->config & - (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) + (CFG_LIBIPW_COMPUTE_FCS | CFG_LIBIPW_RESERVE_FCS)) skb_put(skb_frag, 4); txb->rts_included = 1; @@ -472,7 +472,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) crypt->ops->extra_mpdu_prefix_len); frag_hdr = - (struct ieee80211_hdr_3addrqos *)skb_put(skb_frag, hdr_len); + (struct libipw_hdr_3addrqos *)skb_put(skb_frag, hdr_len); memcpy(frag_hdr, &header, hdr_len); /* If this is not the last fragment, then add the MOREFRAGS @@ -487,7 +487,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) } if (i == 0 && !snapped) { - ieee80211_copy_snap(skb_put + libipw_copy_snap(skb_put (skb_frag, SNAP_SIZE + sizeof(u16)), ether_type); bytes -= SNAP_SIZE + sizeof(u16); @@ -501,7 +501,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) /* Encryption routine will move the header forward in order * to insert the IV between the header and the payload */ if (host_encrypt) - ieee80211_encrypt_fragment(ieee, skb_frag, hdr_len); + libipw_encrypt_fragment(ieee, skb_frag, hdr_len); else if (host_build_iv) { atomic_inc(&crypt->refcnt); if (crypt->ops->build_iv) @@ -513,7 +513,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) } if (ieee->config & - (CFG_IEEE80211_COMPUTE_FCS | CFG_IEEE80211_RESERVE_FCS)) + (CFG_LIBIPW_COMPUTE_FCS | CFG_LIBIPW_RESERVE_FCS)) skb_put(skb_frag, 4); } @@ -530,7 +530,7 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } - ieee80211_txb_free(txb); + libipw_txb_free(txb); } return NETDEV_TX_OK; @@ -541,6 +541,6 @@ int ieee80211_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_errors++; return NETDEV_TX_BUSY; } -EXPORT_SYMBOL(ieee80211_xmit); +EXPORT_SYMBOL(libipw_xmit); -EXPORT_SYMBOL(ieee80211_txb_free); +EXPORT_SYMBOL(libipw_txb_free); diff --git a/drivers/net/wireless/ipw2x00/libipw_wx.c b/drivers/net/wireless/ipw2x00/libipw_wx.c index 3c0812db030a..f79ce57ebe68 100644 --- a/drivers/net/wireless/ipw2x00/libipw_wx.c +++ b/drivers/net/wireless/ipw2x00/libipw_wx.c @@ -37,9 +37,9 @@ #include #include -#include "ieee80211.h" +#include "libipw.h" -static const char *ieee80211_modes[] = { +static const char *libipw_modes[] = { "?", "a", "b", "ab", "g", "ag", "bg", "abg" }; @@ -54,9 +54,9 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start) } #define MAX_CUSTOM_LEN 64 -static char *ieee80211_translate_scan(struct ieee80211_device *ieee, +static char *libipw_translate_scan(struct libipw_device *ieee, char *start, char *stop, - struct ieee80211_network *network, + struct libipw_network *network, struct iw_request_info *info) { char custom[MAX_CUSTOM_LEN]; @@ -84,7 +84,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, /* Add the protocol name */ iwe.cmd = SIOCGIWNAME; snprintf(iwe.u.name, IFNAMSIZ, "IEEE 802.11%s", - ieee80211_modes[network->mode]); + libipw_modes[network->mode]); start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_CHAR_LEN); /* Add mode */ @@ -102,7 +102,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, /* Add channel and frequency */ /* Note : userspace automatically computes channel using iwrange */ iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = ieee80211_channel_to_freq(ieee, network->channel); + iwe.u.freq.m = libipw_channel_to_freq(ieee, network->channel); iwe.u.freq.e = 6; iwe.u.freq.i = 
0; start = iwe_stream_add_event(info, start, stop, &iwe, IW_EV_FREQ_LEN); @@ -155,7 +155,7 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, iwe.u.qual.updated = IW_QUAL_QUAL_UPDATED | IW_QUAL_LEVEL_UPDATED | IW_QUAL_NOISE_UPDATED; - if (!(network->stats.mask & IEEE80211_STATMASK_RSSI)) { + if (!(network->stats.mask & LIBIPW_STATMASK_RSSI)) { iwe.u.qual.updated |= IW_QUAL_QUAL_INVALID | IW_QUAL_LEVEL_INVALID; iwe.u.qual.qual = 0; @@ -180,14 +180,14 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, iwe.u.qual.qual = 0; } - if (!(network->stats.mask & IEEE80211_STATMASK_NOISE)) { + if (!(network->stats.mask & LIBIPW_STATMASK_NOISE)) { iwe.u.qual.updated |= IW_QUAL_NOISE_INVALID; iwe.u.qual.noise = 0; } else { iwe.u.qual.noise = network->stats.noise; } - if (!(network->stats.mask & IEEE80211_STATMASK_SIGNAL)) { + if (!(network->stats.mask & LIBIPW_STATMASK_SIGNAL)) { iwe.u.qual.updated |= IW_QUAL_LEVEL_INVALID; iwe.u.qual.level = 0; } else { @@ -237,14 +237,14 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, p = custom; p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), " Channel flags: "); - if (ieee80211_get_channel_flags(ieee, network->channel) & - IEEE80211_CH_INVALID) { + if (libipw_get_channel_flags(ieee, network->channel) & + LIBIPW_CH_INVALID) { iwe.cmd = IWEVCUSTOM; p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), "INVALID "); } - if (ieee80211_get_channel_flags(ieee, network->channel) & - IEEE80211_CH_RADAR_DETECT) { + if (libipw_get_channel_flags(ieee, network->channel) & + LIBIPW_CH_RADAR_DETECT) { iwe.cmd = IWEVCUSTOM; p += snprintf(p, MAX_CUSTOM_LEN - (p - custom), "DFS "); } @@ -259,11 +259,11 @@ static char *ieee80211_translate_scan(struct ieee80211_device *ieee, #define SCAN_ITEM_SIZE 128 -int ieee80211_wx_get_scan(struct ieee80211_device *ieee, +int libipw_wx_get_scan(struct libipw_device *ieee, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { - struct ieee80211_network *network; + struct libipw_network *network; unsigned long flags; int err = 0; @@ -272,7 +272,7 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, int i = 0; DECLARE_SSID_BUF(ssid); - IEEE80211_DEBUG_WX("Getting scan\n"); + LIBIPW_DEBUG_WX("Getting scan\n"); spin_lock_irqsave(&ieee->lock, flags); @@ -285,10 +285,10 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, if (ieee->scan_age == 0 || time_after(network->last_scanned + ieee->scan_age, jiffies)) - ev = ieee80211_translate_scan(ieee, ev, stop, network, + ev = libipw_translate_scan(ieee, ev, stop, network, info); else { - IEEE80211_DEBUG_SCAN("Not showing network '%s (" + LIBIPW_DEBUG_SCAN("Not showing network '%s (" "%pM)' due to age (%ums).\n", print_ssid(ssid, network->ssid, network->ssid_len), @@ -303,18 +303,18 @@ int ieee80211_wx_get_scan(struct ieee80211_device *ieee, wrqu->data.length = ev - extra; wrqu->data.flags = 0; - IEEE80211_DEBUG_WX("exit: %d networks returned.\n", i); + LIBIPW_DEBUG_WX("exit: %d networks returned.\n", i); return err; } -int ieee80211_wx_set_encode(struct ieee80211_device *ieee, +int libipw_wx_set_encode(struct libipw_device *ieee, struct iw_request_info *info, union iwreq_data *wrqu, char *keybuf) { struct iw_point *erq = &(wrqu->encoding); struct net_device *dev = ieee->dev; - struct ieee80211_security sec = { + struct libipw_security sec = { .flags = 0 }; int i, key, key_provided, len; @@ -322,7 +322,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, int host_crypto = ieee->host_encrypt || ieee->host_decrypt 
|| ieee->host_build_iv; DECLARE_SSID_BUF(ssid); - IEEE80211_DEBUG_WX("SET_ENCODE\n"); + LIBIPW_DEBUG_WX("SET_ENCODE\n"); key = erq->flags & IW_ENCODE_INDEX; if (key) { @@ -335,18 +335,18 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, key = ieee->crypt_info.tx_keyidx; } - IEEE80211_DEBUG_WX("Key: %d [%s]\n", key, key_provided ? + LIBIPW_DEBUG_WX("Key: %d [%s]\n", key, key_provided ? "provided" : "default"); crypt = &ieee->crypt_info.crypt[key]; if (erq->flags & IW_ENCODE_DISABLED) { if (key_provided && *crypt) { - IEEE80211_DEBUG_WX("Disabling encryption on key %d.\n", + LIBIPW_DEBUG_WX("Disabling encryption on key %d.\n", key); lib80211_crypt_delayed_deinit(&ieee->crypt_info, crypt); } else - IEEE80211_DEBUG_WX("Disabling encryption.\n"); + LIBIPW_DEBUG_WX("Disabling encryption.\n"); /* Check all the keys to see if any are still configured, * and if no key index was provided, de-init them all */ @@ -410,7 +410,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, /* If a new key was provided, set it up */ if (erq->length > 0) { -#ifdef CONFIG_IEEE80211_DEBUG +#ifdef CONFIG_LIBIPW_DEBUG DECLARE_SSID_BUF(ssid); #endif @@ -419,7 +419,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, if (len > erq->length) memset(sec.keys[key] + erq->length, 0, len - erq->length); - IEEE80211_DEBUG_WX("Setting key %d to '%s' (%d:%d bytes)\n", + LIBIPW_DEBUG_WX("Setting key %d to '%s' (%d:%d bytes)\n", key, print_ssid(ssid, sec.keys[key], len), erq->length, len); sec.key_sizes[key] = len; @@ -438,7 +438,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, NULL, (*crypt)->priv); if (len == 0) { /* Set a default key of all 0 */ - IEEE80211_DEBUG_WX("Setting key %d to all " + LIBIPW_DEBUG_WX("Setting key %d to all " "zero.\n", key); memset(sec.keys[key], 0, 13); (*crypt)->ops->set_key(sec.keys[key], 13, NULL, @@ -449,7 +449,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, } /* No key data - just set the default TX key index */ if (key_provided) { - IEEE80211_DEBUG_WX("Setting key %d to default Tx " + LIBIPW_DEBUG_WX("Setting key %d to default Tx " "key.\n", key); ieee->crypt_info.tx_keyidx = key; sec.active_key = key; @@ -461,7 +461,7 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, sec.auth_mode = ieee->open_wep ? WLAN_AUTH_OPEN : WLAN_AUTH_SHARED_KEY; sec.flags |= SEC_AUTH_MODE; - IEEE80211_DEBUG_WX("Auth: %s\n", + LIBIPW_DEBUG_WX("Auth: %s\n", sec.auth_mode == WLAN_AUTH_OPEN ? 
"OPEN" : "SHARED KEY"); } @@ -490,16 +490,16 @@ int ieee80211_wx_set_encode(struct ieee80211_device *ieee, return 0; } -int ieee80211_wx_get_encode(struct ieee80211_device *ieee, +int libipw_wx_get_encode(struct libipw_device *ieee, struct iw_request_info *info, union iwreq_data *wrqu, char *keybuf) { struct iw_point *erq = &(wrqu->encoding); int len, key; struct lib80211_crypt_data *crypt; - struct ieee80211_security *sec = &ieee->sec; + struct libipw_security *sec = &ieee->sec; - IEEE80211_DEBUG_WX("GET_ENCODE\n"); + LIBIPW_DEBUG_WX("GET_ENCODE\n"); key = erq->flags & IW_ENCODE_INDEX; if (key) { @@ -532,7 +532,7 @@ int ieee80211_wx_get_encode(struct ieee80211_device *ieee, return 0; } -int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, +int libipw_wx_set_encodeext(struct libipw_device *ieee, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { @@ -545,7 +545,7 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, struct lib80211_crypto_ops *ops; struct lib80211_crypt_data **crypt; - struct ieee80211_security sec = { + struct libipw_security sec = { .flags = 0, }; @@ -611,7 +611,7 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, module = "lib80211_crypt_ccmp"; break; default: - IEEE80211_DEBUG_WX("%s: unknown crypto alg %d\n", + LIBIPW_DEBUG_WX("%s: unknown crypto alg %d\n", dev->name, ext->alg); ret = -EINVAL; goto done; @@ -623,7 +623,7 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, ops = lib80211_get_crypto_ops(alg); } if (ops == NULL) { - IEEE80211_DEBUG_WX("%s: unknown crypto alg %d\n", + LIBIPW_DEBUG_WX("%s: unknown crypto alg %d\n", dev->name, ext->alg); ret = -EINVAL; goto done; @@ -653,7 +653,7 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, if (ext->key_len > 0 && (*crypt)->ops->set_key && (*crypt)->ops->set_key(ext->key, ext->key_len, ext->rx_seq, (*crypt)->priv) < 0) { - IEEE80211_DEBUG_WX("%s: key setting failed\n", dev->name); + LIBIPW_DEBUG_WX("%s: key setting failed\n", dev->name); ret = -EINVAL; goto done; } @@ -700,20 +700,20 @@ int ieee80211_wx_set_encodeext(struct ieee80211_device *ieee, if (ieee->reset_on_keychange && ieee->iw_mode != IW_MODE_INFRA && ieee->reset_port && ieee->reset_port(dev)) { - IEEE80211_DEBUG_WX("%s: reset_port failed\n", dev->name); + LIBIPW_DEBUG_WX("%s: reset_port failed\n", dev->name); return -EINVAL; } return ret; } -int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee, +int libipw_wx_get_encodeext(struct libipw_device *ieee, struct iw_request_info *info, union iwreq_data *wrqu, char *extra) { struct iw_point *encoding = &wrqu->encoding; struct iw_encode_ext *ext = (struct iw_encode_ext *)extra; - struct ieee80211_security *sec = &ieee->sec; + struct libipw_security *sec = &ieee->sec; int idx, max_key_len; max_key_len = encoding->length - sizeof(*ext); @@ -763,9 +763,9 @@ int ieee80211_wx_get_encodeext(struct ieee80211_device *ieee, return 0; } -EXPORT_SYMBOL(ieee80211_wx_set_encodeext); -EXPORT_SYMBOL(ieee80211_wx_get_encodeext); +EXPORT_SYMBOL(libipw_wx_set_encodeext); +EXPORT_SYMBOL(libipw_wx_get_encodeext); -EXPORT_SYMBOL(ieee80211_wx_get_scan); -EXPORT_SYMBOL(ieee80211_wx_set_encode); -EXPORT_SYMBOL(ieee80211_wx_get_encode); +EXPORT_SYMBOL(libipw_wx_get_scan); +EXPORT_SYMBOL(libipw_wx_set_encode); +EXPORT_SYMBOL(libipw_wx_get_encode); diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index 2b3fbbb8669e..e9054a283fde 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -416,13 +416,13 @@ struct iw_spy_data * 
data (i.e. valid as long as struct net_device exist, same locking rules). */ /* Forward declaration */ -struct ieee80211_device; +struct libipw_device; /* The struct */ struct iw_public_data { /* Driver enhanced spy support */ struct iw_spy_data * spy_data; - /* Structure managed by the in-kernel IEEE 802.11 layer */ - struct ieee80211_device * ieee80211; + /* Legacy structure managed by the ipw2x00-specific IEEE 802.11 layer */ + struct libipw_device * libipw; }; /**************************** PROTOTYPES ****************************/ -- cgit v1.2.3 From 103bf9f7d35849bce52ad412e4da5063b0716969 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 20 Aug 2009 16:34:15 -0400 Subject: mac80211: remove ieee80211_rx namespace hack With the libipw naming scheme change, it is no longer necessary for mac80211 to avoid the ieee80211_rx name clash. Reported-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 11 +---------- net/mac80211/rx.c | 4 ++-- 2 files changed, 3 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index aac84d7bd46e..466859b285e1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1657,12 +1657,6 @@ void ieee80211_free_hw(struct ieee80211_hw *hw); */ void ieee80211_restart_hw(struct ieee80211_hw *hw); -/* - * trick to avoid symbol clashes with the ieee80211 subsystem, - * use the inline below instead - */ -void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb); - /** * ieee80211_rx - receive frame * @@ -1678,10 +1672,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb); * @hw: the hardware this frame came in on * @skb: the buffer to receive, owned by mac80211 after this call */ -static inline void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) -{ - __ieee80211_rx(hw, skb); -} +void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb); /** * ieee80211_rx_irqsafe - receive frame diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7065fd7e7ba2..dff2239db6e2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2440,7 +2440,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, * This is the receive path handler. It is called by a low level driver when an * 802.11 MPDU is received from the hardware. */ -void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) +void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate = NULL; @@ -2523,7 +2523,7 @@ void __ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) rcu_read_unlock(); } -EXPORT_SYMBOL(__ieee80211_rx); +EXPORT_SYMBOL(ieee80211_rx); /* This is a version of the rx handler that can be called from hard irq * context. Post the skb on the queue and schedule the tasklet */ -- cgit v1.2.3 From 06e4da268c0e8f3b8408403d65e47d2885a78ff2 Mon Sep 17 00:00:00 2001 From: Gábor Stefanik Date: Wed, 26 Aug 2009 20:51:26 +0200 Subject: ssb: Implement PMU LDO control and use it in b43 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the "PMU LDO set voltage" and "PMU LDO PA ref enable" functions, and use them during LP-PHY baseband init in b43. Signed-off-by: Gábor Stefanik Signed-off-by: John W. 
Linville --- drivers/net/wireless/b43/phy_lp.c | 10 +--- drivers/ssb/driver_chipcommon_pmu.c | 94 +++++++++++++++++++++++++++++++ include/linux/ssb/ssb_driver_chipcommon.h | 10 ++++ 3 files changed, 107 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/b43/phy_lp.c b/drivers/net/wireless/b43/phy_lp.c index 1a57d3390e92..80f245c04042 100644 --- a/drivers/net/wireless/b43/phy_lp.c +++ b/drivers/net/wireless/b43/phy_lp.c @@ -234,19 +234,15 @@ static void lpphy_baseband_rev0_1_init(struct b43_wldev *dev) if ((bus->sprom.boardflags_lo & B43_BFL_FEM) && ((b43_current_band(dev->wl) == IEEE80211_BAND_5GHZ) || (bus->sprom.boardflags_hi & B43_BFH_PAREF))) { - /* TODO: - * Set the LDO voltage to 0x0028 - FIXME: What is this? - * Call sb_pmu_set_ldo_voltage with 4 and the LDO voltage - * as arguments - * Call sb_pmu_paref_ldo_enable with argument TRUE - */ + ssb_pmu_set_ldo_voltage(&bus->chipco, LDO_PAREF, 0x28); + ssb_pmu_set_ldo_paref(&bus->chipco, true); if (dev->phy.rev == 0) { b43_phy_maskset(dev, B43_LPPHY_LP_RF_SIGNAL_LUT, 0xFFCF, 0x0010); } b43_lptab_write(dev, B43_LPTAB16(11, 7), 60); } else { - //TODO: Call ssb_pmu_paref_ldo_enable with argument FALSE + ssb_pmu_set_ldo_paref(&bus->chipco, false); b43_phy_maskset(dev, B43_LPPHY_LP_RF_SIGNAL_LUT, 0xFFCF, 0x0020); b43_lptab_write(dev, B43_LPTAB16(11, 7), 100); diff --git a/drivers/ssb/driver_chipcommon_pmu.c b/drivers/ssb/driver_chipcommon_pmu.c index 4aaddeec55a2..64abd11f6fbb 100644 --- a/drivers/ssb/driver_chipcommon_pmu.c +++ b/drivers/ssb/driver_chipcommon_pmu.c @@ -28,6 +28,21 @@ static void ssb_chipco_pll_write(struct ssb_chipcommon *cc, chipco_write32(cc, SSB_CHIPCO_PLLCTL_DATA, value); } +static void ssb_chipco_regctl_maskset(struct ssb_chipcommon *cc, + u32 offset, u32 mask, u32 set) +{ + u32 value; + + chipco_read32(cc, SSB_CHIPCO_REGCTL_ADDR); + chipco_write32(cc, SSB_CHIPCO_REGCTL_ADDR, offset); + chipco_read32(cc, SSB_CHIPCO_REGCTL_ADDR); + value = chipco_read32(cc, SSB_CHIPCO_REGCTL_DATA); + value &= mask; + value |= set; + chipco_write32(cc, SSB_CHIPCO_REGCTL_DATA, value); + chipco_read32(cc, SSB_CHIPCO_REGCTL_DATA); +} + struct pmu0_plltab_entry { u16 freq; /* Crystal frequency in kHz.*/ u8 xf; /* Crystal frequency value for PMU control */ @@ -506,3 +521,82 @@ void ssb_pmu_init(struct ssb_chipcommon *cc) ssb_pmu_pll_init(cc); ssb_pmu_resources_init(cc); } + +void ssb_pmu_set_ldo_voltage(struct ssb_chipcommon *cc, + enum ssb_pmu_ldo_volt_id id, u32 voltage) +{ + struct ssb_bus *bus = cc->dev->bus; + u32 addr, shift, mask; + + switch (bus->chip_id) { + case 0x4328: + case 0x5354: + switch (id) { + case LDO_VOLT1: + addr = 2; + shift = 25; + mask = 0xF; + break; + case LDO_VOLT2: + addr = 3; + shift = 1; + mask = 0xF; + break; + case LDO_VOLT3: + addr = 3; + shift = 9; + mask = 0xF; + break; + case LDO_PAREF: + addr = 3; + shift = 17; + mask = 0x3F; + break; + default: + SSB_WARN_ON(1); + return; + } + break; + case 0x4312: + if (SSB_WARN_ON(id != LDO_PAREF)) + return; + addr = 0; + shift = 21; + mask = 0x3F; + break; + default: + return; + } + + ssb_chipco_regctl_maskset(cc, addr, ~(mask << shift), + (voltage & mask) << shift); +} + +void ssb_pmu_set_ldo_paref(struct ssb_chipcommon *cc, bool on) +{ + struct ssb_bus *bus = cc->dev->bus; + int ldo; + + switch (bus->chip_id) { + case 0x4312: + ldo = SSB_PMURES_4312_PA_REF_LDO; + break; + case 0x4328: + ldo = SSB_PMURES_4328_PA_REF_LDO; + break; + case 0x5354: + ldo = SSB_PMURES_5354_PA_REF_LDO; + break; + default: + return; + } + + if (on) + 
chipco_set32(cc, SSB_CHIPCO_PMU_MINRES_MSK, 1 << ldo); + else + chipco_mask32(cc, SSB_CHIPCO_PMU_MINRES_MSK, ~(1 << ldo)); + chipco_read32(cc, SSB_CHIPCO_PMU_MINRES_MSK); //SPEC FIXME found via mmiotrace - dummy read? +} + +EXPORT_SYMBOL(ssb_pmu_set_ldo_voltage); +EXPORT_SYMBOL(ssb_pmu_set_ldo_paref); diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h index d3b1d18922f2..4e27acf0a92f 100644 --- a/include/linux/ssb/ssb_driver_chipcommon.h +++ b/include/linux/ssb/ssb_driver_chipcommon.h @@ -629,5 +629,15 @@ extern int ssb_chipco_serial_init(struct ssb_chipcommon *cc, /* PMU support */ extern void ssb_pmu_init(struct ssb_chipcommon *cc); +enum ssb_pmu_ldo_volt_id { + LDO_PAREF = 0, + LDO_VOLT1, + LDO_VOLT2, + LDO_VOLT3, +}; + +void ssb_pmu_set_ldo_voltage(struct ssb_chipcommon *cc, + enum ssb_pmu_ldo_volt_id id, u32 voltage); +void ssb_pmu_set_ldo_paref(struct ssb_chipcommon *cc, bool on); #endif /* LINUX_SSB_CHIPCO_H_ */ -- cgit v1.2.3 From b24aad44438d5bc21cbbfb94a99d9bf710d8295b Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Fri, 24 Jul 2009 13:30:17 +0800 Subject: ACPICA: Split large ACPI table header Split out the non-acpi-defined ACPI tables into the existing (but empty) actbl2.h file. Preparation for new ACPI 4.0 tables. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl.h | 35 +-- include/acpi/actbl1.h | 553 +---------------------------------------------- include/acpi/actbl2.h | 585 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 611 insertions(+), 562 deletions(-) create mode 100644 include/acpi/actbl2.h (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 0649a5670026..55fcfc6725b2 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -44,6 +44,19 @@ #ifndef __ACTBL_H__ #define __ACTBL_H__ +/******************************************************************************* + * + * Fundamental ACPI tables + * + * This file contains definitions for the ACPI tables that are directly consumed + * by ACPICA. All other tables are consumed by the OS-dependent ACPI-related + * device drivers and other OS support code. + * + * The RSDP and FACS do not use the common ACPI table header. All other ACPI + * tables use the header. + * + ******************************************************************************/ + /* * Values for description table header signatures. Useful because they make * it more difficult to inadvertently type in the wrong signature. @@ -65,11 +78,6 @@ #pragma pack(1) /* - * These are the ACPI tables that are directly consumed by the subsystem. - * - * The RSDP and FACS do not use the common ACPI table header. All other ACPI - * tables use the header. - * * Note about bitfields: The u8 type is used for bitfields in ACPI tables. * This is the only type that is even remotely portable. Anything else is not * portable, so do not use any other bitfield types. @@ -77,9 +85,8 @@ /******************************************************************************* * - * ACPI Table Header. This common header is used by all tables except the - * RSDP and FACS. The define is used for direct inclusion of header into - * other ACPI tables + * Master ACPI Table Header. This common header is used by all ACPI tables + * except the RSDP and FACS. 
* ******************************************************************************/ @@ -95,13 +102,16 @@ struct acpi_table_header { u32 asl_compiler_revision; /* ASL compiler version */ }; -/* +/******************************************************************************* + * * GAS - Generic Address Structure (ACPI 2.0+) * * Note: Since this structure is used in the ACPI tables, it is byte aligned. - * If misalignment is not supported, access to the Address field must be - * performed with care. - */ + * If misaliged access is not supported by the hardware, accesses to the + * 64-bit Address field must be performed with care. + * + ******************************************************************************/ + struct acpi_generic_address { u8 space_id; /* Address space where struct or register exists */ u8 bit_width; /* Size in bits of given register */ @@ -325,5 +335,6 @@ struct acpi_table_desc { */ #include +#include #endif /* __ACTBL_H__ */ diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index ec36693f868c..582af1fcb8f5 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -46,41 +46,29 @@ /******************************************************************************* * - * Additional ACPI Tables + * Additional ACPI Tables (1) * * These tables are not consumed directly by the ACPICA subsystem, but are * included here to support device drivers and the AML disassembler. * + * The tables in this file are fully defined within the ACPI specification. + * ******************************************************************************/ /* * Values for description table header signatures. Useful because they make * it more difficult to inadvertently type in the wrong signature. */ -#define ACPI_SIG_ASF "ASF!" /* Alert Standard Format table */ #define ACPI_SIG_BERT "BERT" /* Boot Error Record Table */ -#define ACPI_SIG_BOOT "BOOT" /* Simple Boot Flag Table */ #define ACPI_SIG_CPEP "CPEP" /* Corrected Platform Error Polling table */ -#define ACPI_SIG_DBGP "DBGP" /* Debug Port table */ -#define ACPI_SIG_DMAR "DMAR" /* DMA Remapping table */ #define ACPI_SIG_ECDT "ECDT" /* Embedded Controller Boot Resources Table */ #define ACPI_SIG_EINJ "EINJ" /* Error Injection table */ #define ACPI_SIG_ERST "ERST" /* Error Record Serialization Table */ #define ACPI_SIG_HEST "HEST" /* Hardware Error Source Table */ -#define ACPI_SIG_HPET "HPET" /* High Precision Event Timer table */ -#define ACPI_SIG_IBFT "IBFT" /* i_sCSI Boot Firmware Table */ #define ACPI_SIG_MADT "APIC" /* Multiple APIC Description Table */ -#define ACPI_SIG_MCFG "MCFG" /* PCI Memory Mapped Configuration table */ #define ACPI_SIG_SBST "SBST" /* Smart Battery Specification Table */ -#define ACPI_SIG_SLIC "SLIC" /* Software Licensing Description Table */ #define ACPI_SIG_SLIT "SLIT" /* System Locality Distance Information Table */ -#define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ -#define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ #define ACPI_SIG_SRAT "SRAT" /* System Resource Affinity Table */ -#define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ -#define ACPI_SIG_UEFI "UEFI" /* Uefi Boot Optimization Table */ -#define ACPI_SIG_WDAT "WDAT" /* Watchdog Action Table */ -#define ACPI_SIG_WDRT "WDRT" /* Watchdog Resource Table */ /* * All tables must be byte-packed to match the ACPI specification, since @@ -113,115 +101,6 @@ struct acpi_whea_header { u64 mask; /* Bitmask required for this register instruction */ }; 
-/******************************************************************************* - * - * ASF - Alert Standard Format table (Signature "ASF!") - * - * Conforms to the Alert Standard Format Specification V2.0, 23 April 2003 - * - ******************************************************************************/ - -struct acpi_table_asf { - struct acpi_table_header header; /* Common ACPI table header */ -}; - -/* ASF subtable header */ - -struct acpi_asf_header { - u8 type; - u8 reserved; - u16 length; -}; - -/* Values for Type field above */ - -enum acpi_asf_type { - ACPI_ASF_TYPE_INFO = 0, - ACPI_ASF_TYPE_ALERT = 1, - ACPI_ASF_TYPE_CONTROL = 2, - ACPI_ASF_TYPE_BOOT = 3, - ACPI_ASF_TYPE_ADDRESS = 4, - ACPI_ASF_TYPE_RESERVED = 5 -}; - -/* - * ASF subtables - */ - -/* 0: ASF Information */ - -struct acpi_asf_info { - struct acpi_asf_header header; - u8 min_reset_value; - u8 min_poll_interval; - u16 system_id; - u32 mfg_id; - u8 flags; - u8 reserved2[3]; -}; - -/* 1: ASF Alerts */ - -struct acpi_asf_alert { - struct acpi_asf_header header; - u8 assert_mask; - u8 deassert_mask; - u8 alerts; - u8 data_length; -}; - -struct acpi_asf_alert_data { - u8 address; - u8 command; - u8 mask; - u8 value; - u8 sensor_type; - u8 type; - u8 offset; - u8 source_type; - u8 severity; - u8 sensor_number; - u8 entity; - u8 instance; -}; - -/* 2: ASF Remote Control */ - -struct acpi_asf_remote { - struct acpi_asf_header header; - u8 controls; - u8 data_length; - u16 reserved2; -}; - -struct acpi_asf_control_data { - u8 function; - u8 address; - u8 command; - u8 value; -}; - -/* 3: ASF RMCP Boot Options */ - -struct acpi_asf_rmcp { - struct acpi_asf_header header; - u8 capabilities[7]; - u8 completion_code; - u32 enterprise_id; - u8 command; - u16 parameter; - u16 boot_options; - u16 oem_parameters; -}; - -/* 4: ASF Address */ - -struct acpi_asf_address { - struct acpi_asf_header header; - u8 eprom_address; - u8 devices; -}; - /******************************************************************************* * * BERT - Boot Error Record Table @@ -251,18 +130,6 @@ struct acpi_bert_region { #define ACPI_BERT_MULTIPLE_UNCORRECTABLE (4) #define ACPI_BERT_MULTIPLE_CORRECTABLE (8) -/******************************************************************************* - * - * BOOT - Simple Boot Flag Table - * - ******************************************************************************/ - -struct acpi_table_boot { - struct acpi_table_header header; /* Common ACPI table header */ - u8 cmos_index; /* Index in CMOS RAM for the boot register */ - u8 reserved[3]; -}; - /******************************************************************************* * * CPEP - Corrected Platform Error Polling table @@ -284,123 +151,6 @@ struct acpi_cpep_polling { u32 interval; /* Polling interval (msec) */ }; -/******************************************************************************* - * - * DBGP - Debug Port table - * - ******************************************************************************/ - -struct acpi_table_dbgp { - struct acpi_table_header header; /* Common ACPI table header */ - u8 type; /* 0=full 16550, 1=subset of 16550 */ - u8 reserved[3]; - struct acpi_generic_address debug_port; -}; - -/******************************************************************************* - * - * DMAR - DMA Remapping table - * From "Intel Virtualization Technology for Directed I/O", Sept. 
2007 - * - ******************************************************************************/ - -struct acpi_table_dmar { - struct acpi_table_header header; /* Common ACPI table header */ - u8 width; /* Host Address Width */ - u8 flags; - u8 reserved[10]; -}; - -/* Flags */ - -#define ACPI_DMAR_INTR_REMAP (1) - -/* DMAR subtable header */ - -struct acpi_dmar_header { - u16 type; - u16 length; -}; - -/* Values for subtable type in struct acpi_dmar_header */ - -enum acpi_dmar_type { - ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, - ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, - ACPI_DMAR_TYPE_ATSR = 2, - ACPI_DMAR_TYPE_RESERVED = 3 /* 3 and greater are reserved */ -}; - -struct acpi_dmar_device_scope { - u8 entry_type; - u8 length; - u16 reserved; - u8 enumeration_id; - u8 bus; -}; - -/* Values for entry_type in struct acpi_dmar_device_scope */ - -enum acpi_dmar_scope_type { - ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, - ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, - ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, - ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, - ACPI_DMAR_SCOPE_TYPE_HPET = 4, - ACPI_DMAR_SCOPE_TYPE_RESERVED = 5 /* 5 and greater are reserved */ -}; - -struct acpi_dmar_pci_path { - u8 dev; - u8 fn; -}; - -/* - * DMAR Sub-tables, correspond to Type in struct acpi_dmar_header - */ - -/* 0: Hardware Unit Definition */ - -struct acpi_dmar_hardware_unit { - struct acpi_dmar_header header; - u8 flags; - u8 reserved; - u16 segment; - u64 address; /* Register Base Address */ -}; - -/* Flags */ - -#define ACPI_DMAR_INCLUDE_ALL (1) - -/* 1: Reserved Memory Defininition */ - -struct acpi_dmar_reserved_memory { - struct acpi_dmar_header header; - u16 reserved; - u16 segment; - u64 base_address; /* 4_k aligned base address */ - u64 end_address; /* 4_k aligned limit address */ -}; - -/* Flags */ - -#define ACPI_DMAR_ALLOW_ALL (1) - - -/* 2: Root Port ATS Capability Reporting Structure */ - -struct acpi_dmar_atsr { - struct acpi_dmar_header header; - u8 flags; - u8 reserved; - u16 segment; -}; - -/* Flags */ - -#define ACPI_DMAR_ALL_PORTS (1) - /******************************************************************************* * * ECDT - Embedded Controller Boot Resources Table @@ -762,119 +512,6 @@ struct acpi_hest_generic { u32 error_status_block_length; }; -/******************************************************************************* - * - * HPET - High Precision Event Timer table - * - ******************************************************************************/ - -struct acpi_table_hpet { - struct acpi_table_header header; /* Common ACPI table header */ - u32 id; /* Hardware ID of event timer block */ - struct acpi_generic_address address; /* Address of event timer block */ - u8 sequence; /* HPET sequence number */ - u16 minimum_tick; /* Main counter min tick, periodic mode */ - u8 flags; -}; - -/*! Flags */ - -#define ACPI_HPET_PAGE_PROTECT (1) /* 00: No page protection */ -#define ACPI_HPET_PAGE_PROTECT_4 (1<<1) /* 01: 4KB page protected */ -#define ACPI_HPET_PAGE_PROTECT_64 (1<<2) /* 02: 64KB page protected */ - -/*! 
[End] no source code translation !*/ - -/******************************************************************************* - * - * IBFT - Boot Firmware Table - * - ******************************************************************************/ - -struct acpi_table_ibft { - struct acpi_table_header header; /* Common ACPI table header */ - u8 reserved[12]; -}; - -/* IBFT common subtable header */ - -struct acpi_ibft_header { - u8 type; - u8 version; - u16 length; - u8 index; - u8 flags; -}; - -/* Values for Type field above */ - -enum acpi_ibft_type { - ACPI_IBFT_TYPE_NOT_USED = 0, - ACPI_IBFT_TYPE_CONTROL = 1, - ACPI_IBFT_TYPE_INITIATOR = 2, - ACPI_IBFT_TYPE_NIC = 3, - ACPI_IBFT_TYPE_TARGET = 4, - ACPI_IBFT_TYPE_EXTENSIONS = 5, - ACPI_IBFT_TYPE_RESERVED = 6 /* 6 and greater are reserved */ -}; - -/* IBFT subtables */ - -struct acpi_ibft_control { - struct acpi_ibft_header header; - u16 extensions; - u16 initiator_offset; - u16 nic0_offset; - u16 target0_offset; - u16 nic1_offset; - u16 target1_offset; -}; - -struct acpi_ibft_initiator { - struct acpi_ibft_header header; - u8 sns_server[16]; - u8 slp_server[16]; - u8 primary_server[16]; - u8 secondary_server[16]; - u16 name_length; - u16 name_offset; -}; - -struct acpi_ibft_nic { - struct acpi_ibft_header header; - u8 ip_address[16]; - u8 subnet_mask_prefix; - u8 origin; - u8 gateway[16]; - u8 primary_dns[16]; - u8 secondary_dns[16]; - u8 dhcp[16]; - u16 vlan; - u8 mac_address[6]; - u16 pci_address; - u16 name_length; - u16 name_offset; -}; - -struct acpi_ibft_target { - struct acpi_ibft_header header; - u8 target_ip_address[16]; - u16 target_ip_socket; - u8 target_boot_lun[8]; - u8 chap_type; - u8 nic_association; - u16 target_name_length; - u16 target_name_offset; - u16 chap_name_length; - u16 chap_name_offset; - u16 chap_secret_length; - u16 chap_secret_offset; - u16 reverse_chap_name_length; - u16 reverse_chap_name_offset; - u16 reverse_chap_secret_length; - u16 reverse_chap_secret_offset; -}; - /******************************************************************************* * * MADT - Multiple APIC Description Table @@ -1056,27 +693,6 @@ struct acpi_madt_local_x2apic_nmi { #define ACPI_MADT_TRIGGER_RESERVED (2<<2) #define ACPI_MADT_TRIGGER_LEVEL (3<<2) -/******************************************************************************* - * - * MCFG - PCI Memory Mapped Configuration table and sub-table - * - ******************************************************************************/ - -struct acpi_table_mcfg { - struct acpi_table_header header; /* Common ACPI table header */ - u8 reserved[8]; -}; - -/* Subtable */ - -struct acpi_mcfg_allocation { - u64 address; /* Base address, processor-relative */ - u16 pci_segment; /* PCI segment group number */ - u8 start_bus_number; /* Starting PCI Bus number */ - u8 end_bus_number; /* Final PCI Bus number */ - u32 reserved; -}; - /******************************************************************************* * * SBST - Smart Battery Specification Table @@ -1102,59 +718,6 @@ struct acpi_table_slit { u8 entry[1]; /* Real size = localities^2 */ }; -/******************************************************************************* - * - * SPCR - Serial Port Console Redirection table - * - ******************************************************************************/ - -struct acpi_table_spcr { - struct acpi_table_header header; /* Common ACPI table header */ - u8 interface_type; /* 0=full 16550, 1=subset of 16550 */ - u8 reserved[3]; - struct acpi_generic_address serial_port; - u8 interrupt_type; - u8 
pc_interrupt; - u32 interrupt; - u8 baud_rate; - u8 parity; - u8 stop_bits; - u8 flow_control; - u8 terminal_type; - u8 reserved1; - u16 pci_device_id; - u16 pci_vendor_id; - u8 pci_bus; - u8 pci_device; - u8 pci_function; - u32 pci_flags; - u8 pci_segment; - u32 reserved2; -}; - -/******************************************************************************* - * - * SPMI - Server Platform Management Interface table - * - ******************************************************************************/ - -struct acpi_table_spmi { - struct acpi_table_header header; /* Common ACPI table header */ - u8 reserved; - u8 interface_type; - u16 spec_revision; /* Version of IPMI */ - u8 interrupt_type; - u8 gpe_number; /* GPE assigned */ - u8 reserved1; - u8 pci_device_flag; - u32 interrupt; - struct acpi_generic_address ipmi_register; - u8 pci_segment; - u8 pci_bus; - u8 pci_device; - u8 pci_function; -}; - /******************************************************************************* * * SRAT - System Resource Affinity Table @@ -1227,116 +790,6 @@ struct acpi_srat_x2apic_cpu_affinity { #define ACPI_SRAT_CPU_ENABLED (1) /* 00: Use affinity structure */ -/******************************************************************************* - * - * TCPA - Trusted Computing Platform Alliance table - * - ******************************************************************************/ - -struct acpi_table_tcpa { - struct acpi_table_header header; /* Common ACPI table header */ - u16 reserved; - u32 max_log_length; /* Maximum length for the event log area */ - u64 log_address; /* Address of the event log area */ -}; - -/******************************************************************************* - * - * UEFI - UEFI Boot optimization Table - * - ******************************************************************************/ - -struct acpi_table_uefi { - struct acpi_table_header header; /* Common ACPI table header */ - u8 identifier[16]; /* UUID identifier */ - u16 data_offset; /* Offset of remaining data in table */ - u8 data; -}; - -/******************************************************************************* - * - * WDAT - Watchdog Action Table - * - ******************************************************************************/ - -struct acpi_table_wdat { - struct acpi_table_header header; /* Common ACPI table header */ - u32 header_length; /* Watchdog Header Length */ - u16 pci_segment; /* PCI Segment number */ - u8 pci_bus; /* PCI Bus number */ - u8 pci_device; /* PCI Device number */ - u8 pci_function; /* PCI Function number */ - u8 reserved[3]; - u32 timer_period; /* Period of one timer count (msec) */ - u32 max_count; /* Maximum counter value supported */ - u32 min_count; /* Minimum counter value */ - u8 flags; - u8 reserved2[3]; - u32 entries; /* Number of watchdog entries that follow */ -}; - -/* WDAT Instruction Entries (actions) */ - -struct acpi_wdat_entry { - struct acpi_whea_header whea_header; /* Common header for WHEA tables */ -}; - -/* Values for Action field above */ - -enum acpi_wdat_actions { - ACPI_WDAT_RESET = 1, - ACPI_WDAT_GET_CURRENT_COUNTDOWN = 4, - ACPI_WDAT_GET_COUNTDOWN = 5, - ACPI_WDAT_SET_COUNTDOWN = 6, - ACPI_WDAT_GET_RUNNING_STATE = 8, - ACPI_WDAT_SET_RUNNING_STATE = 9, - ACPI_WDAT_GET_STOPPED_STATE = 10, - ACPI_WDAT_SET_STOPPED_STATE = 11, - ACPI_WDAT_GET_REBOOT = 16, - ACPI_WDAT_SET_REBOOT = 17, - ACPI_WDAT_GET_SHUTDOWN = 18, - ACPI_WDAT_SET_SHUTDOWN = 19, - ACPI_WDAT_GET_STATUS = 32, - ACPI_WDAT_SET_STATUS = 33, - ACPI_WDAT_ACTION_RESERVED = 34 /* 34 and greater are 
reserved */ -}; - -/* Values for Instruction field above */ - -enum acpi_wdat_instructions { - ACPI_WDAT_READ_VALUE = 0, - ACPI_WDAT_READ_COUNTDOWN = 1, - ACPI_WDAT_WRITE_VALUE = 2, - ACPI_WDAT_WRITE_COUNTDOWN = 3, - ACPI_WDAT_INSTRUCTION_RESERVED = 4, /* 4 and greater are reserved */ - ACPI_WDAT_PRESERVE_REGISTER = 0x80 /* Except for this value */ -}; - -/******************************************************************************* - * - * WDRT - Watchdog Resource Table - * - ******************************************************************************/ - -struct acpi_table_wdrt { - struct acpi_table_header header; /* Common ACPI table header */ - u32 header_length; /* Watchdog Header Length */ - u8 pci_segment; /* PCI Segment number */ - u8 pci_bus; /* PCI Bus number */ - u8 pci_device; /* PCI Device number */ - u8 pci_function; /* PCI Function number */ - u32 timer_period; /* Period of one timer count (msec) */ - u32 max_count; /* Maximum counter value supported */ - u32 min_count; /* Minimum counter value */ - u8 flags; - u8 reserved[3]; - u32 entries; /* Number of watchdog entries that follow */ -}; - -/* Flags */ - -#define ACPI_WDRT_TIMER_ENABLED (1) /* 00: Timer enabled */ - /* Reset to default packing */ #pragma pack() diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h new file mode 100644 index 000000000000..b271aba0e524 --- /dev/null +++ b/include/acpi/actbl2.h @@ -0,0 +1,585 @@ +#ifndef __ACTBL2_H__ +#define __ACTBL2_H__ + +/******************************************************************************* + * + * Additional ACPI Tables (2) + * + * These tables are not consumed directly by the ACPICA subsystem, but are + * included here to support device drivers and the AML disassembler. + * + * The tables in this file are defined by third-party specifications, and are + * not defined directly by the ACPI specification itself. + * + ******************************************************************************/ + +/* + * Values for description table header signatures. Useful because they make + * it more difficult to inadvertently type in the wrong signature. + */ +#define ACPI_SIG_ASF "ASF!" /* Alert Standard Format table */ +#define ACPI_SIG_BOOT "BOOT" /* Simple Boot Flag Table */ +#define ACPI_SIG_DBGP "DBGP" /* Debug Port table */ +#define ACPI_SIG_DMAR "DMAR" /* DMA Remapping table */ +#define ACPI_SIG_HPET "HPET" /* High Precision Event Timer table */ +#define ACPI_SIG_IBFT "IBFT" /* i_sCSI Boot Firmware Table */ +#define ACPI_SIG_MCFG "MCFG" /* PCI Memory Mapped Configuration table */ +#define ACPI_SIG_SLIC "SLIC" /* Software Licensing Description Table */ +#define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ +#define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ +#define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ +#define ACPI_SIG_UEFI "UEFI" /* Uefi Boot Optimization Table */ +#define ACPI_SIG_WDAT "WDAT" /* Watchdog Action Table */ +#define ACPI_SIG_WDRT "WDRT" /* Watchdog Resource Table */ + +/* + * All tables must be byte-packed to match the ACPI specification, since + * the tables are provided by the system BIOS. + */ +#pragma pack(1) + +/* + * Note about bitfields: The u8 type is used for bitfields in ACPI tables. + * This is the only type that is even remotely portable. Anything else is not + * portable, so do not use any other bitfield types. 
+ */ + +/******************************************************************************* + * + * ASF - Alert Standard Format table (Signature "ASF!") + * + * Conforms to the Alert Standard Format Specification V2.0, 23 April 2003 + * + ******************************************************************************/ + +struct acpi_table_asf { + struct acpi_table_header header; /* Common ACPI table header */ +}; + +/* ASF subtable header */ + +struct acpi_asf_header { + u8 type; + u8 reserved; + u16 length; +}; + +/* Values for Type field above */ + +enum acpi_asf_type { + ACPI_ASF_TYPE_INFO = 0, + ACPI_ASF_TYPE_ALERT = 1, + ACPI_ASF_TYPE_CONTROL = 2, + ACPI_ASF_TYPE_BOOT = 3, + ACPI_ASF_TYPE_ADDRESS = 4, + ACPI_ASF_TYPE_RESERVED = 5 +}; + +/* + * ASF subtables + */ + +/* 0: ASF Information */ + +struct acpi_asf_info { + struct acpi_asf_header header; + u8 min_reset_value; + u8 min_poll_interval; + u16 system_id; + u32 mfg_id; + u8 flags; + u8 reserved2[3]; +}; + +/* 1: ASF Alerts */ + +struct acpi_asf_alert { + struct acpi_asf_header header; + u8 assert_mask; + u8 deassert_mask; + u8 alerts; + u8 data_length; +}; + +struct acpi_asf_alert_data { + u8 address; + u8 command; + u8 mask; + u8 value; + u8 sensor_type; + u8 type; + u8 offset; + u8 source_type; + u8 severity; + u8 sensor_number; + u8 entity; + u8 instance; +}; + +/* 2: ASF Remote Control */ + +struct acpi_asf_remote { + struct acpi_asf_header header; + u8 controls; + u8 data_length; + u16 reserved2; +}; + +struct acpi_asf_control_data { + u8 function; + u8 address; + u8 command; + u8 value; +}; + +/* 3: ASF RMCP Boot Options */ + +struct acpi_asf_rmcp { + struct acpi_asf_header header; + u8 capabilities[7]; + u8 completion_code; + u32 enterprise_id; + u8 command; + u16 parameter; + u16 boot_options; + u16 oem_parameters; +}; + +/* 4: ASF Address */ + +struct acpi_asf_address { + struct acpi_asf_header header; + u8 eprom_address; + u8 devices; +}; + +/******************************************************************************* + * + * BOOT - Simple Boot Flag Table + * + ******************************************************************************/ + +struct acpi_table_boot { + struct acpi_table_header header; /* Common ACPI table header */ + u8 cmos_index; /* Index in CMOS RAM for the boot register */ + u8 reserved[3]; +}; + +/******************************************************************************* + * + * DBGP - Debug Port table + * + ******************************************************************************/ + +struct acpi_table_dbgp { + struct acpi_table_header header; /* Common ACPI table header */ + u8 type; /* 0=full 16550, 1=subset of 16550 */ + u8 reserved[3]; + struct acpi_generic_address debug_port; +}; + +/******************************************************************************* + * + * DMAR - DMA Remapping table + * From "Intel Virtualization Technology for Directed I/O", Sept. 
2007 + * + ******************************************************************************/ + +struct acpi_table_dmar { + struct acpi_table_header header; /* Common ACPI table header */ + u8 width; /* Host Address Width */ + u8 flags; + u8 reserved[10]; +}; + +/* Flags */ + +#define ACPI_DMAR_INTR_REMAP (1) + +/* DMAR subtable header */ + +struct acpi_dmar_header { + u16 type; + u16 length; +}; + +/* Values for subtable type in struct acpi_dmar_header */ + +enum acpi_dmar_type { + ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, + ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, + ACPI_DMAR_TYPE_ATSR = 2, + ACPI_DMAR_TYPE_RESERVED = 3 /* 3 and greater are reserved */ +}; + +struct acpi_dmar_device_scope { + u8 entry_type; + u8 length; + u16 reserved; + u8 enumeration_id; + u8 bus; +}; + +/* Values for entry_type in struct acpi_dmar_device_scope */ + +enum acpi_dmar_scope_type { + ACPI_DMAR_SCOPE_TYPE_NOT_USED = 0, + ACPI_DMAR_SCOPE_TYPE_ENDPOINT = 1, + ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, + ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, + ACPI_DMAR_SCOPE_TYPE_HPET = 4, + ACPI_DMAR_SCOPE_TYPE_RESERVED = 5 /* 5 and greater are reserved */ +}; + +struct acpi_dmar_pci_path { + u8 dev; + u8 fn; +}; + +/* + * DMAR Sub-tables, correspond to Type in struct acpi_dmar_header + */ + +/* 0: Hardware Unit Definition */ + +struct acpi_dmar_hardware_unit { + struct acpi_dmar_header header; + u8 flags; + u8 reserved; + u16 segment; + u64 address; /* Register Base Address */ +}; + +/* Flags */ + +#define ACPI_DMAR_INCLUDE_ALL (1) + +/* 1: Reserved Memory Defininition */ + +struct acpi_dmar_reserved_memory { + struct acpi_dmar_header header; + u16 reserved; + u16 segment; + u64 base_address; /* 4_k aligned base address */ + u64 end_address; /* 4_k aligned limit address */ +}; + +/* Flags */ + +#define ACPI_DMAR_ALLOW_ALL (1) + +/* 2: Root Port ATS Capability Reporting Structure */ + +struct acpi_dmar_atsr { + struct acpi_dmar_header header; + u8 flags; + u8 reserved; + u16 segment; +}; + +/* Flags */ + +#define ACPI_DMAR_ALL_PORTS (1) + +/******************************************************************************* + * + * HPET - High Precision Event Timer table + * + ******************************************************************************/ + +struct acpi_table_hpet { + struct acpi_table_header header; /* Common ACPI table header */ + u32 id; /* Hardware ID of event timer block */ + struct acpi_generic_address address; /* Address of event timer block */ + u8 sequence; /* HPET sequence number */ + u16 minimum_tick; /* Main counter min tick, periodic mode */ + u8 flags; +}; + +/*! Flags */ + +#define ACPI_HPET_PAGE_PROTECT (1) /* 00: No page protection */ +#define ACPI_HPET_PAGE_PROTECT_4 (1<<1) /* 01: 4KB page protected */ +#define ACPI_HPET_PAGE_PROTECT_64 (1<<2) /* 02: 64KB page protected */ + +/*! 
[End] no source code translation !*/ + +/******************************************************************************* + * + * IBFT - Boot Firmware Table + * + ******************************************************************************/ + +struct acpi_table_ibft { + struct acpi_table_header header; /* Common ACPI table header */ + u8 reserved[12]; +}; + +/* IBFT common subtable header */ + +struct acpi_ibft_header { + u8 type; + u8 version; + u16 length; + u8 index; + u8 flags; +}; + +/* Values for Type field above */ + +enum acpi_ibft_type { + ACPI_IBFT_TYPE_NOT_USED = 0, + ACPI_IBFT_TYPE_CONTROL = 1, + ACPI_IBFT_TYPE_INITIATOR = 2, + ACPI_IBFT_TYPE_NIC = 3, + ACPI_IBFT_TYPE_TARGET = 4, + ACPI_IBFT_TYPE_EXTENSIONS = 5, + ACPI_IBFT_TYPE_RESERVED = 6 /* 6 and greater are reserved */ +}; + +/* IBFT subtables */ + +struct acpi_ibft_control { + struct acpi_ibft_header header; + u16 extensions; + u16 initiator_offset; + u16 nic0_offset; + u16 target0_offset; + u16 nic1_offset; + u16 target1_offset; +}; + +struct acpi_ibft_initiator { + struct acpi_ibft_header header; + u8 sns_server[16]; + u8 slp_server[16]; + u8 primary_server[16]; + u8 secondary_server[16]; + u16 name_length; + u16 name_offset; +}; + +struct acpi_ibft_nic { + struct acpi_ibft_header header; + u8 ip_address[16]; + u8 subnet_mask_prefix; + u8 origin; + u8 gateway[16]; + u8 primary_dns[16]; + u8 secondary_dns[16]; + u8 dhcp[16]; + u16 vlan; + u8 mac_address[6]; + u16 pci_address; + u16 name_length; + u16 name_offset; +}; + +struct acpi_ibft_target { + struct acpi_ibft_header header; + u8 target_ip_address[16]; + u16 target_ip_socket; + u8 target_boot_lun[8]; + u8 chap_type; + u8 nic_association; + u16 target_name_length; + u16 target_name_offset; + u16 chap_name_length; + u16 chap_name_offset; + u16 chap_secret_length; + u16 chap_secret_offset; + u16 reverse_chap_name_length; + u16 reverse_chap_name_offset; + u16 reverse_chap_secret_length; + u16 reverse_chap_secret_offset; +}; + +/******************************************************************************* + * + * MCFG - PCI Memory Mapped Configuration table and sub-table + * + ******************************************************************************/ + +struct acpi_table_mcfg { + struct acpi_table_header header; /* Common ACPI table header */ + u8 reserved[8]; +}; + +/* Subtable */ + +struct acpi_mcfg_allocation { + u64 address; /* Base address, processor-relative */ + u16 pci_segment; /* PCI segment group number */ + u8 start_bus_number; /* Starting PCI Bus number */ + u8 end_bus_number; /* Final PCI Bus number */ + u32 reserved; +}; + +/******************************************************************************* + * + * SPCR - Serial Port Console Redirection table + * + ******************************************************************************/ + +struct acpi_table_spcr { + struct acpi_table_header header; /* Common ACPI table header */ + u8 interface_type; /* 0=full 16550, 1=subset of 16550 */ + u8 reserved[3]; + struct acpi_generic_address serial_port; + u8 interrupt_type; + u8 pc_interrupt; + u32 interrupt; + u8 baud_rate; + u8 parity; + u8 stop_bits; + u8 flow_control; + u8 terminal_type; + u8 reserved1; + u16 pci_device_id; + u16 pci_vendor_id; + u8 pci_bus; + u8 pci_device; + u8 pci_function; + u32 pci_flags; + u8 pci_segment; + u32 reserved2; +}; + +/******************************************************************************* + * + * SPMI - Server Platform Management Interface table + * + 
******************************************************************************/ + +struct acpi_table_spmi { + struct acpi_table_header header; /* Common ACPI table header */ + u8 reserved; + u8 interface_type; + u16 spec_revision; /* Version of IPMI */ + u8 interrupt_type; + u8 gpe_number; /* GPE assigned */ + u8 reserved1; + u8 pci_device_flag; + u32 interrupt; + struct acpi_generic_address ipmi_register; + u8 pci_segment; + u8 pci_bus; + u8 pci_device; + u8 pci_function; +}; + +/******************************************************************************* + * + * TCPA - Trusted Computing Platform Alliance table + * + ******************************************************************************/ + +struct acpi_table_tcpa { + struct acpi_table_header header; /* Common ACPI table header */ + u16 reserved; + u32 max_log_length; /* Maximum length for the event log area */ + u64 log_address; /* Address of the event log area */ +}; + +/******************************************************************************* + * + * UEFI - UEFI Boot optimization Table + * + ******************************************************************************/ + +struct acpi_table_uefi { + struct acpi_table_header header; /* Common ACPI table header */ + u8 identifier[16]; /* UUID identifier */ + u16 data_offset; /* Offset of remaining data in table */ + u8 data; +}; + +/******************************************************************************* + * + * WDAT - Watchdog Action Table + * + ******************************************************************************/ + +struct acpi_table_wdat { + struct acpi_table_header header; /* Common ACPI table header */ + u32 header_length; /* Watchdog Header Length */ + u16 pci_segment; /* PCI Segment number */ + u8 pci_bus; /* PCI Bus number */ + u8 pci_device; /* PCI Device number */ + u8 pci_function; /* PCI Function number */ + u8 reserved[3]; + u32 timer_period; /* Period of one timer count (msec) */ + u32 max_count; /* Maximum counter value supported */ + u32 min_count; /* Minimum counter value */ + u8 flags; + u8 reserved2[3]; + u32 entries; /* Number of watchdog entries that follow */ +}; + +/* WDAT Instruction Entries (actions) */ + +struct acpi_wdat_entry { + struct acpi_whea_header whea_header; /* Common header for WHEA tables */ +}; + +/* Values for Action field above */ + +enum acpi_wdat_actions { + ACPI_WDAT_RESET = 1, + ACPI_WDAT_GET_CURRENT_COUNTDOWN = 4, + ACPI_WDAT_GET_COUNTDOWN = 5, + ACPI_WDAT_SET_COUNTDOWN = 6, + ACPI_WDAT_GET_RUNNING_STATE = 8, + ACPI_WDAT_SET_RUNNING_STATE = 9, + ACPI_WDAT_GET_STOPPED_STATE = 10, + ACPI_WDAT_SET_STOPPED_STATE = 11, + ACPI_WDAT_GET_REBOOT = 16, + ACPI_WDAT_SET_REBOOT = 17, + ACPI_WDAT_GET_SHUTDOWN = 18, + ACPI_WDAT_SET_SHUTDOWN = 19, + ACPI_WDAT_GET_STATUS = 32, + ACPI_WDAT_SET_STATUS = 33, + ACPI_WDAT_ACTION_RESERVED = 34 /* 34 and greater are reserved */ +}; + +/* Values for Instruction field above */ + +enum acpi_wdat_instructions { + ACPI_WDAT_READ_VALUE = 0, + ACPI_WDAT_READ_COUNTDOWN = 1, + ACPI_WDAT_WRITE_VALUE = 2, + ACPI_WDAT_WRITE_COUNTDOWN = 3, + ACPI_WDAT_INSTRUCTION_RESERVED = 4, /* 4 and greater are reserved */ + ACPI_WDAT_PRESERVE_REGISTER = 0x80 /* Except for this value */ +}; + +/******************************************************************************* + * + * WDRT - Watchdog Resource Table + * + ******************************************************************************/ + +struct acpi_table_wdrt { + struct acpi_table_header header; /* Common ACPI table header */ + u32 header_length; 
/* Watchdog Header Length */ + u8 pci_segment; /* PCI Segment number */ + u8 pci_bus; /* PCI Bus number */ + u8 pci_device; /* PCI Device number */ + u8 pci_function; /* PCI Function number */ + u32 timer_period; /* Period of one timer count (msec) */ + u32 max_count; /* Maximum counter value supported */ + u32 min_count; /* Minimum counter value */ + u8 flags; + u8 reserved[3]; + u32 entries; /* Number of watchdog entries that follow */ +}; + +/* Flags */ + +#define ACPI_WDRT_TIMER_ENABLED (1) /* 00: Timer enabled */ + +/* Reset to default packing */ + +#pragma pack() + +#endif /* __ACTBL2_H__ */ -- cgit v1.2.3 From 6e2d5ebd0d36199920676fdceaff4f4bfe66297b Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 27 Jul 2009 10:53:00 +0800 Subject: ACPICA: ACPI 4: Update headers for new and changed ACPI tables. Add IVRS,MSCT,UEFI,WAET,WDAT. Updated several existing tables for ACPI 4.0-related changes. Added document references for all tables not defined in ACPI spec. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl.h | 30 +++-- include/acpi/actbl1.h | 339 +++++++++++++++++++++++++++++++++----------------- include/acpi/actbl2.h | 339 +++++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 557 insertions(+), 151 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 55fcfc6725b2..1b6587952604 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -58,8 +58,9 @@ ******************************************************************************/ /* - * Values for description table header signatures. Useful because they make - * it more difficult to inadvertently type in the wrong signature. + * Values for description table header signatures for tables defined in this + * file. Useful because they make it more difficult to inadvertently type in + * the wrong signature. 
*/ #define ACPI_SIG_DSDT "DSDT" /* Differentiated System Description Table */ #define ACPI_SIG_FADT "FACP" /* Fixed ACPI Description Table */ @@ -123,6 +124,7 @@ struct acpi_generic_address { /******************************************************************************* * * RSDP - Root System Description Pointer (Signature is "RSD PTR ") + * Version 2 * ******************************************************************************/ @@ -143,6 +145,7 @@ struct acpi_table_rsdp { /******************************************************************************* * * RSDT/XSDT - Root System Description Tables + * Version 1 (both) * ******************************************************************************/ @@ -176,23 +179,24 @@ struct acpi_table_facs { u8 reserved1[24]; /* Reserved, must be zero */ }; -/* global_lock flags */ +/* Masks for global_lock flag field above */ #define ACPI_GLOCK_PENDING (1) /* 00: Pending global lock ownership */ #define ACPI_GLOCK_OWNED (1<<1) /* 01: Global lock is owned */ -/* Flags */ +/* Masks for Flags field above */ #define ACPI_FACS_S4_BIOS_PRESENT (1) /* 00: S4BIOS support is present */ #define ACPI_FACS_64BIT_WAKE (1<<1) /* 01: 64-bit wake vector supported (ACPI 4.0) */ -/* ospm_flags */ +/* Masks for ospm_flags field above */ #define ACPI_FACS_64BIT_ENVIRONMENT (1) /* 00: 64-bit wake environment is required (ACPI 4.0) */ /******************************************************************************* * * FADT - Fixed ACPI Description Table (Signature "FACP") + * Version 4 * ******************************************************************************/ @@ -253,7 +257,7 @@ struct acpi_table_fadt { struct acpi_generic_address xgpe1_block; /* 64-bit Extended General Purpose Event 1 Reg Blk address */ }; -/* FADT Boot Architecture Flags (boot_flags) */ +/* Masks for FADT Boot Architecture Flags (boot_flags) */ #define ACPI_FADT_LEGACY_DEVICES (1) /* 00: [V2] System has LPC or ISA bus devices */ #define ACPI_FADT_8042 (1<<1) /* 01: [V3] System has an 8042 controller on port 60/64 */ @@ -263,7 +267,7 @@ struct acpi_table_fadt { #define FADT2_REVISION_ID 3 -/* FADT flags */ +/* Masks for FADT flags */ #define ACPI_FADT_WBINVD (1) /* 00: [V1] The wbinvd instruction works properly */ #define ACPI_FADT_WBINVD_FLUSH (1<<1) /* 01: [V1] wbinvd flushes but does not invalidate caches */ @@ -286,7 +290,7 @@ struct acpi_table_fadt { #define ACPI_FADT_APIC_CLUSTER (1<<18) /* 18: [V4] All local APICs must use cluster model (ACPI 3.0) */ #define ACPI_FADT_APIC_PHYSICAL (1<<19) /* 19: [V4] All local x_aPICs must use physical dest mode (ACPI 3.0) */ -/* FADT Prefered Power Management Profiles */ +/* Values for preferred_profile (Prefered Power Management Profiles) */ enum acpi_prefered_pm_profiles { PM_UNSPECIFIED = 0, @@ -304,14 +308,16 @@ enum acpi_prefered_pm_profiles { #define ACPI_FADT_OFFSET(f) (u8) ACPI_OFFSET (struct acpi_table_fadt, f) +/* + * Internal table-related structures + */ union acpi_name_union { u32 integer; char ascii[4]; }; -/* - * Internal ACPI Table Descriptor. One per ACPI table - */ +/* Internal ACPI Table Descriptor. One per ACPI table. 
*/ + struct acpi_table_desc { acpi_physical_address address; struct acpi_table_header *pointer; @@ -321,7 +327,7 @@ struct acpi_table_desc { u8 flags; }; -/* Flags for above */ +/* Masks for Flags field above */ #define ACPI_TABLE_ORIGIN_UNKNOWN (0) #define ACPI_TABLE_ORIGIN_MAPPED (1) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 582af1fcb8f5..0417f2abc44b 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -56,8 +56,9 @@ ******************************************************************************/ /* - * Values for description table header signatures. Useful because they make - * it more difficult to inadvertently type in the wrong signature. + * Values for description table header signatures for tables defined in this + * file. Useful because they make it more difficult to inadvertently type in + * the wrong signature. */ #define ACPI_SIG_BERT "BERT" /* Boot Error Record Table */ #define ACPI_SIG_CPEP "CPEP" /* Corrected Platform Error Polling table */ @@ -66,6 +67,7 @@ #define ACPI_SIG_ERST "ERST" /* Error Record Serialization Table */ #define ACPI_SIG_HEST "HEST" /* Hardware Error Source Table */ #define ACPI_SIG_MADT "APIC" /* Multiple APIC Description Table */ +#define ACPI_SIG_MSCT "MSCT" /* Maximum System Characteristics Table */ #define ACPI_SIG_SBST "SBST" /* Smart Battery Specification Table */ #define ACPI_SIG_SLIT "SLIT" /* System Locality Distance Information Table */ #define ACPI_SIG_SRAT "SRAT" /* System Resource Affinity Table */ @@ -82,14 +84,20 @@ * portable, so do not use any other bitfield types. */ -/* Common Subtable header (used in MADT, SRAT, etc.) */ +/******************************************************************************* + * + * Common subtable headers + * + ******************************************************************************/ + +/* Generic subtable header (used in MADT, SRAT, etc.) 
*/ struct acpi_subtable_header { u8 type; u8 length; }; -/* Common Subtable header for WHEA tables (EINJ, ERST, WDAT) */ +/* Subtable header for WHEA tables (EINJ, ERST, WDAT) */ struct acpi_whea_header { u8 action; @@ -103,7 +111,8 @@ struct acpi_whea_header { /******************************************************************************* * - * BERT - Boot Error Record Table + * BERT - Boot Error Record Table (ACPI 4.0) + * Version 1 * ******************************************************************************/ @@ -113,26 +122,43 @@ struct acpi_table_bert { u64 address; /* Physical addresss of the error region */ }; -/* Boot Error Region */ +/* Boot Error Region (not a subtable, pointed to by Address field above) */ struct acpi_bert_region { - u32 block_status; - u32 raw_data_offset; - u32 raw_data_length; - u32 data_length; - u32 error_severity; + u32 block_status; /* Type of error information */ + u32 raw_data_offset; /* Offset to raw error data */ + u32 raw_data_length; /* Length of raw error data */ + u32 data_length; /* Length of generic error data */ + u32 error_severity; /* Severity code */ }; -/* block_status Flags */ +/* Values for block_status flags above */ #define ACPI_BERT_UNCORRECTABLE (1) -#define ACPI_BERT_CORRECTABLE (2) -#define ACPI_BERT_MULTIPLE_UNCORRECTABLE (4) -#define ACPI_BERT_MULTIPLE_CORRECTABLE (8) +#define ACPI_BERT_CORRECTABLE (1<<1) +#define ACPI_BERT_MULTIPLE_UNCORRECTABLE (1<<2) +#define ACPI_BERT_MULTIPLE_CORRECTABLE (1<<3) +#define ACPI_BERT_ERROR_ENTRY_COUNT (0xFF<<4) /* 8 bits, error count */ + +/* Values for error_severity above */ + +enum acpi_bert_error_severity { + ACPI_BERT_ERROR_CORRECTABLE = 0, + ACPI_BERT_ERROR_FATAL = 1, + ACPI_BERT_ERROR_CORRECTED = 2, + ACPI_BERT_ERROR_NONE = 3, + ACPI_BERT_ERROR_RESERVED = 4 /* 4 and greater are reserved */ +}; + +/* + * Note: The generic error data that follows the error_severity field above + * uses the struct acpi_hest_generic_data defined under the HEST table below + */ /******************************************************************************* * - * CPEP - Corrected Platform Error Polling table + * CPEP - Corrected Platform Error Polling table (ACPI 4.0) + * Version 1 * ******************************************************************************/ @@ -144,8 +170,7 @@ struct acpi_table_cpep { /* Subtable */ struct acpi_cpep_polling { - u8 type; - u8 length; + struct acpi_subtable_header header; u8 id; /* Processor ID */ u8 eid; /* Processor EID */ u32 interval; /* Polling interval (msec) */ @@ -154,6 +179,7 @@ struct acpi_cpep_polling { /******************************************************************************* * * ECDT - Embedded Controller Boot Resources Table + * Version 1 * ******************************************************************************/ @@ -168,14 +194,16 @@ struct acpi_table_ecdt { /******************************************************************************* * - * EINJ - Error Injection Table + * EINJ - Error Injection Table (ACPI 4.0) + * Version 1 * ******************************************************************************/ struct acpi_table_einj { struct acpi_table_header header; /* Common ACPI table header */ u32 header_length; - u32 reserved; + u8 flags; + u8 reserved[3]; u32 entries; }; @@ -185,6 +213,10 @@ struct acpi_einj_entry { struct acpi_whea_header whea_header; /* Common header for WHEA tables */ }; +/* Masks for Flags field above */ + +#define ACPI_EINJ_PRESERVE (1) + /* Values for Action field above */ enum acpi_einj_actions { @@ -220,9 +252,34 
@@ struct acpi_einj_trigger { u32 entry_count; }; +/* Command status return values */ + +enum acpi_einj_command_status { + ACPI_EINJ_SUCCESS = 0, + ACPI_EINJ_FAILURE = 1, + ACPI_EINJ_INVALID_ACCESS = 2, + ACPI_EINJ_STATUS_RESERVED = 3 /* 3 and greater are reserved */ +}; + +/* Error types returned from ACPI_EINJ_GET_ERROR_TYPE (bitfield) */ + +#define ACPI_EINJ_PROCESSOR_CORRECTABLE (1) +#define ACPI_EINJ_PROCESSOR_UNCORRECTABLE (1<<1) +#define ACPI_EINJ_PROCESSOR_FATAL (1<<2) +#define ACPI_EINJ_MEMORY_CORRECTABLE (1<<3) +#define ACPI_EINJ_MEMORY_UNCORRECTABLE (1<<4) +#define ACPI_EINJ_MEMORY_FATAL (1<<5) +#define ACPI_EINJ_PCIX_CORRECTABLE (1<<6) +#define ACPI_EINJ_PCIX_UNCORRECTABLE (1<<7) +#define ACPI_EINJ_PCIX_FATAL (1<<8) +#define ACPI_EINJ_PLATFORM_CORRECTABLE (1<<9) +#define ACPI_EINJ_PLATFORM_UNCORRECTABLE (1<<10) +#define ACPI_EINJ_PLATFORM_FATAL (1<<11) + /******************************************************************************* * - * ERST - Error Record Serialization Table + * ERST - Error Record Serialization Table (ACPI 4.0) + * Version 1 * ******************************************************************************/ @@ -239,19 +296,23 @@ struct acpi_erst_entry { struct acpi_whea_header whea_header; /* Common header for WHEA tables */ }; +/* Masks for Flags field above */ + +#define ACPI_ERST_PRESERVE (1) + /* Values for Action field above */ enum acpi_erst_actions { - ACPI_ERST_BEGIN_WRITE_OPERATION = 0, - ACPI_ERST_BEGIN_READ_OPERATION = 1, - ACPI_ERST_BETGIN_CLEAR_OPERATION = 2, - ACPI_ERST_END_OPERATION = 3, + ACPI_ERST_BEGIN_WRITE = 0, + ACPI_ERST_BEGIN_READ = 1, + ACPI_ERST_BEGIN_CLEAR = 2, + ACPI_ERST_END = 3, ACPI_ERST_SET_RECORD_OFFSET = 4, ACPI_ERST_EXECUTE_OPERATION = 5, ACPI_ERST_CHECK_BUSY_STATUS = 6, ACPI_ERST_GET_COMMAND_STATUS = 7, - ACPI_ERST_GET_RECORD_IDENTIFIER = 8, - ACPI_ERST_SET_RECORD_IDENTIFIER = 9, + ACPI_ERST_GET_RECORD_ID = 8, + ACPI_ERST_SET_RECORD_ID = 9, ACPI_ERST_GET_RECORD_COUNT = 10, ACPI_ERST_BEGIN_DUMMY_WRIITE = 11, ACPI_ERST_NOT_USED = 12, @@ -286,9 +347,29 @@ enum acpi_erst_instructions { ACPI_ERST_INSTRUCTION_RESERVED = 19 /* 19 and greater are reserved */ }; +/* Command status return values */ + +enum acpi_erst_command_status { + ACPI_ERST_SUCESS = 0, + ACPI_ERST_NO_SPACE = 1, + ACPI_ERST_NOT_AVAILABLE = 2, + ACPI_ERST_FAILURE = 3, + ACPI_ERST_RECORD_EMPTY = 4, + ACPI_ERST_NOT_FOUND = 5, + ACPI_ERST_STATUS_RESERVED = 6 /* 6 and greater are reserved */ +}; + +/* Error Record Serialization Information */ + +struct acpi_erst_info { + u16 signature; /* Should be "ER" */ + u8 data[48]; +}; + /******************************************************************************* * - * HEST - Hardware Error Source Table + * HEST - Hardware Error Source Table (ACPI 4.0) + * Version 1 * ******************************************************************************/ @@ -301,70 +382,49 @@ struct acpi_table_hest { struct acpi_hest_header { u16 type; + u16 source_id; }; /* Values for Type field above for subtables */ enum acpi_hest_types { - ACPI_HEST_TYPE_XPF_MACHINE_CHECK = 0, - ACPI_HEST_TYPE_XPF_CORRECTED_MACHINE_CHECK = 1, - ACPI_HEST_TYPE_XPF_UNUSED = 2, - ACPI_HEST_TYPE_XPF_NON_MASKABLE_INTERRUPT = 3, - ACPI_HEST_TYPE_IPF_CORRECTED_MACHINE_CHECK = 4, - ACPI_HEST_TYPE_IPF_CORRECTED_PLATFORM_ERROR = 5, + ACPI_HEST_TYPE_IA32_CHECK = 0, + ACPI_HEST_TYPE_IA32_CORRECTED_CHECK = 1, + ACPI_HEST_TYPE_IA32_NMI = 2, + ACPI_HEST_TYPE_NOT_USED3 = 3, + ACPI_HEST_TYPE_NOT_USED4 = 4, + ACPI_HEST_TYPE_NOT_USED5 = 5, ACPI_HEST_TYPE_AER_ROOT_PORT = 6, 
ACPI_HEST_TYPE_AER_ENDPOINT = 7, ACPI_HEST_TYPE_AER_BRIDGE = 8, - ACPI_HEST_TYPE_GENERIC_HARDWARE_ERROR_SOURCE = 9, + ACPI_HEST_TYPE_GENERIC_ERROR = 9, ACPI_HEST_TYPE_RESERVED = 10 /* 10 and greater are reserved */ }; /* - * HEST Sub-subtables + * HEST substructures contained in subtables */ -/* XPF Machine Check Error Bank */ - -struct acpi_hest_xpf_error_bank { +/* + * IA32 Error Bank(s) - Follows the struct acpi_hest_ia_machine_check and + * struct acpi_hest_ia_corrected structures. + */ +struct acpi_hest_ia_error_bank { u8 bank_number; u8 clear_status_on_init; u8 status_format; - u8 config_write_enable; + u8 reserved; u32 control_register; - u64 control_init_data; + u64 control_data; u32 status_register; u32 address_register; u32 misc_register; }; -/* Generic Error Status */ - -struct acpi_hest_generic_status { - u32 block_status; - u32 raw_data_offset; - u32 raw_data_length; - u32 data_length; - u32 error_severity; -}; - -/* Generic Error Data */ - -struct acpi_hest_generic_data { - u8 section_type[16]; - u32 error_severity; - u16 revision; - u8 validation_bits; - u8 flags; - u32 error_data_length; - u8 fru_id[16]; - u8 fru_text[20]; -}; - -/* Common HEST structure for PCI/AER types below (6,7,8) */ +/* Common HEST sub-structure for PCI/AER structures below (6,7,8) */ struct acpi_hest_aer_common { - u16 source_id; - u16 config_write_enable; + u16 reserved1; u8 flags; u8 enabled; u32 records_to_pre_allocate; @@ -373,13 +433,18 @@ struct acpi_hest_aer_common { u16 device; u16 function; u16 device_control; - u16 reserved; + u16 reserved2; u32 uncorrectable_error_mask; u32 uncorrectable_error_severity; u32 correctable_error_mask; u32 advanced_error_capabilities; }; +/* Masks for HEST Flags fields */ + +#define ACPI_HEST_FIRMWARE_FIRST (1) +#define ACPI_HEST_GLOBAL (1<<1) + /* Hardware Error Notification */ struct acpi_hest_notify { @@ -405,71 +470,59 @@ enum acpi_hest_notify_types { ACPI_HEST_NOTIFY_RESERVED = 5 /* 5 and greater are reserved */ }; +/* Values for config_write_enable bitfield above */ + +#define ACPI_HEST_TYPE (1) +#define ACPI_HEST_POLL_INTERVAL (1<<1) +#define ACPI_HEST_POLL_THRESHOLD_VALUE (1<<2) +#define ACPI_HEST_POLL_THRESHOLD_WINDOW (1<<3) +#define ACPI_HEST_ERR_THRESHOLD_VALUE (1<<4) +#define ACPI_HEST_ERR_THRESHOLD_WINDOW (1<<5) + /* * HEST subtables - * - * From WHEA Design Document, 16 May 2007. - * Note: There is no subtable type 2 in this version of the document, - * and there are two different subtable type 3s. 
*/ - /* 0: XPF Machine Check Exception */ +/* 0: IA32 Machine Check Exception */ -struct acpi_hest_xpf_machine_check { +struct acpi_hest_ia_machine_check { struct acpi_hest_header header; - u16 source_id; - u16 config_write_enable; + u16 reserved1; u8 flags; - u8 reserved1; + u8 enabled; u32 records_to_pre_allocate; u32 max_sections_per_record; u64 global_capability_data; u64 global_control_data; u8 num_hardware_banks; - u8 reserved2[7]; + u8 reserved3[7]; }; -/* 1: XPF Corrected Machine Check */ +/* 1: IA32 Corrected Machine Check */ -struct acpi_table_hest_xpf_corrected { +struct acpi_table_hest_ia_corrected { struct acpi_hest_header header; - u16 source_id; - u16 config_write_enable; + u16 reserved1; u8 flags; u8 enabled; u32 records_to_pre_allocate; u32 max_sections_per_record; struct acpi_hest_notify notify; u8 num_hardware_banks; - u8 reserved[3]; + u8 reserved2[3]; }; -/* 3: XPF Non-Maskable Interrupt */ +/* 2: IA32 Non-Maskable Interrupt */ -struct acpi_hest_xpf_nmi { +struct acpi_hest_ia_nmi { struct acpi_hest_header header; - u16 source_id; u32 reserved; u32 records_to_pre_allocate; u32 max_sections_per_record; u32 max_raw_data_length; }; -/* 4: IPF Corrected Machine Check */ - -struct acpi_hest_ipf_corrected { - struct acpi_hest_header header; - u8 enabled; - u8 reserved; -}; - -/* 5: IPF Corrected Platform Error */ - -struct acpi_hest_ipf_corrected_platform { - struct acpi_hest_header header; - u8 enabled; - u8 reserved; -}; +/* 3,4,5: Not used */ /* 6: PCI Express Root Port AER */ @@ -491,30 +544,61 @@ struct acpi_hest_aer { struct acpi_hest_aer_bridge { struct acpi_hest_header header; struct acpi_hest_aer_common aer; - u32 secondary_uncorrectable_error_mask; - u32 secondary_uncorrectable_error_severity; - u32 secondary_advanced_capabilities; + u32 second_uncorrectable_error_mask; + u32 second_uncorrectable_error_severity; + u32 second_advanced_capabilities; }; /* 9: Generic Hardware Error Source */ struct acpi_hest_generic { struct acpi_hest_header header; - u16 source_id; u16 related_source_id; - u8 config_write_enable; + u8 reserved; u8 enabled; u32 records_to_pre_allocate; u32 max_sections_per_record; u32 max_raw_data_length; struct acpi_generic_address error_status_address; struct acpi_hest_notify notify; - u32 error_status_block_length; + u32 error_block_length; +}; + +/* Generic Error Status block */ + +struct acpi_hest_generic_status { + u32 block_status; + u32 raw_data_offset; + u32 raw_data_length; + u32 data_length; + u32 error_severity; +}; + +/* Values for block_status flags above */ + +#define ACPI_HEST_UNCORRECTABLE (1) +#define ACPI_HEST_CORRECTABLE (1<<1) +#define ACPI_HEST_MULTIPLE_UNCORRECTABLE (1<<2) +#define ACPI_HEST_MULTIPLE_CORRECTABLE (1<<3) +#define ACPI_HEST_ERROR_ENTRY_COUNT (0xFF<<4) /* 8 bits, error count */ + +/* Generic Error Data entry */ + +struct acpi_hest_generic_data { + u8 section_type[16]; + u32 error_severity; + u16 revision; + u8 validation_bits; + u8 flags; + u32 error_data_length; + u8 fru_id[16]; + u8 fru_text[20]; }; /******************************************************************************* * * MADT - Multiple APIC Description Table + * Version 3 * ******************************************************************************/ @@ -524,16 +608,16 @@ struct acpi_table_madt { u32 flags; }; -/* Flags */ +/* Masks for Flags field above */ -#define ACPI_MADT_PCAT_COMPAT (1) /* 00: System also has dual 8259s */ +#define ACPI_MADT_PCAT_COMPAT (1) /* 00: System also has dual 8259s */ /* Values for PCATCompat flag */ #define 
ACPI_MADT_DUAL_PIC 0 #define ACPI_MADT_MULTIPLE_APIC 1 -/* Values for subtable type in struct acpi_subtable_header */ +/* Values for MADT subtable type in struct acpi_subtable_header */ enum acpi_madt_type { ACPI_MADT_TYPE_LOCAL_APIC = 0, @@ -644,7 +728,7 @@ struct acpi_madt_interrupt_source { u32 flags; /* Interrupt Source Flags */ }; -/* Flags field above */ +/* Masks for Flags field above */ #define ACPI_MADT_CPEI_OVERRIDE (1) @@ -693,9 +777,36 @@ struct acpi_madt_local_x2apic_nmi { #define ACPI_MADT_TRIGGER_RESERVED (2<<2) #define ACPI_MADT_TRIGGER_LEVEL (3<<2) +/******************************************************************************* + * + * MSCT - Maximum System Characteristics Table (ACPI 4.0) + * Version 1 + * + ******************************************************************************/ + +struct acpi_table_msct { + struct acpi_table_header header; /* Common ACPI table header */ + u32 proximity_offset; /* Location of proximity info struct(s) */ + u32 max_proximity_domains; /* Max number of proximity domains */ + u32 max_clock_domains; /* Max number of clock domains */ + u64 max_address; /* Max physical address in system */ +}; + +/* Subtable - Maximum Proximity Domain Information. Version 1 */ + +struct acpi_msct_proximity { + u8 revision; + u8 length; + u32 range_start; /* Start of domain range */ + u32 range_end; /* End of domain range */ + u32 processor_capacity; + u64 memory_capacity; /* In bytes */ +}; + /******************************************************************************* * * SBST - Smart Battery Specification Table + * Version 1 * ******************************************************************************/ @@ -709,6 +820,7 @@ struct acpi_table_sbst { /******************************************************************************* * * SLIT - System Locality Distance Information Table + * Version 1 * ******************************************************************************/ @@ -721,6 +833,7 @@ struct acpi_table_slit { /******************************************************************************* * * SRAT - System Resource Affinity Table + * Version 3 * ******************************************************************************/ @@ -755,6 +868,10 @@ struct acpi_srat_cpu_affinity { u32 reserved; /* Reserved, must be zero */ }; +/* Flags */ + +#define ACPI_SRAT_CPU_USE_AFFINITY (1) /* 00: Use affinity structure */ + /* 1: Memory Affinity */ struct acpi_srat_mem_affinity { diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index b271aba0e524..6f3dce9991e1 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -14,8 +14,9 @@ ******************************************************************************/ /* - * Values for description table header signatures. Useful because they make - * it more difficult to inadvertently type in the wrong signature. + * Values for description table header signatures for tables defined in this + * file. Useful because they make it more difficult to inadvertently type in + * the wrong signature. */ #define ACPI_SIG_ASF "ASF!" 
/* Alert Standard Format table */ #define ACPI_SIG_BOOT "BOOT" /* Simple Boot Flag Table */ @@ -23,12 +24,14 @@ #define ACPI_SIG_DMAR "DMAR" /* DMA Remapping table */ #define ACPI_SIG_HPET "HPET" /* High Precision Event Timer table */ #define ACPI_SIG_IBFT "IBFT" /* i_sCSI Boot Firmware Table */ +#define ACPI_SIG_IVRS "IVRS" /* I/O Virtualization Reporting Structure */ #define ACPI_SIG_MCFG "MCFG" /* PCI Memory Mapped Configuration table */ #define ACPI_SIG_SLIC "SLIC" /* Software Licensing Description Table */ #define ACPI_SIG_SPCR "SPCR" /* Serial Port Console Redirection table */ #define ACPI_SIG_SPMI "SPMI" /* Server Platform Management Interface table */ #define ACPI_SIG_TCPA "TCPA" /* Trusted Computing Platform Alliance table */ #define ACPI_SIG_UEFI "UEFI" /* Uefi Boot Optimization Table */ +#define ACPI_SIG_WAET "WAET" /* Windows ACPI Emulated devices Table */ #define ACPI_SIG_WDAT "WDAT" /* Watchdog Action Table */ #define ACPI_SIG_WDRT "WDRT" /* Watchdog Resource Table */ @@ -47,6 +50,7 @@ /******************************************************************************* * * ASF - Alert Standard Format table (Signature "ASF!") + * Revision 0x10 * * Conforms to the Alert Standard Format Specification V2.0, 23 April 2003 * @@ -91,6 +95,10 @@ struct acpi_asf_info { u8 reserved2[3]; }; +/* Masks for Flags field above */ + +#define ACPI_ASF_SMBUS_PROTOCOLS (1) + /* 1: ASF Alerts */ struct acpi_asf_alert { @@ -156,6 +164,9 @@ struct acpi_asf_address { /******************************************************************************* * * BOOT - Simple Boot Flag Table + * Version 1 + * + * Conforms to the "Simple Boot Flag Specification", Version 2.1 * ******************************************************************************/ @@ -168,6 +179,9 @@ struct acpi_table_boot { /******************************************************************************* * * DBGP - Debug Port table + * Version 1 + * + * Conforms to the "Debug Port Specification", Version 1.00, 2/9/2000 * ******************************************************************************/ @@ -181,7 +195,10 @@ struct acpi_table_dbgp { /******************************************************************************* * * DMAR - DMA Remapping table - * From "Intel Virtualization Technology for Directed I/O", Sept. 2007 + * Version 1 + * + * Conforms to "Intel Virtualization Technology for Directed I/O", + * Version 1.2, Sept. 
2008 * ******************************************************************************/ @@ -192,7 +209,7 @@ struct acpi_table_dmar { u8 reserved[10]; }; -/* Flags */ +/* Masks for Flags field above */ #define ACPI_DMAR_INTR_REMAP (1) @@ -209,9 +226,12 @@ enum acpi_dmar_type { ACPI_DMAR_TYPE_HARDWARE_UNIT = 0, ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, ACPI_DMAR_TYPE_ATSR = 2, - ACPI_DMAR_TYPE_RESERVED = 3 /* 3 and greater are reserved */ + ACPI_DMAR_HARDWARE_AFFINITY = 3, + ACPI_DMAR_TYPE_RESERVED = 4 /* 4 and greater are reserved */ }; +/* DMAR Device Scope structure */ + struct acpi_dmar_device_scope { u8 entry_type; u8 length; @@ -250,7 +270,7 @@ struct acpi_dmar_hardware_unit { u64 address; /* Register Base Address */ }; -/* Flags */ +/* Masks for Flags field above */ #define ACPI_DMAR_INCLUDE_ALL (1) @@ -264,7 +284,7 @@ struct acpi_dmar_reserved_memory { u64 end_address; /* 4_k aligned limit address */ }; -/* Flags */ +/* Masks for Flags field above */ #define ACPI_DMAR_ALLOW_ALL (1) @@ -277,13 +297,26 @@ struct acpi_dmar_atsr { u16 segment; }; -/* Flags */ +/* Masks for Flags field above */ #define ACPI_DMAR_ALL_PORTS (1) +/* 3: Remapping Hardware Static Affinity Structure */ + +struct acpi_dmar_rhsa { + struct acpi_dmar_header header; + u32 reserved; + u64 base_address; + u32 proximity_domain; +}; + /******************************************************************************* * * HPET - High Precision Event Timer table + * Version 1 + * + * Conforms to "IA-PC HPET (High Precision Event Timers) Specification", + * Version 1.0a, October 2004 * ******************************************************************************/ @@ -296,17 +329,28 @@ struct acpi_table_hpet { u8 flags; }; -/*! Flags */ +/* Masks for Flags field above */ -#define ACPI_HPET_PAGE_PROTECT (1) /* 00: No page protection */ -#define ACPI_HPET_PAGE_PROTECT_4 (1<<1) /* 01: 4KB page protected */ -#define ACPI_HPET_PAGE_PROTECT_64 (1<<2) /* 02: 64KB page protected */ +#define ACPI_HPET_PAGE_PROTECT_MASK (3) -/*! [End] no source code translation !*/ +/* Values for Page Protect flags */ + +enum acpi_hpet_page_protect { + ACPI_HPET_NO_PAGE_PROTECT = 0, + ACPI_HPET_PAGE_PROTECT4 = 1, + ACPI_HPET_PAGE_PROTECT64 = 2 +}; /******************************************************************************* * * IBFT - Boot Firmware Table + * Version 1 + * + * Conforms to "iSCSI Boot Firmware Table (iBFT) as Defined in ACPI 3.0b + * Specification", Version 1.01, March 1, 2007 + * + * Note: It appears that this table is not intended to appear in the RSDT/XSDT. + * Therefore, it is not currently supported by the disassembler. * ******************************************************************************/ @@ -394,9 +438,184 @@ struct acpi_ibft_target { u16 reverse_chap_secret_offset; }; +/******************************************************************************* + * + * IVRS - I/O Virtualization Reporting Structure + * Version 1 + * + * Conforms to "AMD I/O Virtualization Technology (IOMMU) Specification", + * Revision 1.26, February 2009. 
+ * + ******************************************************************************/ + +struct acpi_table_ivrs { + struct acpi_table_header header; /* Common ACPI table header */ + u32 info; /* Common virtualization info */ + u64 reserved; +}; + +/* Values for Info field above */ + +#define ACPI_IVRS_PHYSICAL_SIZE 0x00007F00 /* 7 bits, physical address size */ +#define ACPI_IVRS_VIRTUAL_SIZE 0x003F8000 /* 7 bits, virtual address size */ +#define ACPI_IVRS_ATS_RESERVED 0x00400000 /* ATS address translation range reserved */ + +/* IVRS subtable header */ + +struct acpi_ivrs_header { + u8 type; /* Subtable type */ + u8 flags; + u16 length; /* Subtable length */ + u16 device_id; /* ID of IOMMU */ +}; + +/* Values for subtable Type above */ + +enum acpi_ivrs_type { + ACPI_IVRS_TYPE_HARDWARE = 0x10, + ACPI_IVRS_TYPE_MEMORY1 = 0x20, + ACPI_IVRS_TYPE_MEMORY2 = 0x21, + ACPI_IVRS_TYPE_MEMORY3 = 0x22 +}; + +/* Masks for Flags field above for IVHD subtable */ + +#define ACPI_IVHD_TT_ENABLE (1) +#define ACPI_IVHD_PASS_PW (1<<1) +#define ACPI_IVHD_RES_PASS_PW (1<<2) +#define ACPI_IVHD_ISOC (1<<3) +#define ACPI_IVHD_IOTLB (1<<4) + +/* Masks for Flags field above for IVMD subtable */ + +#define ACPI_IVMD_UNITY (1) +#define ACPI_IVMD_READ (1<<1) +#define ACPI_IVMD_WRITE (1<<2) +#define ACPI_IVMD_EXCLUSION_RANGE (1<<3) + +/* + * IVRS subtables, correspond to Type in struct acpi_ivrs_header + */ + +/* 0x10: I/O Virtualization Hardware Definition Block (IVHD) */ + +struct acpi_ivrs_hardware { + struct acpi_ivrs_header header; + u16 capability_offset; /* Offset for IOMMU control fields */ + u64 base_address; /* IOMMU control registers */ + u16 pci_segment_group; + u16 info; /* MSI number and unit ID */ + u32 reserved; +}; + +/* Masks for Info field above */ + +#define ACPI_IVHD_MSI_NUMBER_MASK 0x001F /* 5 bits, MSI message number */ +#define ACPI_IVHD_UNIT_ID_MASK 0x1F00 /* 5 bits, unit_iD */ + +/* + * Device Entries for IVHD subtable, appear after struct acpi_ivrs_hardware structure. + * Upper two bits of the Type field are the (encoded) length of the structure. + * Currently, only 4 and 8 byte entries are defined. 16 and 32 byte entries + * are reserved for future use but not defined. 
+ */ +struct acpi_ivrs_de_header { + u8 type; + u16 id; + u8 data_setting; +}; + +/* Length of device entry is in the top two bits of Type field above */ + +#define ACPI_IVHD_ENTRY_LENGTH 0xC0 + +/* Values for device entry Type field above */ + +enum acpi_ivrs_device_entry_type { + /* 4-byte device entries, all use struct acpi_ivrs_device4 */ + + ACPI_IVRS_TYPE_PAD4 = 0, + ACPI_IVRS_TYPE_ALL = 1, + ACPI_IVRS_TYPE_SELECT = 2, + ACPI_IVRS_TYPE_START = 3, + ACPI_IVRS_TYPE_END = 4, + + /* 8-byte device entries */ + + ACPI_IVRS_TYPE_PAD8 = 64, + ACPI_IVRS_TYPE_NOT_USED = 65, + ACPI_IVRS_TYPE_ALIAS_SELECT = 66, /* Uses struct acpi_ivrs_device8a */ + ACPI_IVRS_TYPE_ALIAS_START = 67, /* Uses struct acpi_ivrs_device8a */ + ACPI_IVRS_TYPE_EXT_SELECT = 70, /* Uses struct acpi_ivrs_device8b */ + ACPI_IVRS_TYPE_EXT_START = 71, /* Uses struct acpi_ivrs_device8b */ + ACPI_IVRS_TYPE_SPECIAL = 72 /* Uses struct acpi_ivrs_device8c */ +}; + +/* Values for Data field above */ + +#define ACPI_IVHD_INIT_PASS (1) +#define ACPI_IVHD_EINT_PASS (1<<1) +#define ACPI_IVHD_NMI_PASS (1<<2) +#define ACPI_IVHD_SYSTEM_MGMT (3<<4) +#define ACPI_IVHD_LINT0_PASS (1<<6) +#define ACPI_IVHD_LINT1_PASS (1<<7) + +/* Types 0-4: 4-byte device entry */ + +struct acpi_ivrs_device4 { + struct acpi_ivrs_de_header header; +}; + +/* Types 66-67: 8-byte device entry */ + +struct acpi_ivrs_device8a { + struct acpi_ivrs_de_header header; + u8 reserved1; + u16 used_id; + u8 reserved2; +}; + +/* Types 70-71: 8-byte device entry */ + +struct acpi_ivrs_device8b { + struct acpi_ivrs_de_header header; + u32 extended_data; +}; + +/* Values for extended_data above */ + +#define ACPI_IVHD_ATS_DISABLED (1<<31) + +/* Type 72: 8-byte device entry */ + +struct acpi_ivrs_device8c { + struct acpi_ivrs_de_header header; + u8 handle; + u16 used_id; + u8 variety; +}; + +/* Values for Variety field above */ + +#define ACPI_IVHD_IOAPIC 1 +#define ACPI_IVHD_HPET 2 + +/* 0x20, 0x21, 0x22: I/O Virtualization Memory Definition Block (IVMD) */ + +struct acpi_ivrs_memory { + struct acpi_ivrs_header header; + u16 aux_data; + u64 reserved; + u64 start_address; + u64 memory_length; +}; + /******************************************************************************* * * MCFG - PCI Memory Mapped Configuration table and sub-table + * Version 1 + * + * Conforms to "PCI Firmware Specification", Revision 3.0, June 20, 2005 * ******************************************************************************/ @@ -418,6 +637,10 @@ struct acpi_mcfg_allocation { /******************************************************************************* * * SPCR - Serial Port Console Redirection table + * Version 1 + * + * Conforms to "Serial Port Console Redirection Table", + * Version 1.00, January 11, 2002 * ******************************************************************************/ @@ -445,16 +668,25 @@ struct acpi_table_spcr { u32 reserved2; }; +/* Masks for pci_flags field above */ + +#define ACPI_SPCR_DO_NOT_DISABLE (1) + /******************************************************************************* * * SPMI - Server Platform Management Interface table + * Version 5 + * + * Conforms to "Intelligent Platform Management Interface Specification + * Second Generation v2.0", Document Revision 1.0, February 12, 2004 with + * June 12, 2009 markup. 
* ******************************************************************************/ struct acpi_table_spmi { struct acpi_table_header header; /* Common ACPI table header */ - u8 reserved; u8 interface_type; + u8 reserved; /* Must be 1 */ u16 spec_revision; /* Version of IPMI */ u8 interrupt_type; u8 gpe_number; /* GPE assigned */ @@ -466,11 +698,27 @@ struct acpi_table_spmi { u8 pci_bus; u8 pci_device; u8 pci_function; + u8 reserved2; +}; + +/* Values for interface_type above */ + +enum acpi_spmi_interface_types { + ACPI_SPMI_NOT_USED = 0, + ACPI_SPMI_KEYBOARD = 1, + ACPI_SPMI_SMI = 2, + ACPI_SPMI_BLOCK_TRANSFER = 3, + ACPI_SPMI_SMBUS = 4, + ACPI_SPMI_RESERVED = 5 /* 5 and above are reserved */ }; /******************************************************************************* * * TCPA - Trusted Computing Platform Alliance table + * Version 1 + * + * Conforms to "TCG PC Specific Implementation Specification", + * Version 1.1, August 18, 2003 * ******************************************************************************/ @@ -484,6 +732,10 @@ struct acpi_table_tcpa { /******************************************************************************* * * UEFI - UEFI Boot optimization Table + * Version 1 + * + * Conforms to "Unified Extensible Firmware Interface Specification", + * Version 2.3, May 8, 2009 * ******************************************************************************/ @@ -491,12 +743,34 @@ struct acpi_table_uefi { struct acpi_table_header header; /* Common ACPI table header */ u8 identifier[16]; /* UUID identifier */ u16 data_offset; /* Offset of remaining data in table */ - u8 data; }; +/******************************************************************************* + * + * WAET - Windows ACPI Emulated devices Table + * Version 1 + * + * Conforms to "Windows ACPI Emulated Devices Table", version 1.0, April 6, 2009 + * + ******************************************************************************/ + +struct acpi_table_waet { + struct acpi_table_header header; /* Common ACPI table header */ + u32 flags; +}; + +/* Masks for Flags field above */ + +#define ACPI_WAET_RTC_NO_ACK (1) /* RTC requires no int acknowledge */ +#define ACPI_WAET_TIMER_ONE_READ (1<<1) /* PM timer requires only one read */ + /******************************************************************************* * * WDAT - Watchdog Action Table + * Version 1 + * + * Conforms to "Hardware Watchdog Timers Design Specification", + * Copyright 2006 Microsoft Corporation. 
* ******************************************************************************/ @@ -516,10 +790,20 @@ struct acpi_table_wdat { u32 entries; /* Number of watchdog entries that follow */ }; +/* Masks for Flags field above */ + +#define ACPI_WDAT_ENABLED (1) +#define ACPI_WDAT_STOPPED 0x80 + /* WDAT Instruction Entries (actions) */ struct acpi_wdat_entry { - struct acpi_whea_header whea_header; /* Common header for WHEA tables */ + u8 action; + u8 instruction; + u16 reserved; + struct acpi_generic_address register_region; + u32 value; /* Value used with Read/Write register */ + u32 mask; /* Bitmask required for this register instruction */ }; /* Values for Action field above */ @@ -556,28 +840,27 @@ enum acpi_wdat_instructions { /******************************************************************************* * * WDRT - Watchdog Resource Table + * Version 1 + * + * Conforms to "Watchdog Timer Hardware Requirements for Windows Server 2003", + * Version 1.01, August 28, 2006 * ******************************************************************************/ struct acpi_table_wdrt { struct acpi_table_header header; /* Common ACPI table header */ - u32 header_length; /* Watchdog Header Length */ - u8 pci_segment; /* PCI Segment number */ + struct acpi_generic_address control_register; + struct acpi_generic_address count_register; + u16 pci_device_id; + u16 pci_vendor_id; u8 pci_bus; /* PCI Bus number */ u8 pci_device; /* PCI Device number */ u8 pci_function; /* PCI Function number */ - u32 timer_period; /* Period of one timer count (msec) */ - u32 max_count; /* Maximum counter value supported */ - u32 min_count; /* Minimum counter value */ - u8 flags; - u8 reserved[3]; - u32 entries; /* Number of watchdog entries that follow */ + u8 pci_segment; /* PCI Segment number */ + u16 max_count; /* Maximum counter value supported */ + u8 units; }; -/* Flags */ - -#define ACPI_WDRT_TIMER_ENABLED (1) /* 00: Timer enabled */ - /* Reset to default packing */ #pragma pack() -- cgit v1.2.3 From c276e3884163355464a76e60ed9e272b52b4acc2 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Mon, 27 Jul 2009 14:55:02 +0800 Subject: ACPICA: Update definitions for HEST table Eliminate duplicated code in disassembler. Shorten identifiers that were too long. 
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl1.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 0417f2abc44b..34b10c06bcfd 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -427,17 +427,17 @@ struct acpi_hest_aer_common { u16 reserved1; u8 flags; u8 enabled; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; u32 bus; u16 device; u16 function; u16 device_control; u16 reserved2; - u32 uncorrectable_error_mask; - u32 uncorrectable_error_severity; - u32 correctable_error_mask; - u32 advanced_error_capabilities; + u32 uncorrectable_mask; + u32 uncorrectable_severity; + u32 correctable_mask; + u32 advanced_capabilities; }; /* Masks for HEST Flags fields */ @@ -490,7 +490,7 @@ struct acpi_hest_ia_machine_check { u16 reserved1; u8 flags; u8 enabled; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; u64 global_capability_data; u64 global_control_data; @@ -505,7 +505,7 @@ struct acpi_table_hest_ia_corrected { u16 reserved1; u8 flags; u8 enabled; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; struct acpi_hest_notify notify; u8 num_hardware_banks; @@ -517,7 +517,7 @@ struct acpi_table_hest_ia_corrected { struct acpi_hest_ia_nmi { struct acpi_hest_header header; u32 reserved; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; u32 max_raw_data_length; }; @@ -544,9 +544,9 @@ struct acpi_hest_aer { struct acpi_hest_aer_bridge { struct acpi_hest_header header; struct acpi_hest_aer_common aer; - u32 second_uncorrectable_error_mask; - u32 second_uncorrectable_error_severity; - u32 second_advanced_capabilities; + u32 uncorrectable_mask2; + u32 uncorrectable_severity2; + u32 advanced_capabilities2; }; /* 9: Generic Hardware Error Source */ @@ -556,7 +556,7 @@ struct acpi_hest_generic { u16 related_source_id; u8 reserved; u8 enabled; - u32 records_to_pre_allocate; + u32 records_to_preallocate; u32 max_sections_per_record; u32 max_raw_data_length; struct acpi_generic_address error_status_address; -- cgit v1.2.3 From 1872bbc94b2d092ece22a8fbf1c3e81f0fba0052 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 13 Aug 2009 13:31:00 +0800 Subject: ACPICA: Fix typo for HEST ACPI table Problem with the name of one of the subtables. Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/actbl1.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 34b10c06bcfd..0b9b430b092b 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -500,7 +500,7 @@ struct acpi_hest_ia_machine_check { /* 1: IA32 Corrected Machine Check */ -struct acpi_table_hest_ia_corrected { +struct acpi_hest_ia_corrected { struct acpi_hest_header header; u16 reserved1; u8 flags; -- cgit v1.2.3 From 49ae80c9944401222e47108883c486b5a5a24006 Mon Sep 17 00:00:00 2001 From: Bob Moore Date: Thu, 13 Aug 2009 13:43:12 +0800 Subject: ACPICA: Update version to 20090730 Version 20090730. 
Signed-off-by: Bob Moore Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpixf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 063e577e791e..f3b358b7432f 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20090625 +#define ACPI_CA_VERSION 0x20090730 #include "actypes.h" #include "actbl.h" -- cgit v1.2.3 From a192a9580bcc41692be1f36b77c3b681827f566a Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 28 Jul 2009 16:45:54 -0400 Subject: ACPI: Move definition of PREFIX from acpi_bus.h to internal..h Linux/ACPI core files using internal.h all PREFIX "ACPI: ", however, not all ACPI drivers use/want it -- and they should not have to #undef PREFIX to define their own. Add GPL commment to internal.h while we are there. This does not change any actual console output, asside from a whitespace fix. Signed-off-by: Len Brown --- arch/x86/pci/mmconfig-shared.c | 2 ++ drivers/acpi/ac.c | 2 ++ drivers/acpi/battery.c | 2 ++ drivers/acpi/blacklist.c | 2 ++ drivers/acpi/button.c | 2 ++ drivers/acpi/cm_sbs.c | 2 ++ drivers/acpi/container.c | 2 ++ drivers/acpi/dock.c | 2 ++ drivers/acpi/ec.c | 1 - drivers/acpi/event.c | 2 ++ drivers/acpi/fan.c | 2 ++ drivers/acpi/glue.c | 2 ++ drivers/acpi/internal.h | 22 +++++++++++++++++++++- drivers/acpi/numa.c | 2 ++ drivers/acpi/pci_irq.c | 2 ++ drivers/acpi/pci_link.c | 2 ++ drivers/acpi/pci_root.c | 2 ++ drivers/acpi/power.c | 2 ++ drivers/acpi/processor_core.c | 2 ++ drivers/acpi/processor_idle.c | 2 ++ drivers/acpi/processor_perflib.c | 2 ++ drivers/acpi/processor_thermal.c | 2 ++ drivers/acpi/processor_throttling.c | 2 ++ drivers/acpi/sbs.c | 2 ++ drivers/acpi/sbshc.c | 2 ++ drivers/acpi/system.c | 2 ++ drivers/acpi/thermal.c | 2 ++ drivers/acpi/utils.c | 2 ++ drivers/acpi/video.c | 2 ++ drivers/acpi/video_detect.c | 2 ++ drivers/pci/dmar.c | 3 +-- drivers/platform/x86/fujitsu-laptop.c | 4 ++-- drivers/platform/x86/wmi.c | 1 - include/acpi/acpi_bus.h | 2 -- 34 files changed, 80 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 712443ec6d43..81d3466765ca 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -18,6 +18,8 @@ #include #include +#define PREFIX "ACPI: " + /* aperture is up to 256MB but BIOS may reserve less */ #define MMCONFIG_APER_MIN (2 * 1024*1024) #define MMCONFIG_APER_MAX (256 * 1024*1024) diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index 0df8fcb687d6..98b9690b0159 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -37,6 +37,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_AC_CLASS "ac_adapter" #define ACPI_AC_DEVICE_NAME "AC Adapter" #define ACPI_AC_FILE_STATE "state" diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 58b4517ce712..f8c3d1bb6969 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -45,6 +45,8 @@ #include #endif +#define PREFIX "ACPI: " + #define ACPI_BATTERY_VALUE_UNKNOWN 0xFFFFFFFF #define ACPI_BATTERY_CLASS "battery" diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index f6baa77deefb..19152ea2b104 100644 --- a/drivers/acpi/blacklist.c +++ b/drivers/acpi/blacklist.c @@ -34,6 +34,8 @@ #include #include +#include "internal.h" + enum acpi_blacklist_predicates { all_versions, less_than_or_equal, diff --git 
a/drivers/acpi/button.c b/drivers/acpi/button.c index 9195deba9d94..d295bdccc09c 100644 --- a/drivers/acpi/button.c +++ b/drivers/acpi/button.c @@ -33,6 +33,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_BUTTON_CLASS "button" #define ACPI_BUTTON_FILE_INFO "info" #define ACPI_BUTTON_FILE_STATE "state" diff --git a/drivers/acpi/cm_sbs.c b/drivers/acpi/cm_sbs.c index 332fe4b21708..6c9ee68e46fb 100644 --- a/drivers/acpi/cm_sbs.c +++ b/drivers/acpi/cm_sbs.c @@ -28,6 +28,8 @@ #include #include +#define PREFIX "ACPI: " + ACPI_MODULE_NAME("cm_sbs"); #define ACPI_AC_CLASS "ac_adapter" #define ACPI_BATTERY_CLASS "battery" diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c index fe0cdf83641a..5f2c3c00a315 100644 --- a/drivers/acpi/container.c +++ b/drivers/acpi/container.c @@ -35,6 +35,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_CONTAINER_DEVICE_NAME "ACPI container device" #define ACPI_CONTAINER_CLASS "container" diff --git a/drivers/acpi/dock.c b/drivers/acpi/dock.c index efb959d6c8a9..9a855669ff12 100644 --- a/drivers/acpi/dock.c +++ b/drivers/acpi/dock.c @@ -33,6 +33,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_DOCK_DRIVER_DESCRIPTION "ACPI Dock Station Driver" ACPI_MODULE_NAME("dock"); diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 391f331674c7..5180f0f1dd02 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -47,7 +47,6 @@ #define ACPI_EC_DEVICE_NAME "Embedded Controller" #define ACPI_EC_FILE_INFO "info" -#undef PREFIX #define PREFIX "ACPI: EC: " /* EC status register */ diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index aeb7e5fb4a04..c511071bfd79 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -14,6 +14,8 @@ #include #include +#include "internal.h" + #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME("event"); diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 53698ea08371..f419849a0d3f 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -34,6 +34,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_FAN_CLASS "fan" #define ACPI_FAN_FILE_STATE "state" diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index a8a5c29958c8..dc36a448de43 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -12,6 +12,8 @@ #include #include +#include "internal.h" + #define ACPI_GLUE_DEBUG 0 #if ACPI_GLUE_DEBUG #define DBG(x...) printk(PREFIX x) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 11a69b53004e..074cf8682d52 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -1,4 +1,24 @@ -/* For use by Linux/ACPI infrastructure, not drivers */ +/* + * acpi/internal.h + * For use by Linux/ACPI infrastructure, not drivers + * + * Copyright (c) 2009, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#define PREFIX "ACPI: " int init_acpi_device_notify(void); int acpi_scan_init(void); diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index d440ccd27d91..202dd0c976a3 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -30,6 +30,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_NUMA 0x80000000 #define _COMPONENT ACPI_NUMA ACPI_MODULE_NAME("numa"); diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index b794eb88ab90..843699ed93f2 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -40,6 +40,8 @@ #include #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_PCI_COMPONENT ACPI_MODULE_NAME("pci_irq"); diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 16e0f9d3d17c..394ae89409c2 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -43,6 +43,8 @@ #include #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_PCI_COMPONENT ACPI_MODULE_NAME("pci_link"); #define ACPI_PCI_LINK_CLASS "pci_irq_routing" diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 55b5b90c2a44..dee916707a7d 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -36,6 +36,8 @@ #include #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_PCI_COMPONENT ACPI_MODULE_NAME("pci_root"); #define ACPI_PCI_ROOT_CLASS "pci_bridge" diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index d74365d4a6e7..e86603f37dee 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -44,6 +44,8 @@ #include #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_POWER_COMPONENT ACPI_MODULE_NAME("power"); #define ACPI_POWER_CLASS "power_resource" diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 2cc4b3033872..b4a1ab297e7b 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -59,6 +59,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define ACPI_PROCESSOR_DEVICE_NAME "Processor" #define ACPI_PROCESSOR_FILE_INFO "info" diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 66393d5c4c7c..22aab1fc9b45 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -60,6 +60,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_idle"); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 60e543d3234e..11088cf10319 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -39,6 +39,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define ACPI_PROCESSOR_FILE_PERFORMANCE "performance" #define _COMPONENT ACPI_PROCESSOR_COMPONENT diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 31adda1099e0..3e3181c0efc3 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -40,6 +40,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define _COMPONENT ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_thermal"); diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index ae39797aab55..b366b9c13d4d 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -41,6 +41,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_PROCESSOR_CLASS "processor" #define _COMPONENT 
ACPI_PROCESSOR_COMPONENT ACPI_MODULE_NAME("processor_throttling"); diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index 4b214b74ebaa..52b9db8afc20 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -46,6 +46,8 @@ #include "sbshc.h" +#define PREFIX "ACPI: " + #define ACPI_SBS_CLASS "sbs" #define ACPI_AC_CLASS "ac_adapter" #define ACPI_BATTERY_CLASS "battery" diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 0619734895b2..d9339806df45 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -15,6 +15,8 @@ #include #include "sbshc.h" +#define PREFIX "ACPI: " + #define ACPI_SMB_HC_CLASS "smbus_host_controller" #define ACPI_SMB_HC_DEVICE_NAME "ACPI SMBus HC" diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c index 9c61ab2177cf..d11282975f35 100644 --- a/drivers/acpi/system.c +++ b/drivers/acpi/system.c @@ -31,6 +31,8 @@ #include +#define PREFIX "ACPI: " + #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME("system"); diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 564ea1424288..65f67815902a 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -47,6 +47,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_THERMAL_CLASS "thermal_zone" #define ACPI_THERMAL_DEVICE_NAME "Thermal Zone" #define ACPI_THERMAL_FILE_STATE "state" diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index f844941089bb..811fec10462b 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -30,6 +30,8 @@ #include #include +#include "internal.h" + #define _COMPONENT ACPI_BUS_COMPONENT ACPI_MODULE_NAME("utils"); diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 8851315ce858..a0fa3946b507 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -44,6 +44,8 @@ #include #include +#define PREFIX "ACPI: " + #define ACPI_VIDEO_CLASS "video" #define ACPI_VIDEO_BUS_NAME "Video Bus" #define ACPI_VIDEO_DEVICE_NAME "Video Device" diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 7cd2b63435ea..7032f25da9b5 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -38,6 +38,8 @@ #include #include +#define PREFIX "ACPI: " + ACPI_MODULE_NAME("video"); #define _COMPONENT ACPI_VIDEO_COMPONENT diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 7b287cb38b7a..998f02d2ba42 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -34,8 +34,7 @@ #include #include -#undef PREFIX -#define PREFIX "DMAR:" +#define PREFIX "DMAR: " /* No locks are needed as DMA remapping hardware unit * list is constructed at boot time and hotplug of diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 218b9a16ac3f..eabddc9c192b 100644 --- a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -700,7 +700,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) goto end; } - printk(KERN_INFO PREFIX "%s [%s] (%s)\n", + printk(KERN_INFO "ACPI: %s [%s] (%s)\n", acpi_device_name(device), acpi_device_bid(device), !device->power.state ? "on" : "off"); @@ -874,7 +874,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) goto end; } - printk(KERN_INFO PREFIX "%s [%s] (%s)\n", + printk(KERN_INFO "ACPI: %s [%s] (%s)\n", acpi_device_name(device), acpi_device_bid(device), !device->power.state ? 
"on" : "off"); diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index f215a5919192..177f8d767df4 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -42,7 +42,6 @@ MODULE_LICENSE("GPL"); #define ACPI_WMI_CLASS "wmi" -#undef PREFIX #define PREFIX "ACPI: WMI: " static DEFINE_MUTEX(wmi_data_lock); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c65e4ce6c3af..f485107ddc43 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -30,8 +30,6 @@ #include -#define PREFIX "ACPI: " - /* TBD: Make dynamic */ #define ACPI_MAX_HANDLES 10 struct acpi_handle_list { -- cgit v1.2.3 From e55a5999ffcf72dc4d43d73618957964cb87065a Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 28 Jul 2009 17:41:53 +0800 Subject: ACPI: Handle CONFIG_ACPI=n better from linux/acpi.h linux/acpi.h is the top level header for interfacing with the ACPI sub-system, so acpi_disabled should be up there instead of down in asm/acpi.h -- particularly since asm/acpi.h doesn't exist for all architectures. Same story for acpi_table_parse(), which is a top-level API to Linux/ACPI. This is necessary for building some code that used to always depend on CONFIG_ACPI=y, but will soon also need to build with CONFIG_ACPI=n. Signed-off-by: Feng Tang Signed-off-by: Len Brown --- arch/x86/include/asm/acpi.h | 1 - include/linux/acpi.h | 11 ++++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 20d1465a2ab0..4518dc500903 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -144,7 +144,6 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) #else /* !CONFIG_ACPI */ -#define acpi_disabled 1 #define acpi_lapic 0 #define acpi_ioapic 0 static inline void acpi_noirq_set(void) { } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 34321cfffeab..3fce811bf9ac 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -292,7 +292,10 @@ void __init acpi_s4_no_nvs(void); extern acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 flags); extern void acpi_early_init(void); -#else /* CONFIG_ACPI */ +#else /* !CONFIG_ACPI */ + +#define acpi_disabled 1 + static inline void acpi_early_init(void) { } static inline int early_acpi_boot_init(void) @@ -331,5 +334,11 @@ static inline int acpi_check_mem_region(resource_size_t start, return 0; } +struct acpi_table_header; +static inline int acpi_table_parse(char *id, + int (*handler)(struct acpi_table_header *)) +{ + return -1; +} #endif /* !CONFIG_ACPI */ #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From 117a9ac777f8034d4675b821172d2ff71f6ec47a Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 14 Aug 2009 15:10:24 -0400 Subject: SFI: create linux/sfi.h include/linux/include/sfi.h defines everything that customers of SFI need to know in order to use the SFI suport in the kernel. The primary API is sfi_table_parse(), where a driver or another part of the kernel can supply a handler to parse the named table. sfi.h also includes the currently defined table signatures and table formats. 
Signed-off-by: Feng Tang Signed-off-by: Len Brown --- include/linux/sfi.h | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) create mode 100644 include/linux/sfi.h (limited to 'include') diff --git a/include/linux/sfi.h b/include/linux/sfi.h new file mode 100644 index 000000000000..9a6f7607174e --- /dev/null +++ b/include/linux/sfi.h @@ -0,0 +1,206 @@ +/* sfi.h Simple Firmware Interface */ + +/* + + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + BSD LICENSE + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef _LINUX_SFI_H +#define _LINUX_SFI_H + +/* Table signatures reserved by the SFI specification */ +#define SFI_SIG_SYST "SYST" +#define SFI_SIG_FREQ "FREQ" +#define SFI_SIG_IDLE "IDLE" +#define SFI_SIG_CPUS "CPUS" +#define SFI_SIG_MTMR "MTMR" +#define SFI_SIG_MRTC "MRTC" +#define SFI_SIG_MMAP "MMAP" +#define SFI_SIG_APIC "APIC" +#define SFI_SIG_XSDT "XSDT" +#define SFI_SIG_WAKE "WAKE" +#define SFI_SIG_SPIB "SPIB" +#define SFI_SIG_I2CB "I2CB" +#define SFI_SIG_GPEM "GPEM" + +#define SFI_SIGNATURE_SIZE 4 +#define SFI_OEM_ID_SIZE 6 +#define SFI_OEM_TABLE_ID_SIZE 8 + +#define SFI_SYST_SEARCH_BEGIN 0x000E0000 +#define SFI_SYST_SEARCH_END 0x000FFFFF + +#define SFI_GET_NUM_ENTRIES(ptable, entry_type) \ + ((ptable->header.len - sizeof(struct sfi_table_header)) / \ + (sizeof(entry_type))) +/* + * Table structures must be byte-packed to match the SFI specification, + * as they are provided by the BIOS. + */ +struct sfi_table_header { + char sig[SFI_SIGNATURE_SIZE]; + u32 len; + u8 rev; + u8 csum; + char oem_id[SFI_OEM_ID_SIZE]; + char oem_table_id[SFI_OEM_TABLE_ID_SIZE]; +} __packed; + +struct sfi_table_simple { + struct sfi_table_header header; + u64 pentry[1]; +} __packed; + +/* Comply with UEFI spec 2.1 */ +struct sfi_mem_entry { + u32 type; + u64 phys_start; + u64 virt_start; + u64 pages; + u64 attrib; +} __packed; + +struct sfi_cpu_table_entry { + u32 apic_id; +} __packed; + +struct sfi_cstate_table_entry { + u32 hint; /* MWAIT hint */ + u32 latency; /* latency in ms */ +} __packed; + +struct sfi_apic_table_entry { + u64 phys_addr; /* phy base addr for APIC reg */ +} __packed; + +struct sfi_freq_table_entry { + u32 freq_mhz; /* in MHZ */ + u32 latency; /* transition latency in ms */ + u32 ctrl_val; /* value to write to PERF_CTL */ +} __packed; + +struct sfi_wake_table_entry { + u64 phys_addr; /* pointer to where the wake vector locates */ +} __packed; + +struct sfi_timer_table_entry { + u64 phys_addr; /* phy base addr for the timer */ + u32 freq_hz; /* in HZ */ + u32 irq; +} __packed; + +struct sfi_rtc_table_entry { + u64 phys_addr; /* phy base addr for the RTC */ + u32 irq; +} __packed; + +struct sfi_spi_table_entry { + u16 host_num; /* attached to host 0, 1...*/ + u16 cs; /* chip select */ + u16 irq_info; + char name[16]; + u8 dev_info[10]; +} __packed; + +struct sfi_i2c_table_entry { + u16 host_num; + u16 addr; /* slave addr */ + u16 irq_info; + char name[16]; + u8 dev_info[10]; +} __packed; + +struct sfi_gpe_table_entry { + u16 logical_id; /* logical id */ + u16 phys_id; /* physical GPE id */ +} __packed; + + +typedef int (*sfi_table_handler) (struct sfi_table_header *table); + +#ifdef CONFIG_SFI +extern void __init sfi_init(void); +extern int __init sfi_platform_init(void); +extern void __init sfi_init_late(void); +extern int sfi_table_parse(char *signature, char *oem_id, char *oem_table_id, + sfi_table_handler handler); + +extern int sfi_disabled; +static inline void disable_sfi(void) +{ + sfi_disabled = 1; +} + +#else /* !CONFIG_SFI */ + +static inline void sfi_init(void) +{ +} + +static inline void sfi_init_late(void) +{ +} + +#define sfi_disabled 0 + +static inline int sfi_table_parse(char *signature, char *oem_id, + char *oem_table_id, + sfi_table_handler handler) +{ + return -1; +} + +#endif /* !CONFIG_SFI */ + +#endif /*_LINUX_SFI_H*/ -- cgit v1.2.3 From 13e82d023c4c3f13ab1e665cbb917a7ebba8935c Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Fri, 14 Aug 2009 15:17:53 -0400 Subject: SFI: add capability to parse ACPI tables Extend SFI to access standard ACPI tables. (eg. 
the PCI MCFG) using sfi_acpi_table_parse(). Note that this is _not_ a hybrid ACPI + SFI mode. The platform boots in either ACPI mode or SFI mode. SFI runs only with acpi_disabled=1, which can be set at build-time via CONFIG_ACPI=n, or at boot time by the failure to find ACPI platform support. So this extension simply allows SFI-platforms to re-use existing standard table formats that happen to be defined to live in ACPI envelopes. Signed-off-by: Feng Tang Signed-off-by: Len Brown --- drivers/sfi/sfi_acpi.c | 175 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/sfi_acpi.h | 93 +++++++++++++++++++++++++ 2 files changed, 268 insertions(+) create mode 100644 drivers/sfi/sfi_acpi.c create mode 100644 include/linux/sfi_acpi.h (limited to 'include') diff --git a/drivers/sfi/sfi_acpi.c b/drivers/sfi/sfi_acpi.c new file mode 100644 index 000000000000..34aba30eb84b --- /dev/null +++ b/drivers/sfi/sfi_acpi.c @@ -0,0 +1,175 @@ +/* sfi_acpi.c Simple Firmware Interface - ACPI extensions */ + +/* + + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + BSD LICENSE + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#define KMSG_COMPONENT "SFI" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include + +#include +#include "sfi_core.h" + +/* + * SFI can access ACPI-defined tables via an optional ACPI XSDT. + * + * This allows re-use, and avoids re-definition, of standard tables. + * For example, the "MCFG" table is defined by PCI, reserved by ACPI, + * and is expected to be present many SFI-only systems. + */ + +static struct acpi_table_xsdt *xsdt_va __read_mostly; + +#define XSDT_GET_NUM_ENTRIES(ptable, entry_type) \ + ((ptable->header.length - sizeof(struct acpi_table_header)) / \ + (sizeof(entry_type))) + +static inline struct sfi_table_header *acpi_to_sfi_th( + struct acpi_table_header *th) +{ + return (struct sfi_table_header *)th; +} + +static inline struct acpi_table_header *sfi_to_acpi_th( + struct sfi_table_header *th) +{ + return (struct acpi_table_header *)th; +} + +/* + * sfi_acpi_parse_xsdt() + * + * Parse the ACPI XSDT for later access by sfi_acpi_table_parse(). + */ +static int __init sfi_acpi_parse_xsdt(struct sfi_table_header *th) +{ + struct sfi_table_key key = SFI_ANY_KEY; + int tbl_cnt, i; + void *ret; + + xsdt_va = (struct acpi_table_xsdt *)th; + tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64); + for (i = 0; i < tbl_cnt; i++) { + ret = sfi_check_table(xsdt_va->table_offset_entry[i], &key); + if (IS_ERR(ret)) { + disable_sfi(); + return -1; + } + } + + return 0; +} + +int __init sfi_acpi_init(void) +{ + struct sfi_table_key xsdt_key = { .sig = SFI_SIG_XSDT }; + + sfi_table_parse(SFI_SIG_XSDT, NULL, NULL, sfi_acpi_parse_xsdt); + + /* Only call the get_table to keep the table mapped */ + xsdt_va = (struct acpi_table_xsdt *)sfi_get_table(&xsdt_key); + return 0; +} + +static struct acpi_table_header *sfi_acpi_get_table(struct sfi_table_key *key) +{ + u32 tbl_cnt, i; + void *ret; + + tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64); + for (i = 0; i < tbl_cnt; i++) { + ret = sfi_check_table(xsdt_va->table_offset_entry[i], key); + if (!IS_ERR(ret) && ret) + return sfi_to_acpi_th(ret); + } + + return NULL; +} + +static void sfi_acpi_put_table(struct acpi_table_header *table) +{ + sfi_put_table(acpi_to_sfi_th(table)); +} + +/* + * sfi_acpi_table_parse() + * + * Find specified table in XSDT, run handler on it and return its return value + */ +int sfi_acpi_table_parse(char *signature, char *oem_id, char *oem_table_id, + int(*handler)(struct acpi_table_header *)) +{ + struct acpi_table_header *table = NULL; + struct sfi_table_key key; + int ret = 0; + + if (sfi_disabled) + return -1; + + key.sig = signature; + key.oem_id = oem_id; + key.oem_table_id = oem_table_id; + + table = sfi_acpi_get_table(&key); + if (!table) + return -EINVAL; + + ret = handler(table); + sfi_acpi_put_table(table); + return ret; +} diff --git a/include/linux/sfi_acpi.h b/include/linux/sfi_acpi.h new file mode 100644 index 000000000000..c4a5a8cd4469 --- /dev/null +++ b/include/linux/sfi_acpi.h @@ -0,0 +1,93 @@ +/* sfi.h Simple Firmware Interface */ + +/* + + This file is provided under a dual BSD/GPLv2 license. When using or + redistributing this file, you may do so under either license. + + GPL LICENSE SUMMARY + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of version 2 of the GNU General Public License as + published by the Free Software Foundation. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + The full GNU General Public License is included in this distribution + in the file called LICENSE.GPL. + + BSD LICENSE + + Copyright(c) 2009 Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef _LINUX_SFI_ACPI_H +#define _LINUX_SFI_ACPI_H + +#ifdef CONFIG_SFI +#include /* struct acpi_table_header */ + +extern int sfi_acpi_table_parse(char *signature, char *oem_id, + char *oem_table_id, + int (*handler)(struct acpi_table_header *)); + +static inline int acpi_sfi_table_parse(char *signature, + int (*handler)(struct acpi_table_header *)) +{ + if (!acpi_table_parse(signature, handler)) + return 0; + + return sfi_acpi_table_parse(signature, NULL, NULL, handler); +} +#else /* !CONFIG_SFI */ + +static inline int sfi_acpi_table_parse(char *signature, char *oem_id, + char *oem_table_id, + int (*handler)(struct acpi_table_header *)) +{ + return -1; +} + +static inline int acpi_sfi_table_parse(char *signature, + int (*handler)(struct acpi_table_header *)) +{ + return acpi_table_parse(signature, handler); +} +#endif /* !CONFIG_SFI */ + +#endif /*_LINUX_SFI_ACPI_H*/ -- cgit v1.2.3 From df19a6267705456f463871ae2aabc44299909d2a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Aug 2009 23:48:54 -0700 Subject: tcp: keepalive cleanups Introduce keepalive_probes(tp) helper, and use it, like keepalive_time_when(tp) and keepalive_intvl_when(tp) Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tcp.h | 5 +++++ net/ipv4/tcp.c | 6 +++--- net/ipv4/tcp_timer.c | 3 +-- 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 88af84306471..cbb2a4889fc9 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1007,6 +1007,11 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) return tp->keepalive_time ? : sysctl_tcp_keepalive_time; } +static inline int keepalive_probes(const struct tcp_sock *tp) +{ + return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; +} + static inline int tcp_fin_time(const struct sock *sk) { int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 91145244ea63..59f69a6c5863 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2336,13 +2336,13 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = !!(tp->nonagle&TCP_NAGLE_CORK); break; case TCP_KEEPIDLE: - val = (tp->keepalive_time ? : sysctl_tcp_keepalive_time) / HZ; + val = keepalive_time_when(tp) / HZ; break; case TCP_KEEPINTVL: - val = (tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl) / HZ; + val = keepalive_intvl_when(tp) / HZ; break; case TCP_KEEPCNT: - val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; + val = keepalive_probes(tp); break; case TCP_SYNCNT: val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b144a26359bc..c520fb6e06d9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -499,8 +499,7 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = tcp_time_stamp - tp->rcv_tstamp; if (elapsed >= keepalive_time_when(tp)) { - if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) || - (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) { + if (icsk->icsk_probes_out >= keepalive_probes(tp)) { tcp_send_active_reset(sk, GFP_ATOMIC); tcp_write_err(sk); goto out; -- cgit v1.2.3 From 0c7d400fafaeab6014504a6a6249f01bac7f7db4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 29 Aug 2009 20:44:04 +1000 Subject: crypto: skcipher - Fix skcipher_dequeue_givcrypt NULL test As struct skcipher_givcrypt_request includes struct crypto_request at a non-zero offset, testing for NULL after converting the pointer returned by crypto_dequeue_request does not work. This can result in IPsec crashes when the queue is depleted. This patch fixes it by doing the pointer conversion only when the return value is non-NULL. In particular, we create a new function __crypto_dequeue_request that does the pointer conversion. 
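To make the failure mode concrete, here is a small user-space toy (an illustrative sketch with invented struct names, not kernel code): because the embedded request sits at a non-zero offset, converting the NULL returned by an empty queue yields a non-NULL container pointer, so a NULL test performed after the conversion can never fire.

#include <stddef.h>
#include <stdio.h>

/* Stand-ins for crypto_async_request / skcipher_givcrypt_request. */
struct base_req { int flags; };
struct giv_req { long giv_state; struct base_req creq; }; /* creq at non-zero offset */

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct base_req *base = NULL;	/* what dequeue returns on an empty queue */
	struct giv_req *giv = container_of(base, struct giv_req, creq);

	/* giv ends up at (char *)NULL - offsetof(struct giv_req, creq):
	 * non-NULL, so "if (!giv)" silently passes and the caller goes on
	 * to dereference garbage. Doing the offset subtraction only for a
	 * non-NULL pointer, as __crypto_dequeue_request now does, avoids this. */
	printf("base=%p giv=%p\n", (void *)base, (void *)giv);
	return 0;
}
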
Reported-by: Brad Bosch Signed-off-by: Herbert Xu --- crypto/algapi.c | 11 +++++++++-- include/crypto/algapi.h | 1 + include/crypto/internal/skcipher.h | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/crypto/algapi.c b/crypto/algapi.c index 56c62e2858d5..df0863d56995 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -692,7 +692,7 @@ out: } EXPORT_SYMBOL_GPL(crypto_enqueue_request); -struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue) +void *__crypto_dequeue_request(struct crypto_queue *queue, unsigned int offset) { struct list_head *request; @@ -707,7 +707,14 @@ struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue) request = queue->list.next; list_del(request); - return list_entry(request, struct crypto_async_request, list); + return (char *)list_entry(request, struct crypto_async_request, list) - + offset; +} +EXPORT_SYMBOL_GPL(__crypto_dequeue_request); + +struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue) +{ + return __crypto_dequeue_request(queue, 0); } EXPORT_SYMBOL_GPL(crypto_dequeue_request); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index 010545436efa..5a2bd1cc9656 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -137,6 +137,7 @@ struct crypto_instance *crypto_alloc_instance(const char *name, void crypto_init_queue(struct crypto_queue *queue, unsigned int max_qlen); int crypto_enqueue_request(struct crypto_queue *queue, struct crypto_async_request *request); +void *__crypto_dequeue_request(struct crypto_queue *queue, unsigned int offset); struct crypto_async_request *crypto_dequeue_request(struct crypto_queue *queue); int crypto_tfm_in_queue(struct crypto_queue *queue, struct crypto_tfm *tfm); diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 2ba42cd7d6aa..3a748a6bf772 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -79,8 +79,8 @@ static inline int skcipher_enqueue_givcrypt( static inline struct skcipher_givcrypt_request *skcipher_dequeue_givcrypt( struct crypto_queue *queue) { - return container_of(ablkcipher_dequeue_request(queue), - struct skcipher_givcrypt_request, creq); + return __crypto_dequeue_request( + queue, offsetof(struct skcipher_givcrypt_request, creq.base)); } static inline void *skcipher_givcrypt_reqctx( -- cgit v1.2.3 From 2b022e3d4bf9885f781221c59d86283a2cdfc2ed Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:48:59 +0800 Subject: timers: Add tracepoints for timer_list timers Add tracepoints which cover the timer life cycle. The tracepoints are integrated with the already existing debug_object debug points as far as possible. 
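As a usage sketch (illustrative only, not from the changelog): the new events land under the "timer" subsystem of the event tracer, so, assuming debugfs is mounted at the conventional /sys/kernel/debug, they can be enabled and watched with

  echo 1 > /sys/kernel/debug/tracing/events/timer/enable
  cat /sys/kernel/debug/tracing/trace_pipe

and the output lines follow the TP_printk() format strings defined in the header below.
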
Based on patches from Mathieu: http://marc.info/?l=linux-kernel&m=123791201816247&w=2 and Anton: http://marc.info/?l=linux-kernel&m=124331396919301&w=2 [ tglx: Fixed timeout value in timer_start tracepoint, massaged comments and made the printk's more readable ] Signed-off-by: Xiao Guangrong Cc: Anton Blanchard Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8A9B.3040201@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 137 +++++++++++++++++++++++++++++++++++++++++++ kernel/timer.c | 32 ++++++++-- 2 files changed, 165 insertions(+), 4 deletions(-) create mode 100644 include/trace/events/timer.h (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h new file mode 100644 index 000000000000..725892a93b49 --- /dev/null +++ b/include/trace/events/timer.h @@ -0,0 +1,137 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM timer + +#if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TIMER_H + +#include +#include + +/** + * timer_init - called when the timer is initialized + * @timer: pointer to struct timer_list + */ +TRACE_EVENT(timer_init, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +/** + * timer_start - called when the timer is started + * @timer: pointer to struct timer_list + * @expires: the timers expiry time + */ +TRACE_EVENT(timer_start, + + TP_PROTO(struct timer_list *timer, unsigned long expires), + + TP_ARGS(timer, expires), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( void *, function ) + __field( unsigned long, expires ) + __field( unsigned long, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->function = timer->function; + __entry->expires = expires; + __entry->now = jiffies; + ), + + TP_printk("timer %p: func %pf, expires %lu, timeout %ld", + __entry->timer, __entry->function, __entry->expires, + (long)__entry->expires - __entry->now) +); + +/** + * timer_expire_entry - called immediately before the timer callback + * @timer: pointer to struct timer_list + * + * Allows to determine the timer latency. + */ +TRACE_EVENT(timer_expire_entry, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( unsigned long, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->now = jiffies; + ), + + TP_printk("timer %p: now %lu", __entry->timer, __entry->now) +); + +/** + * timer_expire_exit - called immediately after the timer callback returns + * @timer: pointer to struct timer_list + * + * When used in combination with the timer_expire_entry tracepoint we can + * determine the runtime of the timer callback function. + * + * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might + * be invalid. We solely track the pointer. 
+ */ +TRACE_EVENT(timer_expire_exit, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field(void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +/** + * timer_cancel - called when the timer is canceled + * @timer: pointer to struct timer_list + */ +TRACE_EVENT(timer_cancel, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +#endif /* _TRACE_TIMER_H */ + +/* This part must be outside protection */ +#include diff --git a/kernel/timer.c b/kernel/timer.c index 8e92be654dad..a7352b00703c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -46,6 +46,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; EXPORT_SYMBOL(jiffies_64); @@ -521,6 +524,25 @@ static inline void debug_timer_activate(struct timer_list *timer) { } static inline void debug_timer_deactivate(struct timer_list *timer) { } #endif +static inline void debug_init(struct timer_list *timer) +{ + debug_timer_init(timer); + trace_timer_init(timer); +} + +static inline void +debug_activate(struct timer_list *timer, unsigned long expires) +{ + debug_timer_activate(timer); + trace_timer_start(timer, expires); +} + +static inline void debug_deactivate(struct timer_list *timer) +{ + debug_timer_deactivate(timer); + trace_timer_cancel(timer); +} + static void __init_timer(struct timer_list *timer, const char *name, struct lock_class_key *key) @@ -549,7 +571,7 @@ void init_timer_key(struct timer_list *timer, const char *name, struct lock_class_key *key) { - debug_timer_init(timer); + debug_init(timer); __init_timer(timer, name, key); } EXPORT_SYMBOL(init_timer_key); @@ -568,7 +590,7 @@ static inline void detach_timer(struct timer_list *timer, { struct list_head *entry = &timer->entry; - debug_timer_deactivate(timer); + debug_deactivate(timer); __list_del(entry->prev, entry->next); if (clear_pending) @@ -632,7 +654,7 @@ __mod_timer(struct timer_list *timer, unsigned long expires, goto out_unlock; } - debug_timer_activate(timer); + debug_activate(timer, expires); new_base = __get_cpu_var(tvec_bases); @@ -787,7 +809,7 @@ void add_timer_on(struct timer_list *timer, int cpu) BUG_ON(timer_pending(timer) || !timer->function); spin_lock_irqsave(&base->lock, flags); timer_set_base(timer, base); - debug_timer_activate(timer); + debug_activate(timer, timer->expires); if (time_before(timer->expires, base->next_timer) && !tbase_get_deferrable(timer->base)) base->next_timer = timer->expires; @@ -1000,7 +1022,9 @@ static inline void __run_timers(struct tvec_base *base) */ lock_map_acquire(&lockdep_map); + trace_timer_expire_entry(timer); fn(data); + trace_timer_expire_exit(timer); lock_map_release(&lockdep_map); -- cgit v1.2.3 From c6a2a1770245f654f35f60e1458d4356680f9519 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:51:23 +0800 Subject: hrtimer: Add tracepoint for hrtimers Add tracepoints which cover the life cycle of a hrtimer. The tracepoints are integrated with the already existing debug_object debug points as far as possible. [ tglx: Fixed comments, made output conistent, easier to read and parse. Fixed output for 32bit archs which do not use the scalar representation of ktime_t. Hand current time to trace_hrtimer_expiry_entry instead of calling get_time() inside of the trace assignment. 
] Signed-off-by: Xiao Guangrong Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Anton Blanchard Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8B2B.5020908@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 139 +++++++++++++++++++++++++++++++++++++++++++ kernel/hrtimer.c | 40 ++++++++++--- 2 files changed, 171 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 725892a93b49..df3c07fa0cb8 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -5,6 +5,7 @@ #define _TRACE_TIMER_H #include +#include #include /** @@ -131,6 +132,144 @@ TRACE_EVENT(timer_cancel, TP_printk("timer %p", __entry->timer) ); +/** + * hrtimer_init - called when the hrtimer is initialized + * @timer: pointer to struct hrtimer + * @clockid: the hrtimers clock + * @mode: the hrtimers mode + */ +TRACE_EVENT(hrtimer_init, + + TP_PROTO(struct hrtimer *timer, clockid_t clockid, + enum hrtimer_mode mode), + + TP_ARGS(timer, clockid, mode), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( clockid_t, clockid ) + __field( enum hrtimer_mode, mode ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->clockid = clockid; + __entry->mode = mode; + ), + + TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer, + __entry->clockid == CLOCK_REALTIME ? + "CLOCK_REALTIME" : "CLOCK_MONOTONIC", + __entry->mode == HRTIMER_MODE_ABS ? + "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL") +); + +/** + * hrtimer_start - called when the hrtimer is started + * @timer: pointer to struct hrtimer + */ +TRACE_EVENT(hrtimer_start, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( void *, function ) + __field( s64, expires ) + __field( s64, softexpires ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->function = timer->function; + __entry->expires = hrtimer_get_expires(timer).tv64; + __entry->softexpires = hrtimer_get_softexpires(timer).tv64; + ), + + TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu", + __entry->timer, __entry->function, + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->expires }), + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->softexpires })) +); + +/** + * htimmer_expire_entry - called immediately before the hrtimer callback + * @timer: pointer to struct hrtimer + * @now: pointer to variable which contains current time of the + * timers base. + * + * Allows to determine the timer latency. + */ +TRACE_EVENT(hrtimer_expire_entry, + + TP_PROTO(struct hrtimer *timer, ktime_t *now), + + TP_ARGS(timer, now), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( s64, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->now = now->tv64; + ), + + TP_printk("hrtimer %p, now %llu", __entry->timer, + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->now })) + ); + +/** + * hrtimer_expire_exit - called immediately after the hrtimer callback returns + * @timer: pointer to struct hrtimer + * + * When used in combination with the hrtimer_expire_entry tracepoint we can + * determine the runtime of the callback function. 
+ */ +TRACE_EVENT(hrtimer_expire_exit, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("hrtimer %p", __entry->timer) +); + +/** + * hrtimer_cancel - called when the hrtimer is canceled + * @timer: pointer to struct hrtimer + */ +TRACE_EVENT(hrtimer_cancel, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("hrtimer %p", __entry->timer) +); + #endif /* _TRACE_TIMER_H */ /* This part must be outside protection */ diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index e2f91ecc01a8..b44d1b07377b 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -48,6 +48,8 @@ #include +#include + /* * The timer bases: * @@ -441,6 +443,26 @@ static inline void debug_hrtimer_activate(struct hrtimer *timer) { } static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } #endif +static inline void +debug_init(struct hrtimer *timer, clockid_t clockid, + enum hrtimer_mode mode) +{ + debug_hrtimer_init(timer); + trace_hrtimer_init(timer, clockid, mode); +} + +static inline void debug_activate(struct hrtimer *timer) +{ + debug_hrtimer_activate(timer); + trace_hrtimer_start(timer); +} + +static inline void debug_deactivate(struct hrtimer *timer) +{ + debug_hrtimer_deactivate(timer); + trace_hrtimer_cancel(timer); +} + /* High resolution timer related functions */ #ifdef CONFIG_HIGH_RES_TIMERS @@ -797,7 +819,7 @@ static int enqueue_hrtimer(struct hrtimer *timer, struct hrtimer *entry; int leftmost = 1; - debug_hrtimer_activate(timer); + debug_activate(timer); /* * Find the right place in the rbtree: @@ -883,7 +905,7 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) * reprogramming happens in the interrupt handler. This is a * rare case and less expensive than a smp call. */ - debug_hrtimer_deactivate(timer); + debug_deactivate(timer); timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == &__get_cpu_var(hrtimer_bases); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, @@ -1116,7 +1138,7 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { - debug_hrtimer_init(timer); + debug_init(timer, clock_id, mode); __hrtimer_init(timer, clock_id, mode); } EXPORT_SYMBOL_GPL(hrtimer_init); @@ -1140,7 +1162,7 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) } EXPORT_SYMBOL_GPL(hrtimer_get_res); -static void __run_hrtimer(struct hrtimer *timer) +static void __run_hrtimer(struct hrtimer *timer, ktime_t *now) { struct hrtimer_clock_base *base = timer->base; struct hrtimer_cpu_base *cpu_base = base->cpu_base; @@ -1149,7 +1171,7 @@ static void __run_hrtimer(struct hrtimer *timer) WARN_ON(!irqs_disabled()); - debug_hrtimer_deactivate(timer); + debug_deactivate(timer); __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); timer_stats_account_hrtimer(timer); fn = timer->function; @@ -1160,7 +1182,9 @@ static void __run_hrtimer(struct hrtimer *timer) * the timer base. 
*/ spin_unlock(&cpu_base->lock); + trace_hrtimer_expire_entry(timer, now); restart = fn(timer); + trace_hrtimer_expire_exit(timer); spin_lock(&cpu_base->lock); /* @@ -1271,7 +1295,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) break; } - __run_hrtimer(timer); + __run_hrtimer(timer, &basenow); } base++; } @@ -1393,7 +1417,7 @@ void hrtimer_run_queues(void) hrtimer_get_expires_tv64(timer)) break; - __run_hrtimer(timer); + __run_hrtimer(timer, &base->softirq_time); } spin_unlock(&cpu_base->lock); } @@ -1569,7 +1593,7 @@ static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, while ((node = rb_first(&old_base->active))) { timer = rb_entry(node, struct hrtimer, node); BUG_ON(hrtimer_callback_running(timer)); - debug_hrtimer_deactivate(timer); + debug_deactivate(timer); /* * Mark it as STATE_MIGRATE not INACTIVE otherwise the -- cgit v1.2.3 From 3f0a525ebf4b8ef041a332bbe4a73aee94bb064b Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:52:30 +0800 Subject: itimers: Add tracepoints for itimer Add tracepoints for all itimer variants: ITIMER_REAL, ITIMER_VIRTUAL and ITIMER_PROF. [ tglx: Fixed comments and made the output more readable, parseable and consistent. Replaced pid_vnr by pid_nr because the hrtimer callback can happen in any namespace ] Signed-off-by: Xiao Guangrong Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Anton Blanchard Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8B6E.2010109@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 66 ++++++++++++++++++++++++++++++++++++++++++++ kernel/itimer.c | 5 ++++ kernel/posix-cpu-timers.c | 7 ++++- 3 files changed, 77 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index df3c07fa0cb8..1844c48d640e 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -270,6 +270,72 @@ TRACE_EVENT(hrtimer_cancel, TP_printk("hrtimer %p", __entry->timer) ); +/** + * itimer_state - called when itimer is started or canceled + * @which: name of the interval timer + * @value: the itimers value, itimer is canceled if value->it_value is + * zero, otherwise it is started + * @expires: the itimers expiry time + */ +TRACE_EVENT(itimer_state, + + TP_PROTO(int which, const struct itimerval *const value, + cputime_t expires), + + TP_ARGS(which, value, expires), + + TP_STRUCT__entry( + __field( int, which ) + __field( cputime_t, expires ) + __field( long, value_sec ) + __field( long, value_usec ) + __field( long, interval_sec ) + __field( long, interval_usec ) + ), + + TP_fast_assign( + __entry->which = which; + __entry->expires = expires; + __entry->value_sec = value->it_value.tv_sec; + __entry->value_usec = value->it_value.tv_usec; + __entry->interval_sec = value->it_interval.tv_sec; + __entry->interval_usec = value->it_interval.tv_usec; + ), + + TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu", + __entry->which, __entry->expires, + __entry->value_sec, __entry->value_usec, + __entry->interval_sec, __entry->interval_usec) +); + +/** + * itimer_expire - called when itimer expires + * @which: type of the interval timer + * @pid: pid of the process which owns the timer + * @now: current time, used to calculate the latency of itimer + */ +TRACE_EVENT(itimer_expire, + + TP_PROTO(int which, struct pid *pid, cputime_t now), + + TP_ARGS(which, pid, now), + + TP_STRUCT__entry( + __field( int , which ) + __field( pid_t, pid ) + __field( 
cputime_t, now ) + ), + + TP_fast_assign( + __entry->which = which; + __entry->now = now; + __entry->pid = pid_nr(pid); + ), + + TP_printk("which %d, pid %d, now %lu", __entry->which, + (int) __entry->pid, __entry->now) +); + #endif /* _TRACE_TIMER_H */ /* This part must be outside protection */ diff --git a/kernel/itimer.c b/kernel/itimer.c index 8078a32d3b10..b03451ede528 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -122,6 +123,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) struct signal_struct *sig = container_of(timer, struct signal_struct, real_timer); + trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0); kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid); return HRTIMER_NORESTART; @@ -166,6 +168,8 @@ static void set_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, } it->expires = nval; it->incr = ninterval; + trace_itimer_state(clock_id == CPUCLOCK_VIRT ? + ITIMER_VIRTUAL : ITIMER_PROF, value, nval); spin_unlock_irq(&tsk->sighand->siglock); @@ -217,6 +221,7 @@ again: } else tsk->signal->it_real_incr.tv64 = 0; + trace_itimer_state(ITIMER_REAL, value, 0); spin_unlock_irq(&tsk->sighand->siglock); break; case ITIMER_VIRTUAL: diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 12161f74744e..5c9dc228747b 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -8,6 +8,7 @@ #include #include #include +#include /* * Called after updating RLIMIT_CPU to set timer expiration if necessary. @@ -1090,9 +1091,13 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, cputime_one_jiffy); it->error -= onecputick; } - } else + } else { it->expires = cputime_zero; + } + trace_itimer_expire(signo == SIGPROF ? + ITIMER_PROF : ITIMER_VIRTUAL, + tsk->signal->leader_pid, cur_time); __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); } -- cgit v1.2.3 From dd5d19bafd90d33043a4a14b2e2d98612caa293c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Aug 2009 14:58:16 -0700 Subject: rcu: Create rcutree plugins to handle hotplug CPU for multi-level trees When offlining CPUs from a multi-level tree, there is the possibility of offlining the last CPU from a given node when there are preempted RCU read-side critical sections that started life on one of the CPUs on that node. In this case, the corresponding tasks will be enqueued via the task_struct's rcu_node_entry list_head onto one of the rcu_node's blocked_tasks[] lists. These tasks need to be moved somewhere else so that they will prevent the current grace period from ending. That somewhere is the root rcu_node. Signed-off-by: Paul E. 
McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: mathieu.desnoyers@polymtl.ca Cc: josht@linux.vnet.ibm.com Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <20090827215816.GA30472@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/init_task.h | 2 +- include/linux/sched.h | 4 +-- kernel/rcutree.c | 2 ++ kernel/rcutree_plugin.h | 69 +++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 69 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 79d4baee31b6..9e7f2e8fc66e 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -98,7 +98,7 @@ extern struct group_info init_groups; #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ .rcu_read_unlock_special = 0, \ - .rcu_blocked_cpu = -1, \ + .rcu_blocked_node = NULL, \ .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), #else #define INIT_TASK_RCU_PREEMPT(tsk) diff --git a/include/linux/sched.h b/include/linux/sched.h index bfca26d63b13..3fe03151a8e6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1208,7 +1208,7 @@ struct task_struct { #ifdef CONFIG_TREE_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; - int rcu_blocked_cpu; + void *rcu_blocked_node; struct list_head rcu_node_entry; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ @@ -1735,7 +1735,7 @@ static inline void rcu_copy_process(struct task_struct *p) { p->rcu_read_lock_nesting = 0; p->rcu_read_unlock_special = 0; - p->rcu_blocked_cpu = -1; + p->rcu_blocked_node = NULL; INIT_LIST_HEAD(&p->rcu_node_entry); } diff --git a/kernel/rcutree.c b/kernel/rcutree.c index fee6316a8673..d903e2f2b840 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -81,6 +81,7 @@ struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); extern long rcu_batches_completed_sched(void); +static struct rcu_node *rcu_get_root(struct rcu_state *rsp); static void cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, unsigned long flags); static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags); @@ -876,6 +877,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) spin_unlock(&rnp->lock); /* irqs remain disabled. */ break; } + rcu_preempt_offline_tasks(rsp, rnp); mask = rnp->grpmask; spin_unlock(&rnp->lock); /* irqs remain disabled. */ rnp = rnp->parent; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 201334cdc200..04343bee646d 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -92,7 +92,7 @@ static void rcu_preempt_qs(int cpu) rnp = rdp->mynode; spin_lock(&rnp->lock); t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; - t->rcu_blocked_cpu = cpu; + t->rcu_blocked_node = (void *)rnp; /* * If this CPU has already checked in, then this task @@ -170,12 +170,21 @@ static void rcu_read_unlock_special(struct task_struct *t) if (special & RCU_READ_UNLOCK_BLOCKED) { t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; - /* Remove this task from the list it blocked on. */ - rnp = rcu_preempt_state.rda[t->rcu_blocked_cpu]->mynode; - spin_lock(&rnp->lock); + /* + * Remove this task from the list it blocked on. The + * task can migrate while we acquire the lock, but at + * most one time. So at most two passes through loop. 
+ */ + for (;;) { + rnp = (struct rcu_node *)t->rcu_blocked_node; + spin_lock(&rnp->lock); + if (rnp == (struct rcu_node *)t->rcu_blocked_node) + break; + spin_unlock(&rnp->lock); + } empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); list_del_init(&t->rcu_node_entry); - t->rcu_blocked_cpu = -1; + t->rcu_blocked_node = NULL; /* * If this was the last task on the current list, and if @@ -261,6 +270,47 @@ static int rcu_preempted_readers(struct rcu_node *rnp) #ifdef CONFIG_HOTPLUG_CPU +/* + * Handle tasklist migration for case in which all CPUs covered by the + * specified rcu_node have gone offline. Move them up to the root + * rcu_node. The reason for not just moving them to the immediate + * parent is to remove the need for rcu_read_unlock_special() to + * make more than two attempts to acquire the target rcu_node's lock. + * + * The caller must hold rnp->lock with irqs disabled. + */ +static void rcu_preempt_offline_tasks(struct rcu_state *rsp, + struct rcu_node *rnp) +{ + int i; + struct list_head *lp; + struct list_head *lp_root; + struct rcu_node *rnp_root = rcu_get_root(rsp); + struct task_struct *tp; + + if (rnp == rnp_root) + return; /* Shouldn't happen: at least one CPU online. */ + + /* + * Move tasks up to root rcu_node. Rely on the fact that the + * root rcu_node can be at most one ahead of the rest of the + * rcu_nodes in terms of gp_num value. This fact allows us to + * move the blocked_tasks[] array directly, element by element. + */ + for (i = 0; i < 2; i++) { + lp = &rnp->blocked_tasks[i]; + lp_root = &rnp_root->blocked_tasks[i]; + while (!list_empty(lp)) { + tp = list_entry(lp->next, typeof(*tp), rcu_node_entry); + spin_lock(&rnp_root->lock); /* irqs already disabled */ + list_del(&tp->rcu_node_entry); + tp->rcu_blocked_node = rnp_root; + list_add(&tp->rcu_node_entry, lp_root); + spin_unlock(&rnp_root->lock); /* irqs remain disabled */ + } + } +} + /* * Do CPU-offline processing for preemptable RCU. */ @@ -409,6 +459,15 @@ static int rcu_preempted_readers(struct rcu_node *rnp) #ifdef CONFIG_HOTPLUG_CPU +/* + * Because preemptable RCU does not exist, it never needs to migrate + * tasks that were blocked within RCU read-side critical sections. + */ +static void rcu_preempt_offline_tasks(struct rcu_state *rsp, + struct rcu_node *rnp) +{ +} + /* * Because preemptable RCU does not exist, it never needs CPU-offline * processing. -- cgit v1.2.3 From 868489660dabc0c28087cca3dbc1adbbc398c6fe Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 27 Aug 2009 15:00:12 -0700 Subject: rcu: Changes from reviews: avoid casts, fix/add warnings, improve comments Changes suggested by review comments from Josh Triplett and Mathieu Desnoyers. Signed-off-by: Paul E. 
McKenney Acked-by: Josh Triplett Acked-by: Mathieu Desnoyers Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: akpm@linux-foundation.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org LKML-Reference: <20090827220012.GA30525@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 +++- kernel/rcutree.c | 13 ++++++------- kernel/rcutree.h | 2 ++ kernel/rcutree_plugin.h | 10 ++++++---- 4 files changed, 17 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3fe03151a8e6..855fd0d3f174 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1163,6 +1163,8 @@ struct sched_rt_entity { #endif }; +struct rcu_node; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1208,7 +1210,7 @@ struct task_struct { #ifdef CONFIG_TREE_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; - void *rcu_blocked_node; + struct rcu_node *rcu_blocked_node; struct list_head rcu_node_entry; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d903e2f2b840..71bc79791cd9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -229,7 +229,6 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp) #endif /* #ifdef CONFIG_SMP */ #ifdef CONFIG_NO_HZ -static DEFINE_RATELIMIT_STATE(rcu_rs, 10 * HZ, 5); /** * rcu_enter_nohz - inform RCU that current CPU is entering nohz @@ -249,7 +248,7 @@ void rcu_enter_nohz(void) rdtp = &__get_cpu_var(rcu_dynticks); rdtp->dynticks++; rdtp->dynticks_nesting--; - WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs); + WARN_ON_ONCE(rdtp->dynticks & 0x1); local_irq_restore(flags); } @@ -268,7 +267,7 @@ void rcu_exit_nohz(void) rdtp = &__get_cpu_var(rcu_dynticks); rdtp->dynticks++; rdtp->dynticks_nesting++; - WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs); + WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); local_irq_restore(flags); smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ } @@ -287,7 +286,7 @@ void rcu_nmi_enter(void) if (rdtp->dynticks & 0x1) return; rdtp->dynticks_nmi++; - WARN_ON_RATELIMIT(!(rdtp->dynticks_nmi & 0x1), &rcu_rs); + WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1)); smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ } @@ -306,7 +305,7 @@ void rcu_nmi_exit(void) return; smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ rdtp->dynticks_nmi++; - WARN_ON_RATELIMIT(rdtp->dynticks_nmi & 0x1, &rcu_rs); + WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1); } /** @@ -322,7 +321,7 @@ void rcu_irq_enter(void) if (rdtp->dynticks_nesting++) return; rdtp->dynticks++; - WARN_ON_RATELIMIT(!(rdtp->dynticks & 0x1), &rcu_rs); + WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ } @@ -341,7 +340,7 @@ void rcu_irq_exit(void) return; smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ rdtp->dynticks++; - WARN_ON_RATELIMIT(rdtp->dynticks & 0x1, &rcu_rs); + WARN_ON_ONCE(rdtp->dynticks & 0x1); /* If the interrupt queued a callback, get out of dyntick mode. */ if (__get_cpu_var(rcu_sched_data).nxtlist || diff --git a/kernel/rcutree.h b/kernel/rcutree.h index ca560364d8cd..bf8a6f9f134d 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -81,6 +81,8 @@ struct rcu_dynticks { struct rcu_node { spinlock_t lock; long gpnum; /* Current grace period for this node. 
*/ + /* This will either be equal to or one */ + /* behind the root rcu_node's gpnum. */ unsigned long qsmask; /* CPUs or groups that need to switch in */ /* order for current grace period to proceed.*/ unsigned long qsmaskinit; diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 04343bee646d..47789369ea59 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -92,7 +92,7 @@ static void rcu_preempt_qs(int cpu) rnp = rdp->mynode; spin_lock(&rnp->lock); t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; - t->rcu_blocked_node = (void *)rnp; + t->rcu_blocked_node = rnp; /* * If this CPU has already checked in, then this task @@ -176,9 +176,9 @@ static void rcu_read_unlock_special(struct task_struct *t) * most one time. So at most two passes through loop. */ for (;;) { - rnp = (struct rcu_node *)t->rcu_blocked_node; + rnp = t->rcu_blocked_node; spin_lock(&rnp->lock); - if (rnp == (struct rcu_node *)t->rcu_blocked_node) + if (rnp == t->rcu_blocked_node) break; spin_unlock(&rnp->lock); } @@ -288,8 +288,10 @@ static void rcu_preempt_offline_tasks(struct rcu_state *rsp, struct rcu_node *rnp_root = rcu_get_root(rsp); struct task_struct *tp; - if (rnp == rnp_root) + if (rnp == rnp_root) { + WARN_ONCE(1, "Last CPU thought to be offlined?"); return; /* Shouldn't happen: at least one CPU online. */ + } /* * Move tasks up to root rcu_node. Rely on the fact that the -- cgit v1.2.3 From 5bfb5b51382f4bd01d9ced11503264d2cc74fe41 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 26 Aug 2009 16:20:48 -0700 Subject: irq: Add irq_node() primitive ... to return irq_desc node info without #ifdefs at the callsites. Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: Jesse Barnes LKML-Reference: <4A95C350.8060308@kernel.org> Signed-off-by: Ingo Molnar --- include/linux/irqnr.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h index ec87b212ff7d..7bf89bc8cbca 100644 --- a/include/linux/irqnr.h +++ b/include/linux/irqnr.h @@ -41,6 +41,12 @@ extern struct irq_desc *irq_to_desc(unsigned int irq); ; \ else +#ifdef CONFIG_SMP +#define irq_node(irq) (irq_to_desc(irq)->node) +#else +#define irq_node(irq) 0 +#endif + #endif /* CONFIG_GENERIC_HARDIRQS */ #define for_each_irq_nr(irq) \ -- cgit v1.2.3 From 138d15692bf76841f252d4b836a535cf5f9154e9 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Fri, 28 Aug 2009 23:29:38 +0400 Subject: ACPICA: Don't switch task then not allowed Signed-off-by: Alexey Starikovskiy Signed-off-by: Len Brown --- include/acpi/platform/aclinux.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index fcb8e4b159b1..9d7febde10a1 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -149,10 +149,10 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache) #define ACPI_FREE(a) kfree(a) /* Used within ACPICA to show where it is safe to preempt execution */ - +#include #define ACPI_PREEMPTION_POINT() \ do { \ - if (!irqs_disabled()) \ + if (!in_atomic_preempt_off()) \ cond_resched(); \ } while (0) -- cgit v1.2.3 From 2befdcea96fcd9a13e94373c66ea1dd7365d2a74 Mon Sep 17 00:00:00 2001 From: Matt Carlson Date: Fri, 28 Aug 2009 12:28:45 +0000 Subject: tg3: Add new 5785 10/100 only device ID This patch adds a new device ID for those 5785 devices that will only use 10/100 phys. Signed-off-by: Matt Carlson Signed-off-by: David S. 
Miller --- drivers/net/tg3.c | 3 ++- drivers/net/tg3.h | 2 ++ include/linux/pci_ids.h | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index ab3159ef4c56..d43b30b80f1e 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -219,7 +219,8 @@ static struct pci_device_id tg3_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5761E)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5761S)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5761SE)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5785)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5785_G)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_5785_F)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57780)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57760)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, TG3PCI_DEVICE_TIGON3_57790)}, diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 60b12ab79334..1c9495df67ce 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -44,6 +44,8 @@ #define TG3PCI_DEVICE_TIGON3_57760 0x1690 #define TG3PCI_DEVICE_TIGON3_57790 0x1694 #define TG3PCI_DEVICE_TIGON3_57788 0x1691 +#define TG3PCI_DEVICE_TIGON3_5785_G 0x1699 /* GPHY */ +#define TG3PCI_DEVICE_TIGON3_5785_F 0x16a0 /* 10/100 only */ /* 0x04 --> 0x64 unused */ #define TG3PCI_MSI_DATA 0x00000064 /* 0x66 --> 0x68 unused */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index fdc3110c90c0..85492546d33d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2064,7 +2064,6 @@ #define PCI_DEVICE_ID_TIGON3_5787M 0x1693 #define PCI_DEVICE_ID_TIGON3_5782 0x1696 #define PCI_DEVICE_ID_TIGON3_5784 0x1698 -#define PCI_DEVICE_ID_TIGON3_5785 0x1699 #define PCI_DEVICE_ID_TIGON3_5786 0x169a #define PCI_DEVICE_ID_TIGON3_5787 0x169b #define PCI_DEVICE_ID_TIGON3_5788 0x169c -- cgit v1.2.3 From ad283ea4a3ce82cda2efe33163748a397b31b1eb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 29 Aug 2009 19:09:26 -0700 Subject: async_tx: add sum check flags Replace the flat zero_sum_result with a collection of flags to contain the P (xor) zero-sum result, and the soon to be utilized Q (raid6 reed solomon syndrome) zero-sum result. Use the SUM_CHECK_ namespace instead of DMA_ since these flags will be used on non-dma-zero-sum enabled platforms. 
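As a rough usage sketch (not taken from the patch; the two handler names are hypothetical placeholders), a client that hands a result pointer to async_xor_val() would test the returned mask after the operation completes along these lines, using the sum_check_flags bits this patch adds to include/linux/dmaengine.h:

	enum sum_check_flags result = 0;

	/* ... async_xor_val(dest, src_list, 0, src_cnt, len, &result, submit)
	 *     has been submitted and quiesced ... */

	if (result & SUM_CHECK_P_RESULT)
		handle_p_mismatch();	/* xor (P) parity did not verify */
	if (result & SUM_CHECK_Q_RESULT)
		handle_q_mismatch();	/* reed-solomon (Q) syndrome did not verify */

The raid5.c hunk below does essentially this when it masks sh->ops.zero_sum_result with SUM_CHECK_P_RESULT.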
Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- arch/arm/include/asm/hardware/iop3xx-adma.h | 5 +++-- arch/arm/mach-iop13xx/include/mach/adma.h | 12 +++++++----- crypto/async_tx/async_xor.c | 4 ++-- drivers/md/raid5.c | 2 +- drivers/md/raid5.h | 5 +++-- include/linux/async_tx.h | 2 +- include/linux/dmaengine.h | 21 ++++++++++++++++++++- 7 files changed, 37 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h index 83e6ba338e2c..26eefea02314 100644 --- a/arch/arm/include/asm/hardware/iop3xx-adma.h +++ b/arch/arm/include/asm/hardware/iop3xx-adma.h @@ -756,13 +756,14 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->src[0] = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop3xx_desc_aau *hw_desc = desc->hw_desc; struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); - return desc_ctrl.zero_result_err; + return desc_ctrl.zero_result_err << SUM_CHECK_P; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h index 5722e86f2174..1cd31df8924d 100644 --- a/arch/arm/mach-iop13xx/include/mach/adma.h +++ b/arch/arm/mach-iop13xx/include/mach/adma.h @@ -428,18 +428,20 @@ static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, hw_desc->block_fill_data = val; } -static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) +static inline enum sum_check_flags +iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) { struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; + enum sum_check_flags flags; BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); - if (desc_ctrl.pq_xfer_en) - return byte_count.zero_result_err_q; - else - return byte_count.zero_result_err; + flags = byte_count.zero_result_err_q << SUM_CHECK_Q; + flags |= byte_count.zero_result_err << SUM_CHECK_P; + + return flags; } static inline void iop_chan_append(struct iop_adma_chan *chan) diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 1e96c4df7061..78fb7780272a 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -246,7 +246,7 @@ static int page_is_zero(struct page *p, unsigned int offset, size_t len) */ struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, u32 *result, + int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit) { struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR_VAL, @@ -304,7 +304,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, async_tx_quiesce(&tx); - *result = page_is_zero(dest, offset, len) ? 
0 : 1; + *result = !page_is_zero(dest, offset, len) << SUM_CHECK_P; async_tx_sync_epilog(submit); submit->flags = flags_orig; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7727954cf726..1f2a266f3cf7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2590,7 +2590,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, * we are done. Otherwise update the mismatch count and repair * parity if !MD_RECOVERY_CHECK */ - if (sh->ops.zero_sum_result == 0) + if ((sh->ops.zero_sum_result & SUM_CHECK_P_RESULT) == 0) /* parity is correct (on disc, * not in buffer any more) */ diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index e7baabffee86..75f2c6c4cf90 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -2,6 +2,7 @@ #define _RAID5_H #include +#include /* * @@ -215,8 +216,8 @@ struct stripe_head { * @target - STRIPE_OP_COMPUTE_BLK target */ struct stripe_operations { - int target; - u32 zero_sum_result; + int target; + enum sum_check_flags zero_sum_result; } ops; struct r5dev { struct bio req; diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 00cfb637ddf2..3d21a2517518 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -148,7 +148,7 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, struct dma_async_tx_descriptor * async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, - int src_cnt, size_t len, u32 *result, + int src_cnt, size_t len, enum sum_check_flags *result, struct async_submit_ctl *submit); struct dma_async_tx_descriptor * diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 6768727d00d7..02447afcebad 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -86,6 +86,25 @@ enum dma_ctrl_flags { DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), }; +/** + * enum sum_check_bits - bit position of pq_check_flags + */ +enum sum_check_bits { + SUM_CHECK_P = 0, + SUM_CHECK_Q = 1, +}; + +/** + * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations + * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise + * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise + */ +enum sum_check_flags { + SUM_CHECK_P_RESULT = (1 << SUM_CHECK_P), + SUM_CHECK_Q_RESULT = (1 << SUM_CHECK_Q), +}; + + /** * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. * See linux/cpumask.h @@ -245,7 +264,7 @@ struct dma_device { unsigned int src_cnt, size_t len, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, - size_t len, u32 *result, unsigned long flags); + size_t len, enum sum_check_flags *result, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); -- cgit v1.2.3 From 95475e57113c66aac7583925736ed2e2d58c990d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:19:02 -0700 Subject: async_tx: remove walk of tx->parent chain in dma_wait_for_async_tx We currently walk the parent chain when waiting for a given tx to complete however this walk may race with the driver cleanup routine. The routines in async_raid6_recov.c may fall back to the synchronous path at any point so we need to be prepared to call async_tx_quiesce() (which calls dma_wait_for_async_tx). 
To remove the ->parent walk we guarantee that every time a dependency is attached ->issue_pending() is invoked, then we can simply poll the initial descriptor until completion. This also allows for a lighter weight 'issue pending' implementation as there is no longer a requirement to iterate through all the channels' ->issue_pending() routines as long as operations have been submitted in an ordered chain. async_tx_issue_pending() is added for this case. Signed-off-by: Dan Williams --- crypto/async_tx/async_tx.c | 13 +++++++------ drivers/dma/dmaengine.c | 45 ++++++++++----------------------------------- include/linux/async_tx.h | 23 +++++++++++++++++++++++ 3 files changed, 40 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 6e37ad3f4417..60615fedcf5e 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -77,8 +77,8 @@ static void async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, struct dma_async_tx_descriptor *tx) { - struct dma_chan *chan; - struct dma_device *device; + struct dma_chan *chan = depend_tx->chan; + struct dma_device *device = chan->device; struct dma_async_tx_descriptor *intr_tx = (void *) ~0; /* first check to see if we can still append to depend_tx */ @@ -90,11 +90,11 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, } spin_unlock_bh(&depend_tx->lock); - if (!intr_tx) + /* attached dependency, flush the parent channel */ + if (!intr_tx) { + device->device_issue_pending(chan); return; - - chan = depend_tx->chan; - device = chan->device; + } /* see if we can schedule an interrupt * otherwise poll for completion @@ -128,6 +128,7 @@ async_tx_channel_switch(struct dma_async_tx_descriptor *depend_tx, intr_tx->tx_submit(intr_tx); async_tx_ack(intr_tx); } + device->device_issue_pending(chan); } else { if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) panic("%s: DMA_ERROR waiting for depend_tx\n", diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 6781e8f3c064..e002e0e0d055 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -934,49 +934,24 @@ EXPORT_SYMBOL(dma_async_tx_descriptor_init); /* dma_wait_for_async_tx - spin wait for a transaction to complete * @tx: in-flight transaction to wait on - * - * This routine assumes that tx was obtained from a call to async_memcpy, - * async_xor, async_memset, etc which ensures that tx is "in-flight" (prepped - * and submitted). Walking the parent chain is only meant to cover for DMA - * drivers that do not implement the DMA_INTERRUPT capability and may race with - * the driver's descriptor cleanup routine. 
*/ enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) { - enum dma_status status; - struct dma_async_tx_descriptor *iter; - struct dma_async_tx_descriptor *parent; + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); if (!tx) return DMA_SUCCESS; - WARN_ONCE(tx->parent, "%s: speculatively walking dependency chain for" - " %s\n", __func__, dma_chan_name(tx->chan)); - - /* poll through the dependency chain, return when tx is complete */ - do { - iter = tx; - - /* find the root of the unsubmitted dependency chain */ - do { - parent = iter->parent; - if (!parent) - break; - else - iter = parent; - } while (parent); - - /* there is a small window for ->parent == NULL and - * ->cookie == -EBUSY - */ - while (iter->cookie == -EBUSY) - cpu_relax(); - - status = dma_sync_wait(iter->chan, iter->cookie); - } while (status == DMA_IN_PROGRESS || (iter != tx)); - - return status; + while (tx->cookie == -EBUSY) { + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + pr_err("%s timeout waiting for descriptor submission\n", + __func__); + return DMA_ERROR; + } + cpu_relax(); + } + return dma_sync_wait(tx->chan, tx->cookie); } EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 3d21a2517518..12a2efcbd565 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -83,6 +83,24 @@ struct async_submit_ctl { #ifdef CONFIG_DMA_ENGINE #define async_tx_issue_pending_all dma_issue_pending_all + +/** + * async_tx_issue_pending - send pending descriptor to the hardware channel + * @tx: descriptor handle to retrieve hardware context + * + * Note: any dependent operations will have already been issued by + * async_tx_channel_switch, or (in the case of no channel switch) will + * be already pending on this channel. + */ +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + if (likely(tx)) { + struct dma_chan *chan = tx->chan; + struct dma_device *dma = chan->device; + + dma->device_issue_pending(chan); + } +} #ifdef CONFIG_ARCH_HAS_ASYNC_TX_FIND_CHANNEL #include #else @@ -98,6 +116,11 @@ static inline void async_tx_issue_pending_all(void) do { } while (0); } +static inline void async_tx_issue_pending(struct dma_async_tx_descriptor *tx) +{ + do { } while (0); +} + static inline struct dma_chan * async_tx_find_channel(struct async_submit_ctl *submit, enum dma_transaction_type tx_type, struct page **dst, -- cgit v1.2.3 From b2f46fd8ef3dff2ab30f31126833f78b7480283a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:36 -0700 Subject: async_tx: add support for asynchronous GF multiplication [ Based on an original patch by Yuri Tikhonov ] This adds support for doing asynchronous GF multiplication by adding two additional functions to the async_tx API: async_gen_syndrome() does simultaneous XOR and Galois field multiplication of sources. async_syndrome_val() validates the given source buffers against known P and Q values. When a request is made to run async_pq against more than the hardware maximum number of supported sources we need to reuse the previous generated P and Q values as sources into the next operation. Care must be taken to remove Q from P' and P from Q'. 
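Addition in GF(2^8) is XOR, so a term that appears twice cancels ({01}*q + {01}*q = 0) and a {00} coefficient contributes nothing; the continuation scheme shown in the example below relies on exactly these identities.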
For example to perform a 5 source pq op with hardware that only supports 4 sources at a time the following approach is taken: p, q = PQ(src0, src1, src2, src3, COEF({01}, {02}, {04}, {08})) p', q' = PQ(p, q, q, src4, COEF({00}, {01}, {00}, {10})) p' = p + q + q + src4 = p + src4 q' = {00}*p + {01}*q + {00}*q + {10}*src4 = q + {10}*src4 Note: 4 is the minimum acceptable maxpq otherwise we punt to synchronous-software path. The DMA_PREP_CONTINUE flag indicates to the driver to reuse p and q as sources (in the above manner) and fill the remaining slots up to maxpq with the new sources/coefficients. Note1: Some devices have native support for P+Q continuation and can skip this extra work. Devices with this capability can advertise it with dma_set_maxpq. It is up to each driver how to handle the DMA_PREP_CONTINUE flag. Note2: The api supports disabling the generation of P when generating Q, this is ignored by the synchronous path but is implemented by some dma devices to save unnecessary writes. In this case the continuation algorithm is simplified to only reuse Q as a source. Cc: H. Peter Anvin Cc: David Woodhouse Signed-off-by: Yuri Tikhonov Signed-off-by: Ilya Yanok Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 3 + arch/arm/mach-iop13xx/setup.c | 2 +- crypto/async_tx/Kconfig | 4 + crypto/async_tx/Makefile | 1 + crypto/async_tx/async_pq.c | 388 ++++++++++++++++++++++++++++++++++ crypto/async_tx/async_xor.c | 2 +- drivers/dma/dmaengine.c | 4 + drivers/dma/iop-adma.c | 2 +- include/linux/async_tx.h | 9 + include/linux/dmaengine.h | 87 +++++++- 10 files changed, 493 insertions(+), 9 deletions(-) create mode 100644 crypto/async_tx/async_pq.c (limited to 'include') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 6b15e488c0e7..0e48e054d69a 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -64,6 +64,9 @@ xor - xor a series of source buffers and write the result to a xor_val - xor a series of source buffers and set a flag if the result is zero. 
The implementation attempts to prevent writes to memory +pq - generate the p+q (raid6 syndrome) from a series of source buffers +pq_val - validate that a p and or q buffer are in sync with a given series of + sources 3.3 Descriptor management: The return value is non-NULL and points to a 'descriptor' when the operation diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index 9800228b71d3..2e7ca0d75f8a 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -506,7 +506,7 @@ void __init iop13xx_platform_init(void) dma_cap_set(DMA_MEMSET, plat_data->cap_mask); dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); - dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); + dma_cap_set(DMA_PQ, plat_data->cap_mask); dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); dma_cap_set(DMA_PQ_VAL, plat_data->cap_mask); break; diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index d8fb39145986..cb6d7314f198 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -14,3 +14,7 @@ config ASYNC_MEMSET tristate select ASYNC_CORE +config ASYNC_PQ + tristate + select ASYNC_CORE + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 27baa7d52fbc..1b9926588259 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_ASYNC_CORE) += async_tx.o obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o +obj-$(CONFIG_ASYNC_PQ) += async_pq.o diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c new file mode 100644 index 000000000000..108b21efb499 --- /dev/null +++ b/crypto/async_tx/async_pq.c @@ -0,0 +1,388 @@ +/* + * Copyright(c) 2007 Yuri Tikhonov + * Copyright(c) 2009 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. 
+ */ +#include +#include +#include +#include +#include + +/** + * scribble - space to hold throwaway P buffer for synchronous gen_syndrome + */ +static struct page *scribble; + +static bool is_raid6_zero_block(struct page *p) +{ + return p == (void *) raid6_empty_zero_page; +} + +/* the struct page *blocks[] parameter passed to async_gen_syndrome() + * and async_syndrome_val() contains the 'P' destination address at + * blocks[disks-2] and the 'Q' destination address at blocks[disks-1] + * + * note: these are macros as they are used as lvalues + */ +#define P(b, d) (b[d-2]) +#define Q(b, d) (b[d-1]) + +/** + * do_async_gen_syndrome - asynchronously calculate P and/or Q + */ +static __async_inline struct dma_async_tx_descriptor * +do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, + const unsigned char *scfs, unsigned int offset, int disks, + size_t len, dma_addr_t *dma_src, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct dma_device *dma = chan->device; + enum dma_ctrl_flags dma_flags = 0; + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + dma_async_tx_callback cb_param_orig = submit->cb_param; + int src_cnt = disks - 2; + unsigned char coefs[src_cnt]; + unsigned short pq_src_cnt; + dma_addr_t dma_dest[2]; + int src_off = 0; + int idx; + int i; + + /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ + if (P(blocks, disks)) + dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (Q(blocks, disks)) + dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset, + len, DMA_BIDIRECTIONAL); + else + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + + /* convert source addresses being careful to collapse 'empty' + * sources and update the coefficients accordingly + */ + for (i = 0, idx = 0; i < src_cnt; i++) { + if (is_raid6_zero_block(blocks[i])) + continue; + dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, + DMA_TO_DEVICE); + coefs[idx] = scfs[i]; + idx++; + } + src_cnt = idx; + + while (src_cnt > 0) { + submit->flags = flags_orig; + pq_src_cnt = min(src_cnt, dma_maxpq(dma, dma_flags)); + /* if we are submitting additional pqs, leave the chain open, + * clear the callback parameters, and leave the destination + * buffers mapped + */ + if (src_cnt > pq_src_cnt) { + submit->flags &= ~ASYNC_TX_ACK; + dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = NULL; + submit->cb_param = NULL; + } else { + dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP; + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + if (cb_fn_orig) + dma_flags |= DMA_PREP_INTERRUPT; + } + + /* Since we have clobbered the src_list we are committed + * to doing this asynchronously. 
Drivers force forward + * progress in case they can not provide a descriptor + */ + for (;;) { + tx = dma->device_prep_dma_pq(chan, dma_dest, + &dma_src[src_off], + pq_src_cnt, + &coefs[src_off], len, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + + async_tx_submit(chan, tx, submit); + submit->depend_tx = tx; + + /* drop completed sources */ + src_cnt -= pq_src_cnt; + src_off += pq_src_cnt; + + dma_flags |= DMA_PREP_CONTINUE; + } + + return tx; +} + +/** + * do_sync_gen_syndrome - synchronously calculate a raid6 syndrome + */ +static void +do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + void **srcs; + int i; + + if (submit->scribble) + srcs = submit->scribble; + else + srcs = (void **) blocks; + + for (i = 0; i < disks; i++) { + if (is_raid6_zero_block(blocks[i])) { + BUG_ON(i > disks - 3); /* P or Q can't be zero */ + srcs[i] = blocks[i]; + } else + srcs[i] = page_address(blocks[i]) + offset; + } + raid6_call.gen_syndrome(disks, len, srcs); + async_tx_sync_epilog(submit); +} + +/** + * async_gen_syndrome - asynchronously calculate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @submit: submission/completion modifiers + * + * General note: This routine assumes a field of GF(2^8) with a + * primitive polynomial of 0x11d and a generator of {02}. + * + * 'disks' note: callers can optionally omit either P or Q (but not + * both) from the calculation by setting blocks[disks-2] or + * blocks[disks-1] to NULL. When P or Q is omitted 'len' must be <= + * PAGE_SIZE as a temporary buffer of this size is used in the + * synchronous path. 'disks' always accounts for both destination + * buffers. + * + * 'blocks' note: if submit->scribble is NULL then the contents of + * 'blocks' may be overridden + */ +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, + size_t len, struct async_submit_ctl *submit) +{ + int src_cnt = disks - 2; + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &P(blocks, disks), 2, + blocks, src_cnt, len); + struct dma_device *device = chan ? 
chan->device : NULL; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && + (src_cnt <= dma_maxpq(device, 0) || + dma_maxpq(device, DMA_PREP_CONTINUE) > 0)) { + /* run the p+q asynchronously */ + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, + disks, len, dma_src, submit); + } + + /* run the pq synchronously */ + pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + if (!P(blocks, disks)) { + P(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + if (!Q(blocks, disks)) { + Q(blocks, disks) = scribble; + BUG_ON(len + offset > PAGE_SIZE); + } + do_sync_gen_syndrome(blocks, offset, disks, len, submit); + + return NULL; +} +EXPORT_SYMBOL_GPL(async_gen_syndrome); + +/** + * async_syndrome_val - asynchronously validate a raid6 syndrome + * @blocks: source blocks from idx 0..disks-3, P @ disks-2 and Q @ disks-1 + * @offset: common offset into each block (src and dest) to start transaction + * @disks: number of blocks (including missing P or Q, see below) + * @len: length of operation in bytes + * @pqres: on val failure SUM_CHECK_P_RESULT and/or SUM_CHECK_Q_RESULT are set + * @spare: temporary result buffer for the synchronous case + * @submit: submission / completion modifiers + * + * The same notes from async_gen_syndrome apply to the 'blocks', + * and 'disks' parameters of this routine. The synchronous path + * requires a temporary result buffer and submit->scribble to be + * specified. + */ +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int disks, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ_VAL, + NULL, 0, blocks, disks, + len); + struct dma_device *device = chan ? chan->device : NULL; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = submit->cb_fn ? 
DMA_PREP_INTERRUPT : 0; + dma_addr_t *dma_src = NULL; + + BUG_ON(disks < 4); + + if (submit->scribble) + dma_src = submit->scribble; + else if (sizeof(dma_addr_t) <= sizeof(struct page *)) + dma_src = (dma_addr_t *) blocks; + + if (dma_src && device && disks <= dma_maxpq(device, 0)) { + struct device *dev = device->dev; + dma_addr_t *pq = &dma_src[disks-2]; + int i; + + pr_debug("%s: (async) disks: %d len: %zu\n", + __func__, disks, len); + if (!P(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_P; + if (!Q(blocks, disks)) + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + for (i = 0; i < disks; i++) + if (likely(blocks[i])) { + BUG_ON(is_raid6_zero_block(blocks[i])); + dma_src[i] = dma_map_page(dev, blocks[i], + offset, len, + DMA_TO_DEVICE); + } + + for (;;) { + tx = device->device_prep_dma_pq_val(chan, pq, dma_src, + disks - 2, + raid6_gfexp, + len, pqres, + dma_flags); + if (likely(tx)) + break; + async_tx_quiesce(&submit->depend_tx); + dma_async_issue_pending(chan); + } + async_tx_submit(chan, tx, submit); + + return tx; + } else { + struct page *p_src = P(blocks, disks); + struct page *q_src = Q(blocks, disks); + enum async_tx_flags flags_orig = submit->flags; + dma_async_tx_callback cb_fn_orig = submit->cb_fn; + void *scribble = submit->scribble; + void *cb_param_orig = submit->cb_param; + void *p, *q, *s; + + pr_debug("%s: (sync) disks: %d len: %zu\n", + __func__, disks, len); + + /* caller must provide a temporary result buffer and + * allow the input parameters to be preserved + */ + BUG_ON(!spare || !scribble); + + /* wait for any prerequisite operations */ + async_tx_quiesce(&submit->depend_tx); + + /* recompute p and/or q into the temporary buffer and then + * check to see the result matches the current value + */ + tx = NULL; + *pqres = 0; + if (p_src) { + init_async_submit(submit, ASYNC_TX_XOR_ZERO_DST, NULL, + NULL, NULL, scribble); + tx = async_xor(spare, blocks, offset, disks-2, len, submit); + async_tx_quiesce(&tx); + p = page_address(p_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(p, s, len) << SUM_CHECK_P; + } + + if (q_src) { + P(blocks, disks) = NULL; + Q(blocks, disks) = spare; + init_async_submit(submit, 0, NULL, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, offset, disks, len, submit); + async_tx_quiesce(&tx); + q = page_address(q_src) + offset; + s = page_address(spare) + offset; + *pqres |= !!memcmp(q, s, len) << SUM_CHECK_Q; + } + + /* restore P, Q and submit */ + P(blocks, disks) = p_src; + Q(blocks, disks) = q_src; + + submit->cb_fn = cb_fn_orig; + submit->cb_param = cb_param_orig; + submit->flags = flags_orig; + async_tx_sync_epilog(submit); + + return NULL; + } +} +EXPORT_SYMBOL_GPL(async_syndrome_val); + +static int __init async_pq_init(void) +{ + scribble = alloc_page(GFP_KERNEL); + + if (scribble) + return 0; + + pr_err("%s: failed to allocate required spare page\n", __func__); + + return -ENOMEM; +} + +static void __exit async_pq_exit(void) +{ + put_page(scribble); +} + +module_init(async_pq_init); +module_exit(async_pq_exit); + +MODULE_DESCRIPTION("asynchronous raid6 syndrome generation/validation"); +MODULE_LICENSE("GPL"); diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 78fb7780272a..56b5f98da463 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -62,7 +62,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, while (src_cnt) { submit->flags = flags_orig; dma_flags = 0; - xor_src_cnt = min(src_cnt, dma->max_xor); + xor_src_cnt = 
min(src_cnt, (int)dma->max_xor); /* if we are submitting additional xors, leave the chain open, * clear the callback parameters, and leave the destination * buffer mapped diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index e002e0e0d055..cd5673d3043b 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -646,6 +646,10 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_xor); BUG_ON(dma_has_cap(DMA_XOR_VAL, device->cap_mask) && !device->device_prep_dma_xor_val); + BUG_ON(dma_has_cap(DMA_PQ, device->cap_mask) && + !device->device_prep_dma_pq); + BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && + !device->device_prep_dma_pq_val); BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 6ff79a672699..4496bc606662 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -1257,7 +1257,7 @@ static int __devinit iop_adma_probe(struct platform_device *pdev) dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " "( %s%s%s%s%s%s%s%s%s%s)\n", - dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", + dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "", dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index 12a2efcbd565..e6ce5f004f98 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -185,5 +185,14 @@ async_memset(struct page *dest, int val, unsigned int offset, struct dma_async_tx_descriptor *async_trigger_callback(struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_gen_syndrome(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, + size_t len, enum sum_check_flags *pqres, struct page *spare, + struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 02447afcebad..ce010cd991d2 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -52,7 +52,7 @@ enum dma_status { enum dma_transaction_type { DMA_MEMCPY, DMA_XOR, - DMA_PQ_XOR, + DMA_PQ, DMA_DUAL_XOR, DMA_PQ_UPDATE, DMA_XOR_VAL, @@ -70,20 +70,28 @@ enum dma_transaction_type { /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, - * control completion, and communicate status. + * control completion, and communicate status. * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of - * this transaction + * this transaction * @DMA_CTRL_ACK - the descriptor cannot be reused until the client - * acknowledges receipt, i.e. has has a chance to establish any - * dependency chains + * acknowledges receipt, i.e. 
has has a chance to establish any dependency + * chains * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) + * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q + * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P + * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as + * sources that were the result of a previous operation, in the case of a PQ + * operation it continues the calculation with new sources */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), DMA_CTRL_ACK = (1 << 1), DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), + DMA_PREP_PQ_DISABLE_P = (1 << 4), + DMA_PREP_PQ_DISABLE_Q = (1 << 5), + DMA_PREP_CONTINUE = (1 << 6), }; /** @@ -226,6 +234,7 @@ struct dma_async_tx_descriptor { * @global_node: list_head for global dma_device_list * @cap_mask: one or more dma_capability flags * @max_xor: maximum number of xor sources, 0 if no capability + * @max_pq: maximum number of PQ sources and PQ-continue capability * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @device_alloc_chan_resources: allocate resources and return the @@ -234,6 +243,8 @@ struct dma_async_tx_descriptor { * @device_prep_dma_memcpy: prepares a memcpy operation * @device_prep_dma_xor: prepares a xor operation * @device_prep_dma_xor_val: prepares a xor validation operation + * @device_prep_dma_pq: prepares a pq operation + * @device_prep_dma_pq_val: prepares a pqzero_sum operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation * @device_prep_slave_sg: prepares a slave dma operation @@ -248,7 +259,9 @@ struct dma_device { struct list_head channels; struct list_head global_node; dma_cap_mask_t cap_mask; - int max_xor; + unsigned short max_xor; + unsigned short max_pq; + #define DMA_HAS_PQ_CONTINUE (1 << 15) int dev_id; struct device *dev; @@ -265,6 +278,14 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_xor_val)( struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, enum sum_check_flags *result, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq)( + struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_pq_val)( + struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); struct dma_async_tx_descriptor *(*device_prep_dma_memset)( struct dma_chan *chan, dma_addr_t dest, int value, size_t len, unsigned long flags); @@ -283,6 +304,60 @@ struct dma_device { void (*device_issue_pending)(struct dma_chan *chan); }; +static inline void +dma_set_maxpq(struct dma_device *dma, int maxpq, int has_pq_continue) +{ + dma->max_pq = maxpq; + if (has_pq_continue) + dma->max_pq |= DMA_HAS_PQ_CONTINUE; +} + +static inline bool dmaf_continue(enum dma_ctrl_flags flags) +{ + return (flags & DMA_PREP_CONTINUE) == DMA_PREP_CONTINUE; +} + +static inline bool dmaf_p_disabled_continue(enum dma_ctrl_flags flags) +{ + enum dma_ctrl_flags mask = DMA_PREP_CONTINUE | DMA_PREP_PQ_DISABLE_P; + + return (flags & mask) == mask; +} + +static inline bool dma_dev_has_pq_continue(struct dma_device *dma) +{ + return (dma->max_pq & 
DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; +} + +static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma) +{ + return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; +} + +/* dma_maxpq - reduce maxpq in the face of continued operations + * @dma - dma device with PQ capability + * @flags - to check if DMA_PREP_CONTINUE and DMA_PREP_PQ_DISABLE_P are set + * + * When an engine does not support native continuation we need 3 extra + * source slots to reuse P and Q with the following coefficients: + * 1/ {00} * P : remove P from Q', but use it as a source for P' + * 2/ {01} * Q : use Q to continue Q' calculation + * 3/ {00} * Q : subtract Q from P' to cancel (2) + * + * In the case where P is disabled we only need 1 extra source: + * 1/ {01} * Q : use Q to continue Q' calculation + */ +static inline int dma_maxpq(struct dma_device *dma, enum dma_ctrl_flags flags) +{ + if (dma_dev_has_pq_continue(dma) || !dmaf_continue(flags)) + return dma_dev_to_maxpq(dma); + else if (dmaf_p_disabled_continue(flags)) + return dma_dev_to_maxpq(dma) - 1; + else if (dmaf_continue(flags)) + return dma_dev_to_maxpq(dma) - 3; + BUG(); +} + /* --- public DMA engine API --- */ #ifdef CONFIG_DMA_ENGINE -- cgit v1.2.3 From 0a82a6239beecc95db6e05fe43ee62d16b381d38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 14 Jul 2009 12:20:37 -0700 Subject: async_tx: add support for asynchronous RAID6 recovery operations async_raid6_2data_recov() recovers two data disk failures async_raid6_datap_recov() recovers a data disk and the P disk These routines are a port of the synchronous versions found in drivers/md/raid6recov.c. The primary difference is breaking out the xor operations into separate calls to async_xor. Two helper routines are introduced to perform scalar multiplication where needed. async_sum_product() multiplies two sources by scalar coefficients and then sums (xor) the result. async_mult() simply multiplies a single source by a scalar. This implementation also includes, in contrast to the original synchronous-only code, special case handling for the 4-disk and 5-disk array cases. In these situations the default N-disk algorithm will present 0-source or 1-source operations to dma devices. To cover for dma devices where the minimum source count is 2 we implement 4-disk and 5-disk handling in the recovery code. [ Impact: asynchronous raid6 recovery routines for 2data and datap cases ] Cc: Yuri Tikhonov Cc: Ilya Yanok Cc: H.
Peter Anvin Cc: David Woodhouse Reviewed-by: Andre Noll Acked-by: Maciej Sosnowski Signed-off-by: Dan Williams --- Documentation/crypto/async-tx-api.txt | 4 + crypto/async_tx/Kconfig | 5 + crypto/async_tx/Makefile | 1 + crypto/async_tx/async_raid6_recov.c | 448 ++++++++++++++++++++++++++++++++++ include/linux/async_tx.h | 8 + 5 files changed, 466 insertions(+) create mode 100644 crypto/async_tx/async_raid6_recov.c (limited to 'include') diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt index 0e48e054d69a..ba046b8fa92f 100644 --- a/Documentation/crypto/async-tx-api.txt +++ b/Documentation/crypto/async-tx-api.txt @@ -67,6 +67,10 @@ xor_val - xor a series of source buffers and set a flag if the pq - generate the p+q (raid6 syndrome) from a series of source buffers pq_val - validate that a p and or q buffer are in sync with a given series of sources +datap - (raid6_datap_recov) recover a raid6 data block and the p block + from the given sources +2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given + sources 3.3 Descriptor management: The return value is non-NULL and points to a 'descriptor' when the operation diff --git a/crypto/async_tx/Kconfig b/crypto/async_tx/Kconfig index cb6d7314f198..e5aeb2b79e6f 100644 --- a/crypto/async_tx/Kconfig +++ b/crypto/async_tx/Kconfig @@ -18,3 +18,8 @@ config ASYNC_PQ tristate select ASYNC_CORE +config ASYNC_RAID6_RECOV + tristate + select ASYNC_CORE + select ASYNC_PQ + diff --git a/crypto/async_tx/Makefile b/crypto/async_tx/Makefile index 1b9926588259..9a1a76811b80 100644 --- a/crypto/async_tx/Makefile +++ b/crypto/async_tx/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o obj-$(CONFIG_ASYNC_XOR) += async_xor.o obj-$(CONFIG_ASYNC_PQ) += async_pq.o +obj-$(CONFIG_ASYNC_RAID6_RECOV) += async_raid6_recov.o diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c new file mode 100644 index 000000000000..0c14d48c9896 --- /dev/null +++ b/crypto/async_tx/async_raid6_recov.c @@ -0,0 +1,448 @@ +/* + * Asynchronous RAID-6 recovery calculations ASYNC_TX API. + * Copyright(c) 2009 Intel Corporation + * + * based on raid6recov.c: + * Copyright 2002 H. Peter Anvin + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 51 + * Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ +#include +#include +#include +#include +#include + +static struct dma_async_tx_descriptor * +async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, + size_t len, struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, srcs, 2, len); + struct dma_device *dma = chan ? 
chan->device : NULL; + const u8 *amul, *bmul; + u8 ax, bx; + u8 *a, *b, *c; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[2]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); + dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + } + + /* run the operation synchronously */ + async_tx_quiesce(&submit->depend_tx); + amul = raid6_gfmul[coef[0]]; + bmul = raid6_gfmul[coef[1]]; + a = page_address(srcs[0]); + b = page_address(srcs[1]); + c = page_address(dest); + + while (len--) { + ax = amul[*a++]; + bx = bmul[*b++]; + *c++ = ax ^ bx; + } + + return NULL; +} + +static struct dma_async_tx_descriptor * +async_mult(struct page *dest, struct page *src, u8 coef, size_t len, + struct async_submit_ctl *submit) +{ + struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, + &dest, 1, &src, 1, len); + struct dma_device *dma = chan ? chan->device : NULL; + const u8 *qmul; /* Q multiplier table */ + u8 *d, *s; + + if (dma) { + dma_addr_t dma_dest[2]; + dma_addr_t dma_src[1]; + struct device *dev = dma->dev; + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; + + dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); + tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, + len, dma_flags); + if (tx) { + async_tx_submit(chan, tx, submit); + return tx; + } + } + + /* no channel available, or failed to allocate a descriptor, so + * perform the operation synchronously + */ + async_tx_quiesce(&submit->depend_tx); + qmul = raid6_gfmul[coef]; + d = page_address(dest); + s = page_address(src); + + while (len--) + *d++ = qmul[*s++]; + + return NULL; +} + +static struct dma_async_tx_descriptor * +__2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *a, *b; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[4-2]; + q = blocks[4-1]; + + a = blocks[faila]; + b = blocks[failb]; + + /* in the 4 disk case P + Pxy == P and Q + Qxy == Q */ + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = p; + srcs[1] = q; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(b, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = p; + srcs[1] = b; + init_async_submit(submit, flags | ASYNC_TX_XOR_ZERO_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(a, srcs, 0, 2, bytes, submit); + + return tx; + +} + +static struct dma_async_tx_descriptor * +__2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks, + struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *g, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void 
*cb_param = submit->cb_param; + void *scribble = submit->scribble; + int uninitialized_var(good); + int i; + + for (i = 0; i < 3; i++) { + if (i == faila || i == failb) + continue; + else { + good = i; + break; + } + } + BUG_ON(i >= 3); + + p = blocks[5-2]; + q = blocks[5-1]; + g = blocks[good]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for delta p and + * delta q + */ + dp = blocks[faila]; + dq = blocks[failb]; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_memcpy(dp, g, 0, 0, bytes, submit); + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +static struct dma_async_tx_descriptor * +__2data_recov_n(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dp, *dq; + struct page *srcs[2]; + unsigned char coef[2]; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-2] = dp; + dq = blocks[failb]; + blocks[failb] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + + /* Restore pointer table */ + blocks[faila] = dp; + blocks[failb] = dq; + blocks[disks-2] = p; + blocks[disks-1] = q; + + /* compute P + Pxy */ + srcs[0] = dp; + srcs[1] = p; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + /* compute Q + Qxy */ + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + /* Dx = A*(P+Pxy) + B*(Q+Qxy) */ + srcs[0] = dp; + srcs[1] = dq; + coef[0] = raid6_gfexi[failb-faila]; + coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]; + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_sum_product(dq, srcs, coef, bytes, submit); + + /* Dy = P+Pxy+Dx */ + srcs[0] = dp; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(dp, srcs, 0, 2, bytes, submit); + + return tx; +} + +/** + * 
async_raid6_2data_recov - asynchronously calculate two missing data blocks + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: first failed drive index + * @failb: second failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, + struct page **blocks, struct async_submit_ctl *submit) +{ + BUG_ON(faila == failb); + if (failb < faila) + swap(faila, failb); + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!submit->scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_2data_recov(disks, bytes, faila, failb, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + switch (disks) { + case 4: + /* dma devices do not uniformly understand a zero source pq + * operation (in contrast to the synchronous case), so + * explicitly handle the 4 disk special case + */ + return __2data_recov_4(bytes, faila, failb, blocks, submit); + case 5: + /* dma devices do not uniformly understand a single + * source pq operation (in contrast to the synchronous + * case), so explicitly handle the 5 disk special case + */ + return __2data_recov_5(bytes, faila, failb, blocks, submit); + default: + return __2data_recov_n(disks, bytes, faila, failb, blocks, submit); + } +} +EXPORT_SYMBOL_GPL(async_raid6_2data_recov); + +/** + * async_raid6_datap_recov - asynchronously calculate a data and the 'p' block + * @disks: number of disks in the RAID-6 array + * @bytes: block size + * @faila: failed drive index + * @blocks: array of source pointers where the last two entries are p and q + * @submit: submission/completion modifiers + */ +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int disks, size_t bytes, int faila, + struct page **blocks, struct async_submit_ctl *submit) +{ + struct dma_async_tx_descriptor *tx = NULL; + struct page *p, *q, *dq; + u8 coef; + enum async_tx_flags flags = submit->flags; + dma_async_tx_callback cb_fn = submit->cb_fn; + void *cb_param = submit->cb_param; + void *scribble = submit->scribble; + struct page *srcs[2]; + + pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); + + /* we need to preserve the contents of 'blocks' for the async + * case, so punt to synchronous if a scribble buffer is not available + */ + if (!scribble) { + void **ptrs = (void **) blocks; + int i; + + async_tx_quiesce(&submit->depend_tx); + for (i = 0; i < disks; i++) + ptrs[i] = page_address(blocks[i]); + + raid6_datap_recov(disks, bytes, faila, ptrs); + + async_tx_sync_epilog(submit); + + return NULL; + } + + p = blocks[disks-2]; + q = blocks[disks-1]; + + /* Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = blocks[faila]; + blocks[faila] = (void *)raid6_empty_zero_page; + blocks[disks-1] = dq; + + /* in the 4 disk case we only need to perform a single source + * multiplication + */ + if (disks == 4) { + int good = faila == 0 ? 
1 : 0; + struct page *g = blocks[good]; + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_memcpy(p, g, 0, 0, bytes, submit); + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit); + } else { + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_gen_syndrome(blocks, 0, disks, bytes, submit); + } + + /* Restore pointer table */ + blocks[faila] = dq; + blocks[disks-1] = q; + + /* calculate g^{-faila} */ + coef = raid6_gfinv[raid6_gfexp[faila]]; + + srcs[0] = dq; + srcs[1] = q; + init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL, + scribble); + tx = async_xor(dq, srcs, 0, 2, bytes, submit); + + init_async_submit(submit, 0, tx, NULL, NULL, scribble); + tx = async_mult(dq, dq, coef, bytes, submit); + + srcs[0] = p; + srcs[1] = dq; + init_async_submit(submit, flags | ASYNC_TX_XOR_DROP_DST, tx, cb_fn, + cb_param, scribble); + tx = async_xor(p, srcs, 0, 2, bytes, submit); + + return tx; +} +EXPORT_SYMBOL_GPL(async_raid6_datap_recov); + +MODULE_AUTHOR("Dan Williams "); +MODULE_DESCRIPTION("asynchronous RAID-6 recovery api"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index e6ce5f004f98..866e61c4e2e0 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -194,5 +194,13 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int src_cnt, size_t len, enum sum_check_flags *pqres, struct page *spare, struct async_submit_ctl *submit); +struct dma_async_tx_descriptor * +async_raid6_2data_recov(int src_num, size_t bytes, int faila, int failb, + struct page **ptrs, struct async_submit_ctl *submit); + +struct dma_async_tx_descriptor * +async_raid6_datap_recov(int src_num, size_t bytes, int faila, + struct page **ptrs, struct async_submit_ctl *submit); + void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ -- cgit v1.2.3 From acdfcd04d9df7d084ff752f82afad6ed4ad5f363 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Fri, 28 Aug 2009 14:28:54 +0300 Subject: SLUB: fix ARCH_KMALLOC_MINALIGN cases 64 and 256 If the minalign is 64 bytes, then the 96 byte cache should not be created because it would conflict with the 128 byte cache. If the minalign is 256 bytes, patching the size_index table should not result in a buffer overrun. The calculation "(i - 1) / 8" used to access size_index[] is moved to a separate function as suggested by Christoph Lameter. 
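For readers following the arithmetic, here is a minimal stand-alone sketch of that mapping; size_index_elem() mirrors the helper the patch introduces, while the sample request size and the printed slot number are purely illustrative:

#include <stdio.h>
#include <stddef.h>

/* one size_index[] slot per 8-byte step, for requests up to 192 bytes */
static int size_index_elem(size_t bytes)
{
	return (bytes - 1) / 8;
}

int main(void)
{
	/* a 100-byte request maps to slot (100 - 1) / 8 = 12; in mm/slub.c that
	 * entry holds 7, i.e. the 2^7 = 128 byte kmalloc cache */
	printf("100 bytes -> size_index[%d]\n", size_index_elem(100));
	return 0;
}
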
Acked-by: Christoph Lameter Signed-off-by: Aaro Koskinen Signed-off-by: Pekka Enberg --- include/linux/slub_def.h | 6 ++---- mm/slub.c | 30 ++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 4dcbc2c71491..aa5d4a69d461 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -152,12 +152,10 @@ static __always_inline int kmalloc_index(size_t size) if (size <= KMALLOC_MIN_SIZE) return KMALLOC_SHIFT_LOW; -#if KMALLOC_MIN_SIZE <= 64 - if (size > 64 && size <= 96) + if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96) return 1; - if (size > 128 && size <= 192) + if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192) return 2; -#endif if (size <= 8) return 3; if (size <= 16) return 4; if (size <= 32) return 5; diff --git a/mm/slub.c b/mm/slub.c index e16c9fb1f48b..be493bd63c31 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2825,6 +2825,11 @@ static s8 size_index[24] = { 2 /* 192 */ }; +static inline int size_index_elem(size_t bytes) +{ + return (bytes - 1) / 8; +} + static struct kmem_cache *get_slab(size_t size, gfp_t flags) { int index; @@ -2833,7 +2838,7 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) if (!size) return ZERO_SIZE_PTR; - index = size_index[(size - 1) / 8]; + index = size_index[size_index_elem(size)]; } else index = fls(size - 1); @@ -3188,10 +3193,12 @@ void __init kmem_cache_init(void) slab_state = PARTIAL; /* Caches that are not of the two-to-the-power-of size */ - if (KMALLOC_MIN_SIZE <= 64) { + if (KMALLOC_MIN_SIZE <= 32) { create_kmalloc_cache(&kmalloc_caches[1], "kmalloc-96", 96, GFP_NOWAIT); caches++; + } + if (KMALLOC_MIN_SIZE <= 64) { create_kmalloc_cache(&kmalloc_caches[2], "kmalloc-192", 192, GFP_NOWAIT); caches++; @@ -3218,17 +3225,28 @@ void __init kmem_cache_init(void) BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); - for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) - size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW; + for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { + int elem = size_index_elem(i); + if (elem >= ARRAY_SIZE(size_index)) + break; + size_index[elem] = KMALLOC_SHIFT_LOW; + } - if (KMALLOC_MIN_SIZE == 128) { + if (KMALLOC_MIN_SIZE == 64) { + /* + * The 96 byte size cache is not used if the alignment + * is 64 byte. + */ + for (i = 64 + 8; i <= 96; i += 8) + size_index[size_index_elem(i)] = 7; + } else if (KMALLOC_MIN_SIZE == 128) { /* * The 192 byte sized cache is not used if the alignment * is 128 byte. Redirect kmalloc to use the 256 byte cache * instead. */ for (i = 128 + 8; i <= 192; i += 8) - size_index[(i - 1) / 8] = 8; + size_index[size_index_elem(i)] = 8; } slab_state = UP; -- cgit v1.2.3 From e500011ffa191d662ac64d4ada6a5187b3180e16 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 30 Aug 2009 13:19:12 -0700 Subject: timers: Drop a function prototype Drop prototype for non-existent next_timer_interrupt() function. 
Signed-off-by: Randy Dunlap Cc: akpm LKML-Reference: <4A9ADEC0.70306@oracle.com> Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/timer.h b/include/linux/timer.h index be62ec2ebea5..a2d1eb6cb3f0 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -173,11 +173,6 @@ extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires); */ #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) -/* - * Return when the next timer-wheel timeout occurs (in absolute jiffies), - * locks the timer base: - */ -extern unsigned long next_timer_interrupt(void); /* * Return when the next timer-wheel timeout occurs (in absolute jiffies), * locks the timer base and does the comparison against the given -- cgit v1.2.3 From f380ef86916904e4b79f7bec599deb51057b2d0c Mon Sep 17 00:00:00 2001 From: Maarten Maathuis Date: Wed, 19 Aug 2009 00:56:44 +0200 Subject: drm/crtc_helper: place drm_helper_encoder_in_use() in the header file - The symbol was already exported. Signed-off-by: Maarten Maathuis Signed-off-by: Dave Airlie --- include/drm/drm_crtc_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 6769ff6c1bc0..e44a4f87303c 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -98,6 +98,7 @@ extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb); extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc); +extern bool drm_helper_encoder_in_use(struct drm_encoder *encoder); extern void drm_helper_connector_dpms(struct drm_connector *connector, int mode); -- cgit v1.2.3 From 785b93ef8c309730c2de84ce9c229e40e2d01480 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 28 Aug 2009 15:46:53 +1000 Subject: drm/kms: move driver specific fb common code to helper functions (v2) Initially I always meant this code to be shared, but things ran away from me before I got to it. This refactors the i915 and radeon kms fbdev interaction layers out into generic helpers + driver specific pieces. It moves all the panic/sysrq enhancements to the core file, and stores a linked list of kernel fbs. This could possibly be improved to only store the fb which has fbcon on it for panics etc. radeon retains some specific codes used for a big endian workaround. 
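To make the shape of the refactor concrete before the diff, the sketch below shows what a driver's fbdev ops reduce to once the shared helpers exist; it mirrors the intel_fb.c hunk later in this patch, and the examplefb_ops name (along with the usual drm/fb headers assumed to be included) is only illustrative:

static struct fb_ops examplefb_ops = {
	.owner		= THIS_MODULE,
	.fb_check_var	= drm_fb_helper_check_var,	/* shared var sanity checks */
	.fb_set_par	= drm_fb_helper_set_par,	/* shared modeset path */
	.fb_setcolreg	= drm_fb_helper_setcolreg,	/* shared palette handling */
	.fb_fillrect	= cfb_fillrect,
	.fb_copyarea	= cfb_copyarea,
	.fb_imageblit	= cfb_imageblit,
	.fb_pan_display	= drm_fb_helper_pan_display,
	.fb_blank	= drm_fb_helper_blank,
};
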
changes: fix oops in v1 fix freeing path for crtc_info Reviewed-by: Jesse Barnes Signed-off-by: Dave Airlie --- drivers/gpu/drm/Makefile | 3 +- drivers/gpu/drm/drm_fb_helper.c | 697 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_dma.c | 3 +- drivers/gpu/drm/i915/intel_display.c | 12 - drivers/gpu/drm/i915/intel_drv.h | 3 - drivers/gpu/drm/i915/intel_fb.c | 737 ++------------------------------ drivers/gpu/drm/radeon/radeon_display.c | 5 +- drivers/gpu/drm/radeon/radeon_fb.c | 670 ++++------------------------- drivers/gpu/drm/radeon/radeon_mode.h | 2 - include/drm/drm_crtc.h | 2 + include/drm/drm_fb_helper.h | 82 ++++ 11 files changed, 907 insertions(+), 1309 deletions(-) create mode 100644 drivers/gpu/drm/drm_fb_helper.c create mode 100644 include/drm/drm_fb_helper.h (limited to 'include') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 5f0aec4f082a..99071684de25 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -11,7 +11,8 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \ drm_agpsupport.o drm_scatter.o ati_pcigart.o drm_pci.o \ drm_sysfs.o drm_hashtab.o drm_sman.o drm_mm.o \ drm_crtc.o drm_crtc_helper.o drm_modes.o drm_edid.o \ - drm_info.o drm_debugfs.o drm_encoder_slave.o + drm_info.o drm_debugfs.o drm_encoder_slave.o \ + drm_fb_helper.o drm-$(CONFIG_COMPAT) += drm_ioc32.o diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c new file mode 100644 index 000000000000..d6ffea74a502 --- /dev/null +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -0,0 +1,697 @@ +/* + * Copyright (c) 2006-2009 Red Hat Inc. + * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie + * + * DRM framebuffer helper functions + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
+ * + * Authors: + * Dave Airlie + * Jesse Barnes + */ +#include +#include +#include "drmP.h" +#include "drm_crtc.h" +#include "drm_fb_helper.h" +#include "drm_crtc_helper.h" + +static LIST_HEAD(kernel_fb_helper_list); + +bool drm_fb_helper_force_kernel_mode(void) +{ + int i = 0; + bool ret, error = false; + struct drm_fb_helper *helper; + + if (list_empty(&kernel_fb_helper_list)) + return false; + + list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) { + for (i = 0; i < helper->crtc_count; i++) { + struct drm_mode_set *mode_set = &helper->crtc_info[i].mode_set; + ret = drm_crtc_helper_set_config(mode_set); + if (ret) + error = true; + } + } + return error; +} + +int drm_fb_helper_panic(struct notifier_block *n, unsigned long ununsed, + void *panic_str) +{ + DRM_ERROR("panic occurred, switching back to text console\n"); + return drm_fb_helper_force_kernel_mode(); + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_panic); + +static struct notifier_block paniced = { + .notifier_call = drm_fb_helper_panic, +}; + +/** + * drm_fb_helper_restore - restore the framebuffer console (kernel) config + * + * Restore's the kernel's fbcon mode, used for lastclose & panic paths. + */ +void drm_fb_helper_restore(void) +{ + bool ret; + ret = drm_fb_helper_force_kernel_mode(); + if (ret == true) + DRM_ERROR("Failed to restore crtc configuration\n"); +} +EXPORT_SYMBOL(drm_fb_helper_restore); + +static void drm_fb_helper_restore_work_fn(struct work_struct *ignored) +{ + drm_fb_helper_restore(); +} +static DECLARE_WORK(drm_fb_helper_restore_work, drm_fb_helper_restore_work_fn); + +static void drm_fb_helper_sysrq(int dummy1, struct tty_struct *dummy3) +{ + schedule_work(&drm_fb_helper_restore_work); +} + +static struct sysrq_key_op sysrq_drm_fb_helper_restore_op = { + .handler = drm_fb_helper_sysrq, + .help_msg = "force-fb(V)", + .action_msg = "Restore framebuffer console", +}; + +static void drm_fb_helper_on(struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct drm_crtc *crtc; + struct drm_encoder *encoder; + int i; + + /* + * For each CRTC in this fb, turn the crtc on then, + * find all associated encoders and turn them on. + */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; + + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + + mutex_lock(&dev->mode_config.mutex); + crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON); + mutex_unlock(&dev->mode_config.mutex); + + /* Found a CRTC on this fb, now find encoders */ + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + if (encoder->crtc == crtc) { + struct drm_encoder_helper_funcs *encoder_funcs; + + encoder_funcs = encoder->helper_private; + mutex_lock(&dev->mode_config.mutex); + encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON); + mutex_unlock(&dev->mode_config.mutex); + } + } + } +} + +static void drm_fb_helper_off(struct fb_info *info, int dpms_mode) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct drm_crtc *crtc; + struct drm_encoder *encoder; + int i; + + /* + * For each CRTC in this fb, find all associated encoders + * and turn them off, then turn off the CRTC. 
+ */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; + + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + + /* Found a CRTC on this fb, now find encoders */ + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { + if (encoder->crtc == crtc) { + struct drm_encoder_helper_funcs *encoder_funcs; + + encoder_funcs = encoder->helper_private; + mutex_lock(&dev->mode_config.mutex); + encoder_funcs->dpms(encoder, dpms_mode); + mutex_unlock(&dev->mode_config.mutex); + } + } + if (dpms_mode == DRM_MODE_DPMS_OFF) { + mutex_lock(&dev->mode_config.mutex); + crtc_funcs->dpms(crtc, dpms_mode); + mutex_unlock(&dev->mode_config.mutex); + } + } +} + +int drm_fb_helper_blank(int blank, struct fb_info *info) +{ + switch (blank) { + case FB_BLANK_UNBLANK: + drm_fb_helper_on(info); + break; + case FB_BLANK_NORMAL: + drm_fb_helper_off(info, DRM_MODE_DPMS_STANDBY); + break; + case FB_BLANK_HSYNC_SUSPEND: + drm_fb_helper_off(info, DRM_MODE_DPMS_STANDBY); + break; + case FB_BLANK_VSYNC_SUSPEND: + drm_fb_helper_off(info, DRM_MODE_DPMS_SUSPEND); + break; + case FB_BLANK_POWERDOWN: + drm_fb_helper_off(info, DRM_MODE_DPMS_OFF); + break; + } + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_blank); + +static void drm_fb_helper_crtc_free(struct drm_fb_helper *helper) +{ + int i; + + for (i = 0; i < helper->crtc_count; i++) + kfree(helper->crtc_info[i].mode_set.connectors); + kfree(helper->crtc_info); +} + +int drm_fb_helper_init_crtc_count(struct drm_fb_helper *helper, int crtc_count, int max_conn_count) +{ + struct drm_device *dev = helper->dev; + struct drm_crtc *crtc; + int ret = 0; + int i; + + helper->crtc_info = kcalloc(crtc_count, sizeof(struct drm_fb_helper_crtc), GFP_KERNEL); + if (!helper->crtc_info) + return -ENOMEM; + + helper->crtc_count = crtc_count; + + for (i = 0; i < crtc_count; i++) { + helper->crtc_info[i].mode_set.connectors = + kcalloc(max_conn_count, + sizeof(struct drm_connector *), + GFP_KERNEL); + + if (!helper->crtc_info[i].mode_set.connectors) { + ret = -ENOMEM; + goto out_free; + } + helper->crtc_info[i].mode_set.num_connectors = 0; + } + + i = 0; + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + helper->crtc_info[i].crtc_id = crtc->base.id; + helper->crtc_info[i].mode_set.crtc = crtc; + i++; + } + helper->conn_limit = max_conn_count; + return 0; +out_free: + drm_fb_helper_crtc_free(helper); + return -ENOMEM; +} +EXPORT_SYMBOL(drm_fb_helper_init_crtc_count); + +int drm_fb_helper_setcolreg(unsigned regno, + unsigned red, + unsigned green, + unsigned blue, + unsigned transp, + struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct drm_crtc *crtc; + int i; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct drm_framebuffer *fb = fb_helper->fb; + + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + if (i == fb_helper->crtc_count) + continue; + + if (regno > 255) + return 1; + + if (fb->depth == 8) { + fb_helper->funcs->gamma_set(crtc, red, green, blue, regno); + return 0; + } + + if (regno < 16) { + switch (fb->depth) { + case 15: + fb->pseudo_palette[regno] = ((red & 0xf800) >> 1) | + ((green & 0xf800) >> 6) | + ((blue & 0xf800) >> 11); + break; + case 16: + fb->pseudo_palette[regno] = (red & 0xf800) | + ((green & 0xfc00) >> 5) | + ((blue & 0xf800) >> 11); + break; 
+ case 24: + case 32: + fb->pseudo_palette[regno] = + (((red >> 8) & 0xff) << info->var.red.offset) | + (((green >> 8) & 0xff) << info->var.green.offset) | + (((blue >> 8) & 0xff) << info->var.blue.offset); + break; + } + } + } + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_setcolreg); + +int drm_fb_helper_check_var(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_framebuffer *fb = fb_helper->fb; + int depth; + + if (var->pixclock == -1 || !var->pixclock) + return -EINVAL; + + /* Need to resize the fb object !!! */ + if (var->xres > fb->width || var->yres > fb->height) { + DRM_ERROR("Requested width/height is greater than current fb " + "object %dx%d > %dx%d\n", var->xres, var->yres, + fb->width, fb->height); + DRM_ERROR("Need resizing code.\n"); + return -EINVAL; + } + + switch (var->bits_per_pixel) { + case 16: + depth = (var->green.length == 6) ? 16 : 15; + break; + case 32: + depth = (var->transp.length > 0) ? 32 : 24; + break; + default: + depth = var->bits_per_pixel; + break; + } + + switch (depth) { + case 8: + var->red.offset = 0; + var->green.offset = 0; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 15: + var->red.offset = 10; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 5; + var->blue.length = 5; + var->transp.length = 1; + var->transp.offset = 15; + break; + case 16: + var->red.offset = 11; + var->green.offset = 5; + var->blue.offset = 0; + var->red.length = 5; + var->green.length = 6; + var->blue.length = 5; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 24: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 32: + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 8; + var->transp.offset = 24; + break; + default: + return -EINVAL; + } + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_check_var); + +/* this will let fbcon do the mode init */ +int drm_fb_helper_set_par(struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct fb_var_screeninfo *var = &info->var; + struct drm_crtc *crtc; + int ret; + int i; + + if (var->pixclock != -1) { + DRM_ERROR("PIXEL CLCOK SET\n"); + return -EINVAL; + } + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == fb_helper->crtc_info[i].crtc_id) + break; + } + if (i == fb_helper->crtc_count) + continue; + + if (crtc->fb == fb_helper->crtc_info[i].mode_set.fb) { + mutex_lock(&dev->mode_config.mutex); + ret = crtc->funcs->set_config(&fb_helper->crtc_info->mode_set); + mutex_unlock(&dev->mode_config.mutex); + if (ret) + return ret; + } + } + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_set_par); + +int drm_fb_helper_pan_display(struct fb_var_screeninfo *var, + struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_device *dev = fb_helper->dev; + struct drm_mode_set *modeset; + struct drm_crtc *crtc; + int ret = 0; + int i; + + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + for (i = 0; i < fb_helper->crtc_count; i++) { + if (crtc->base.id == 
fb_helper->crtc_info[i].crtc_id) + break; + } + + if (i == fb_helper->crtc_count) + continue; + + modeset = &fb_helper->crtc_info[i].mode_set; + + modeset->x = var->xoffset; + modeset->y = var->yoffset; + + if (modeset->num_connectors) { + mutex_lock(&dev->mode_config.mutex); + ret = crtc->funcs->set_config(modeset); + mutex_unlock(&dev->mode_config.mutex); + if (!ret) { + info->var.xoffset = var->xoffset; + info->var.yoffset = var->yoffset; + } + } + } + return ret; +} +EXPORT_SYMBOL(drm_fb_helper_pan_display); + +int drm_fb_helper_single_fb_probe(struct drm_device *dev, + int (*fb_create)(struct drm_device *dev, + uint32_t fb_width, + uint32_t fb_height, + uint32_t surface_width, + uint32_t surface_height, + struct drm_framebuffer **fb_ptr)) +{ + struct drm_crtc *crtc; + struct drm_connector *connector; + unsigned int fb_width = (unsigned)-1, fb_height = (unsigned)-1; + unsigned int surface_width = 0, surface_height = 0; + int new_fb = 0; + int crtc_count = 0; + int ret, i, conn_count = 0; + struct fb_info *info; + struct drm_framebuffer *fb; + struct drm_mode_set *modeset = NULL; + struct drm_fb_helper *fb_helper; + + /* first up get a count of crtcs now in use and new min/maxes width/heights */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + if (drm_helper_crtc_in_use(crtc)) { + if (crtc->desired_mode) { + if (crtc->desired_mode->hdisplay < fb_width) + fb_width = crtc->desired_mode->hdisplay; + + if (crtc->desired_mode->vdisplay < fb_height) + fb_height = crtc->desired_mode->vdisplay; + + if (crtc->desired_mode->hdisplay > surface_width) + surface_width = crtc->desired_mode->hdisplay; + + if (crtc->desired_mode->vdisplay > surface_height) + surface_height = crtc->desired_mode->vdisplay; + } + crtc_count++; + } + } + + if (crtc_count == 0 || fb_width == -1 || fb_height == -1) { + /* hmm everyone went away - assume VGA cable just fell out + and will come back later. */ + return 0; + } + + /* do we have an fb already? */ + if (list_empty(&dev->mode_config.fb_kernel_list)) { + ret = (*fb_create)(dev, fb_width, fb_height, surface_width, + surface_height, &fb); + if (ret) + return -EINVAL; + new_fb = 1; + } else { + fb = list_first_entry(&dev->mode_config.fb_kernel_list, + struct drm_framebuffer, filp_head); + + /* if someone hotplugs something bigger than we have already allocated, we are pwned. + As really we can't resize an fbdev that is in the wild currently due to fbdev + not really being designed for the lower layers moving stuff around under it. + - so in the grand style of things - punt. 
*/ + if ((fb->width < surface_width) || + (fb->height < surface_height)) { + DRM_ERROR("Framebuffer not large enough to scale console onto.\n"); + return -EINVAL; + } + } + + info = fb->fbdev; + fb_helper = info->par; + + crtc_count = 0; + /* okay we need to setup new connector sets in the crtcs */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + modeset = &fb_helper->crtc_info[crtc_count].mode_set; + modeset->fb = fb; + conn_count = 0; + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + if (connector->encoder) + if (connector->encoder->crtc == modeset->crtc) { + modeset->connectors[conn_count] = connector; + conn_count++; + if (conn_count > fb_helper->conn_limit) + BUG(); + } + } + + for (i = conn_count; i < fb_helper->conn_limit; i++) + modeset->connectors[i] = NULL; + + modeset->crtc = crtc; + crtc_count++; + + modeset->num_connectors = conn_count; + if (modeset->crtc->desired_mode) { + if (modeset->mode) + drm_mode_destroy(dev, modeset->mode); + modeset->mode = drm_mode_duplicate(dev, + modeset->crtc->desired_mode); + } + } + fb_helper->crtc_count = crtc_count; + fb_helper->fb = fb; + + if (new_fb) { + info->var.pixclock = -1; + if (register_framebuffer(info) < 0) + return -EINVAL; + } else { + drm_fb_helper_set_par(info); + } + printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node, + info->fix.id); + + /* Switch back to kernel console on panic */ + /* multi card linked list maybe */ + if (list_empty(&kernel_fb_helper_list)) { + printk(KERN_INFO "registered panic notifier\n"); + atomic_notifier_chain_register(&panic_notifier_list, + &paniced); + register_sysrq_key('v', &sysrq_drm_fb_helper_restore_op); + } + list_add(&fb_helper->kernel_fb_list, &kernel_fb_helper_list); + return 0; +} +EXPORT_SYMBOL(drm_fb_helper_single_fb_probe); + +void drm_fb_helper_free(struct drm_fb_helper *helper) +{ + list_del(&helper->kernel_fb_list); + if (list_empty(&kernel_fb_helper_list)) { + printk(KERN_INFO "unregistered panic notifier\n"); + atomic_notifier_chain_unregister(&panic_notifier_list, + &paniced); + unregister_sysrq_key('v', &sysrq_drm_fb_helper_restore_op); + } + drm_fb_helper_crtc_free(helper); +} +EXPORT_SYMBOL(drm_fb_helper_free); + +void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch) +{ + info->fix.type = FB_TYPE_PACKED_PIXELS; + info->fix.visual = FB_VISUAL_TRUECOLOR; + info->fix.type_aux = 0; + info->fix.xpanstep = 1; /* doing it in hw */ + info->fix.ypanstep = 1; /* doing it in hw */ + info->fix.ywrapstep = 0; + info->fix.accel = FB_ACCEL_I830; + info->fix.type_aux = 0; + + info->fix.line_length = pitch; + return; +} +EXPORT_SYMBOL(drm_fb_helper_fill_fix); + +void drm_fb_helper_fill_var(struct fb_info *info, struct drm_framebuffer *fb, + uint32_t fb_width, uint32_t fb_height) +{ + info->pseudo_palette = fb->pseudo_palette; + info->var.xres_virtual = fb->width; + info->var.yres_virtual = fb->height; + info->var.bits_per_pixel = fb->bits_per_pixel; + info->var.xoffset = 0; + info->var.yoffset = 0; + info->var.activate = FB_ACTIVATE_NOW; + info->var.height = -1; + info->var.width = -1; + + switch (fb->depth) { + case 8: + info->var.red.offset = 0; + info->var.green.offset = 0; + info->var.blue.offset = 0; + info->var.red.length = 8; /* 8bit DAC */ + info->var.green.length = 8; + info->var.blue.length = 8; + info->var.transp.offset = 0; + info->var.transp.length = 0; + break; + case 15: + info->var.red.offset = 10; + info->var.green.offset = 5; + info->var.blue.offset = 0; + info->var.red.length = 5; + info->var.green.length = 
5; + info->var.blue.length = 5; + info->var.transp.offset = 15; + info->var.transp.length = 1; + break; + case 16: + info->var.red.offset = 11; + info->var.green.offset = 5; + info->var.blue.offset = 0; + info->var.red.length = 5; + info->var.green.length = 6; + info->var.blue.length = 5; + info->var.transp.offset = 0; + break; + case 24: + info->var.red.offset = 16; + info->var.green.offset = 8; + info->var.blue.offset = 0; + info->var.red.length = 8; + info->var.green.length = 8; + info->var.blue.length = 8; + info->var.transp.offset = 0; + info->var.transp.length = 0; + break; + case 32: + info->var.red.offset = 16; + info->var.green.offset = 8; + info->var.blue.offset = 0; + info->var.red.length = 8; + info->var.green.length = 8; + info->var.blue.length = 8; + info->var.transp.offset = 24; + info->var.transp.length = 8; + break; + default: + break; + } + + info->var.xres = fb_width; + info->var.yres = fb_height; +} +EXPORT_SYMBOL(drm_fb_helper_fill_var); diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 544d889b9b16..c628c3671394 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -29,6 +29,7 @@ #include "drmP.h" #include "drm.h" #include "drm_crtc_helper.h" +#include "drm_fb_helper.h" #include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" @@ -1347,7 +1348,7 @@ void i915_driver_lastclose(struct drm_device * dev) drm_i915_private_t *dev_priv = dev->dev_private; if (!dev_priv || drm_core_check_feature(dev, DRIVER_MODESET)) { - intelfb_restore(); + drm_fb_helper_restore(); return; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index d6fce2133413..5fb7a4f4a427 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3060,8 +3060,6 @@ static void intel_crtc_destroy(struct drm_crtc *crtc) { struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - if (intel_crtc->mode_set.mode) - drm_mode_destroy(crtc->dev, intel_crtc->mode_set.mode); drm_crtc_cleanup(crtc); kfree(intel_crtc); } @@ -3107,16 +3105,6 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_crtc->cursor_addr = 0; intel_crtc->dpms_mode = DRM_MODE_DPMS_OFF; drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); - - intel_crtc->mode_set.crtc = &intel_crtc->base; - intel_crtc->mode_set.connectors = (struct drm_connector **)(intel_crtc + 1); - intel_crtc->mode_set.num_connectors = 0; - - if (i915_fbpercrtc) { - - - - } } int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index d6f92ea1b553..38910f8f30ed 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -96,9 +96,6 @@ struct intel_crtc { uint32_t cursor_addr; u8 lut_r[256], lut_g[256], lut_b[256]; int dpms_mode; - struct intel_framebuffer *fbdev_fb; - /* a mode_set for fbdev users on this crtc */ - struct drm_mode_set mode_set; }; #define to_intel_crtc(x) container_of(x, struct intel_crtc, base) diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 1d30802e773e..3041530c3673 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -39,339 +39,34 @@ #include "drmP.h" #include "drm.h" #include "drm_crtc.h" +#include "drm_fb_helper.h" #include "intel_drv.h" #include "i915_drm.h" #include "i915_drv.h" struct intelfb_par { - struct drm_device *dev; - struct drm_display_mode *our_mode; + struct drm_fb_helper helper; 
struct intel_framebuffer *intel_fb; - int crtc_count; - /* crtc currently bound to this */ - uint32_t crtc_ids[2]; + struct drm_display_mode *our_mode; }; -static int intelfb_setcolreg(unsigned regno, unsigned red, unsigned green, - unsigned blue, unsigned transp, - struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct drm_crtc *crtc; - int i; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_mode_set *modeset = &intel_crtc->mode_set; - struct drm_framebuffer *fb = modeset->fb; - - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - if (i == par->crtc_count) - continue; - - - if (regno > 255) - return 1; - - if (fb->depth == 8) { - intel_crtc_fb_gamma_set(crtc, red, green, blue, regno); - return 0; - } - - if (regno < 16) { - switch (fb->depth) { - case 15: - fb->pseudo_palette[regno] = ((red & 0xf800) >> 1) | - ((green & 0xf800) >> 6) | - ((blue & 0xf800) >> 11); - break; - case 16: - fb->pseudo_palette[regno] = (red & 0xf800) | - ((green & 0xfc00) >> 5) | - ((blue & 0xf800) >> 11); - break; - case 24: - case 32: - fb->pseudo_palette[regno] = ((red & 0xff00) << 8) | - (green & 0xff00) | - ((blue & 0xff00) >> 8); - break; - } - } - } - return 0; -} - -static int intelfb_check_var(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct intel_framebuffer *intel_fb = par->intel_fb; - struct drm_framebuffer *fb = &intel_fb->base; - int depth; - - if (var->pixclock == -1 || !var->pixclock) - return -EINVAL; - - /* Need to resize the fb object !!! */ - if (var->xres > fb->width || var->yres > fb->height) { - DRM_ERROR("Requested width/height is greater than current fb object %dx%d > %dx%d\n",var->xres,var->yres,fb->width,fb->height); - DRM_ERROR("Need resizing code.\n"); - return -EINVAL; - } - - switch (var->bits_per_pixel) { - case 16: - depth = (var->green.length == 6) ? 16 : 15; - break; - case 32: - depth = (var->transp.length > 0) ? 32 : 24; - break; - default: - depth = var->bits_per_pixel; - break; - } - - switch (depth) { - case 8: - var->red.offset = 0; - var->green.offset = 0; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 15: - var->red.offset = 10; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 5; - var->blue.length = 5; - var->transp.length = 1; - var->transp.offset = 15; - break; - case 16: - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 6; - var->blue.length = 5; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 24: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 24; - break; - default: - return -EINVAL; - } - - return 0; -} - -/* this will let fbcon do the mode init */ -/* FIXME: take mode config lock? 
*/ -static int intelfb_set_par(struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct fb_var_screeninfo *var = &info->var; - int i; - - DRM_DEBUG("%d %d\n", var->xres, var->pixclock); - - if (var->pixclock != -1) { - - DRM_ERROR("PIXEL CLOCK SET\n"); - return -EINVAL; - } else { - struct drm_crtc *crtc; - int ret; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - if (i == par->crtc_count) - continue; - - if (crtc->fb == intel_crtc->mode_set.fb) { - mutex_lock(&dev->mode_config.mutex); - ret = crtc->funcs->set_config(&intel_crtc->mode_set); - mutex_unlock(&dev->mode_config.mutex); - if (ret) - return ret; - } - } - return 0; - } -} - -static int intelfb_pan_display(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct drm_mode_set *modeset; - struct drm_crtc *crtc; - struct intel_crtc *intel_crtc; - int ret = 0; - int i; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - if (i == par->crtc_count) - continue; - - intel_crtc = to_intel_crtc(crtc); - modeset = &intel_crtc->mode_set; - - modeset->x = var->xoffset; - modeset->y = var->yoffset; - - if (modeset->num_connectors) { - mutex_lock(&dev->mode_config.mutex); - ret = crtc->funcs->set_config(modeset); - mutex_unlock(&dev->mode_config.mutex); - if (!ret) { - info->var.xoffset = var->xoffset; - info->var.yoffset = var->yoffset; - } - } - } - - return ret; -} - -static void intelfb_on(struct fb_info *info) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct drm_crtc *crtc; - struct drm_encoder *encoder; - int i; - - /* - * For each CRTC in this fb, find all associated encoders - * and turn them off, then turn off the CRTC. - */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; - - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON); - - /* Found a CRTC on this fb, now find encoders */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - struct drm_encoder_helper_funcs *encoder_funcs; - encoder_funcs = encoder->helper_private; - encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON); - } - } - } -} - -static void intelfb_off(struct fb_info *info, int dpms_mode) -{ - struct intelfb_par *par = info->par; - struct drm_device *dev = par->dev; - struct drm_crtc *crtc; - struct drm_encoder *encoder; - int i; - - /* - * For each CRTC in this fb, find all associated encoders - * and turn them off, then turn off the CRTC. 
- */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; - - for (i = 0; i < par->crtc_count; i++) - if (crtc->base.id == par->crtc_ids[i]) - break; - - /* Found a CRTC on this fb, now find encoders */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - struct drm_encoder_helper_funcs *encoder_funcs; - encoder_funcs = encoder->helper_private; - encoder_funcs->dpms(encoder, dpms_mode); - } - } - if (dpms_mode == DRM_MODE_DPMS_OFF) - crtc_funcs->dpms(crtc, dpms_mode); - } -} - -static int intelfb_blank(int blank, struct fb_info *info) -{ - switch (blank) { - case FB_BLANK_UNBLANK: - intelfb_on(info); - break; - case FB_BLANK_NORMAL: - intelfb_off(info, DRM_MODE_DPMS_STANDBY); - break; - case FB_BLANK_HSYNC_SUSPEND: - intelfb_off(info, DRM_MODE_DPMS_STANDBY); - break; - case FB_BLANK_VSYNC_SUSPEND: - intelfb_off(info, DRM_MODE_DPMS_SUSPEND); - break; - case FB_BLANK_POWERDOWN: - intelfb_off(info, DRM_MODE_DPMS_OFF); - break; - } - return 0; -} - static struct fb_ops intelfb_ops = { .owner = THIS_MODULE, - .fb_check_var = intelfb_check_var, - .fb_set_par = intelfb_set_par, - .fb_setcolreg = intelfb_setcolreg, + .fb_check_var = drm_fb_helper_check_var, + .fb_set_par = drm_fb_helper_set_par, + .fb_setcolreg = drm_fb_helper_setcolreg, .fb_fillrect = cfb_fillrect, .fb_copyarea = cfb_copyarea, .fb_imageblit = cfb_imageblit, - .fb_pan_display = intelfb_pan_display, - .fb_blank = intelfb_blank, + .fb_pan_display = drm_fb_helper_pan_display, + .fb_blank = drm_fb_helper_blank, }; +static struct drm_fb_helper_funcs intel_fb_helper_funcs = { + .gamma_set = intel_crtc_fb_gamma_set, +}; + + /** * Curretly it is assumed that the old framebuffer is reused. 
* @@ -412,25 +107,10 @@ int intelfb_resize(struct drm_device *dev, struct drm_crtc *crtc) } EXPORT_SYMBOL(intelfb_resize); -static struct drm_mode_set kernelfb_mode; - -static int intelfb_panic(struct notifier_block *n, unsigned long ununsed, - void *panic_str) -{ - DRM_ERROR("panic occurred, switching back to text console\n"); - - intelfb_restore(); - return 0; -} - -static struct notifier_block paniced = { - .notifier_call = intelfb_panic, -}; - static int intelfb_create(struct drm_device *dev, uint32_t fb_width, uint32_t fb_height, uint32_t surface_width, uint32_t surface_height, - struct intel_framebuffer **intel_fb_p) + struct drm_framebuffer **fb_p) { struct fb_info *info; struct intelfb_par *par; @@ -479,7 +159,7 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, list_add(&fb->filp_head, &dev->mode_config.fb_kernel_list); intel_fb = to_intel_framebuffer(fb); - *intel_fb_p = intel_fb; + *fb_p = fb; info = framebuffer_alloc(sizeof(struct intelfb_par), device); if (!info) { @@ -489,21 +169,19 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, par = info->par; + par->helper.funcs = &intel_fb_helper_funcs; + par->helper.dev = dev; + ret = drm_fb_helper_init_crtc_count(&par->helper, 2, + INTELFB_CONN_LIMIT); + if (ret) + goto out_unref; + strcpy(info->fix.id, "inteldrmfb"); - info->fix.type = FB_TYPE_PACKED_PIXELS; - info->fix.visual = FB_VISUAL_TRUECOLOR; - info->fix.type_aux = 0; - info->fix.xpanstep = 1; /* doing it in hw */ - info->fix.ypanstep = 1; /* doing it in hw */ - info->fix.ywrapstep = 0; - info->fix.accel = FB_ACCEL_I830; - info->fix.type_aux = 0; info->flags = FBINFO_DEFAULT; info->fbops = &intelfb_ops; - info->fix.line_length = fb->pitch; /* setup aperture base/size for vesafb takeover */ info->aperture_base = dev->mode_config.fb_base; @@ -527,18 +205,8 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, // memset(info->screen_base, 0, size); - info->pseudo_palette = fb->pseudo_palette; - info->var.xres_virtual = fb->width; - info->var.yres_virtual = fb->height; - info->var.bits_per_pixel = fb->bits_per_pixel; - info->var.xoffset = 0; - info->var.yoffset = 0; - info->var.activate = FB_ACTIVATE_NOW; - info->var.height = -1; - info->var.width = -1; - - info->var.xres = fb_width; - info->var.yres = fb_height; + drm_fb_helper_fill_fix(info, fb->depth); + drm_fb_helper_fill_var(info, fb, fb_width, fb_height); /* FIXME: we really shouldn't expose mmio space at all */ info->fix.mmio_start = pci_resource_start(dev->pdev, mmio_bar); @@ -550,64 +218,9 @@ static int intelfb_create(struct drm_device *dev, uint32_t fb_width, info->pixmap.flags = FB_PIXMAP_SYSTEM; info->pixmap.scan_align = 1; - switch(fb->depth) { - case 8: - info->var.red.offset = 0; - info->var.green.offset = 0; - info->var.blue.offset = 0; - info->var.red.length = 8; /* 8bit DAC */ - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 15: - info->var.red.offset = 10; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 5; - info->var.blue.length = 5; - info->var.transp.offset = 15; - info->var.transp.length = 1; - break; - case 16: - info->var.red.offset = 11; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 6; - info->var.blue.length = 5; - info->var.transp.offset = 0; - break; - case 24: - info->var.red.offset = 16; - info->var.green.offset = 8; - 
info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 32: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 24; - info->var.transp.length = 8; - break; - default: - break; - } - fb->fbdev = info; par->intel_fb = intel_fb; - par->dev = dev; /* To allow resizeing without swapping buffers */ DRM_DEBUG("allocated %dx%d fb: 0x%08x, bo %p\n", intel_fb->base.width, @@ -625,307 +238,12 @@ out: return ret; } -static int intelfb_multi_fb_probe_crtc(struct drm_device *dev, struct drm_crtc *crtc) -{ - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_framebuffer *intel_fb; - struct drm_framebuffer *fb; - struct drm_connector *connector; - struct fb_info *info; - struct intelfb_par *par; - struct drm_mode_set *modeset; - unsigned int width, height; - int new_fb = 0; - int ret, i, conn_count; - - if (!drm_helper_crtc_in_use(crtc)) - return 0; - - if (!crtc->desired_mode) - return 0; - - width = crtc->desired_mode->hdisplay; - height = crtc->desired_mode->vdisplay; - - /* is there an fb bound to this crtc already */ - if (!intel_crtc->mode_set.fb) { - ret = intelfb_create(dev, width, height, width, height, &intel_fb); - if (ret) - return -EINVAL; - new_fb = 1; - } else { - fb = intel_crtc->mode_set.fb; - intel_fb = to_intel_framebuffer(fb); - if ((intel_fb->base.width < width) || (intel_fb->base.height < height)) - return -EINVAL; - } - - info = intel_fb->base.fbdev; - par = info->par; - - modeset = &intel_crtc->mode_set; - modeset->fb = &intel_fb->base; - conn_count = 0; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - if (connector->encoder) - if (connector->encoder->crtc == modeset->crtc) { - modeset->connectors[conn_count] = connector; - conn_count++; - if (conn_count > INTELFB_CONN_LIMIT) - BUG(); - } - } - - for (i = conn_count; i < INTELFB_CONN_LIMIT; i++) - modeset->connectors[i] = NULL; - - par->crtc_ids[0] = crtc->base.id; - - modeset->num_connectors = conn_count; - if (modeset->crtc->desired_mode) { - if (modeset->mode) - drm_mode_destroy(dev, modeset->mode); - modeset->mode = drm_mode_duplicate(dev, - modeset->crtc->desired_mode); - } - - par->crtc_count = 1; - - if (new_fb) { - info->var.pixclock = -1; - if (register_framebuffer(info) < 0) - return -EINVAL; - } else - intelfb_set_par(info); - - DRM_INFO("fb%d: %s frame buffer device\n", info->node, - info->fix.id); - - /* Switch back to kernel console on panic */ - kernelfb_mode = *modeset; - atomic_notifier_chain_register(&panic_notifier_list, &paniced); - DRM_DEBUG("registered panic notifier\n"); - - return 0; -} - -static int intelfb_multi_fb_probe(struct drm_device *dev) -{ - - struct drm_crtc *crtc; - int ret = 0; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - ret = intelfb_multi_fb_probe_crtc(dev, crtc); - if (ret) - return ret; - } - return ret; -} - -static int intelfb_single_fb_probe(struct drm_device *dev) -{ - struct drm_crtc *crtc; - struct drm_connector *connector; - unsigned int fb_width = (unsigned)-1, fb_height = (unsigned)-1; - unsigned int surface_width = 0, surface_height = 0; - int new_fb = 0; - int crtc_count = 0; - int ret, i, conn_count = 0; - struct intel_framebuffer *intel_fb; - struct fb_info *info; - struct intelfb_par *par; - struct drm_mode_set *modeset = NULL; 
- - DRM_DEBUG("\n"); - - /* Get a count of crtcs now in use and new min/maxes width/heights */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - if (!drm_helper_crtc_in_use(crtc)) - continue; - - crtc_count++; - if (!crtc->desired_mode) - continue; - - /* Smallest mode determines console size... */ - if (crtc->desired_mode->hdisplay < fb_width) - fb_width = crtc->desired_mode->hdisplay; - - if (crtc->desired_mode->vdisplay < fb_height) - fb_height = crtc->desired_mode->vdisplay; - - /* ... but largest for memory allocation dimensions */ - if (crtc->desired_mode->hdisplay > surface_width) - surface_width = crtc->desired_mode->hdisplay; - - if (crtc->desired_mode->vdisplay > surface_height) - surface_height = crtc->desired_mode->vdisplay; - } - - if (crtc_count == 0 || fb_width == -1 || fb_height == -1) { - /* hmm everyone went away - assume VGA cable just fell out - and will come back later. */ - DRM_DEBUG("no CRTCs available?\n"); - return 0; - } - -//fail - /* Find the fb for our new config */ - if (list_empty(&dev->mode_config.fb_kernel_list)) { - DRM_DEBUG("creating new fb (console size %dx%d, " - "buffer size %dx%d)\n", fb_width, fb_height, - surface_width, surface_height); - ret = intelfb_create(dev, fb_width, fb_height, surface_width, - surface_height, &intel_fb); - if (ret) - return -EINVAL; - new_fb = 1; - } else { - struct drm_framebuffer *fb; - - fb = list_first_entry(&dev->mode_config.fb_kernel_list, - struct drm_framebuffer, filp_head); - intel_fb = to_intel_framebuffer(fb); - - /* if someone hotplugs something bigger than we have already - * allocated, we are pwned. As really we can't resize an - * fbdev that is in the wild currently due to fbdev not really - * being designed for the lower layers moving stuff around - * under it. - * - so in the grand style of things - punt. - */ - if ((fb->width < surface_width) || - (fb->height < surface_height)) { - DRM_ERROR("fb not large enough for console\n"); - return -EINVAL; - } - } -// fail - - info = intel_fb->base.fbdev; - par = info->par; - - crtc_count = 0; - /* - * For each CRTC, set up the connector list for the CRTC's mode - * set configuration. 
- */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - - modeset = &intel_crtc->mode_set; - modeset->fb = &intel_fb->base; - conn_count = 0; - list_for_each_entry(connector, &dev->mode_config.connector_list, - head) { - if (!connector->encoder) - continue; - - if(connector->encoder->crtc == modeset->crtc) { - modeset->connectors[conn_count++] = connector; - if (conn_count > INTELFB_CONN_LIMIT) - BUG(); - } - } - - /* Zero out remaining connector pointers */ - for (i = conn_count; i < INTELFB_CONN_LIMIT; i++) - modeset->connectors[i] = NULL; - - par->crtc_ids[crtc_count++] = crtc->base.id; - - modeset->num_connectors = conn_count; - if (modeset->crtc->desired_mode) { - if (modeset->mode) - drm_mode_destroy(dev, modeset->mode); - modeset->mode = drm_mode_duplicate(dev, - modeset->crtc->desired_mode); - } - } - par->crtc_count = crtc_count; - - if (new_fb) { - info->var.pixclock = -1; - if (register_framebuffer(info) < 0) - return -EINVAL; - } else - intelfb_set_par(info); - - DRM_INFO("fb%d: %s frame buffer device\n", info->node, - info->fix.id); - - /* Switch back to kernel console on panic */ - kernelfb_mode = *modeset; - atomic_notifier_chain_register(&panic_notifier_list, &paniced); - DRM_DEBUG("registered panic notifier\n"); - - return 0; -} - -/** - * intelfb_restore - restore the framebuffer console (kernel) config - * - * Restore's the kernel's fbcon mode, used for lastclose & panic paths. - */ -void intelfb_restore(void) -{ - int ret; - if ((ret = drm_crtc_helper_set_config(&kernelfb_mode)) != 0) { - DRM_ERROR("Failed to restore crtc configuration: %d\n", - ret); - } -} - -static void intelfb_restore_work_fn(struct work_struct *ignored) -{ - intelfb_restore(); -} -static DECLARE_WORK(intelfb_restore_work, intelfb_restore_work_fn); - -static void intelfb_sysrq(int dummy1, struct tty_struct *dummy3) -{ - schedule_work(&intelfb_restore_work); -} - -static struct sysrq_key_op sysrq_intelfb_restore_op = { - .handler = intelfb_sysrq, - .help_msg = "force-fb(V)", - .action_msg = "Restore framebuffer console", -}; - int intelfb_probe(struct drm_device *dev) { int ret; DRM_DEBUG("\n"); - - /* something has changed in the lower levels of hell - deal with it - here */ - - /* two modes : a) 1 fb to rule all crtcs. - b) one fb per crtc. - two actions 1) new connected device - 2) device removed. - case a/1 : if the fb surface isn't big enough - resize the surface fb. - if the fb size isn't big enough - resize fb into surface. - if everything big enough configure the new crtc/etc. - case a/2 : undo the configuration - possibly resize down the fb to fit the new configuration. - case b/1 : see if it is on a new crtc - setup a new fb and add it. - case b/2 : teardown the new fb. 
- */ - - /* mode a first */ - /* search for an fb */ - if (i915_fbpercrtc == 1) { - ret = intelfb_multi_fb_probe(dev); - } else { - ret = intelfb_single_fb_probe(dev); - } - - register_sysrq_key('v', &sysrq_intelfb_restore_op); - + ret = drm_fb_helper_single_fb_probe(dev, intelfb_create); return ret; } EXPORT_SYMBOL(intelfb_probe); @@ -940,13 +258,14 @@ int intelfb_remove(struct drm_device *dev, struct drm_framebuffer *fb) info = fb->fbdev; if (info) { + struct intelfb_par *par = info->par; unregister_framebuffer(info); iounmap(info->screen_base); + if (info->par) + drm_fb_helper_free(&par->helper); framebuffer_release(info); } - atomic_notifier_chain_unregister(&panic_notifier_list, &paniced); - memset(&kernelfb_mode, 0, sizeof(struct drm_mode_set)); return 0; } EXPORT_SYMBOL(intelfb_remove); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index a8fa1bb84cf7..af035605d147 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -158,9 +158,6 @@ static void radeon_crtc_destroy(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - if (radeon_crtc->mode_set.mode) { - drm_mode_destroy(crtc->dev, radeon_crtc->mode_set.mode); - } drm_crtc_cleanup(crtc); kfree(radeon_crtc); } @@ -189,9 +186,11 @@ static void radeon_crtc_init(struct drm_device *dev, int index) radeon_crtc->crtc_id = index; rdev->mode_info.crtcs[index] = radeon_crtc; +#if 0 radeon_crtc->mode_set.crtc = &radeon_crtc->base; radeon_crtc->mode_set.connectors = (struct drm_connector **)(radeon_crtc + 1); radeon_crtc->mode_set.num_connectors = 0; +#endif for (i = 0; i < 256; i++) { radeon_crtc->lut_r[i] = i << 2; diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index ec383edf5f38..ebb58959f418 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -28,15 +28,7 @@ */ #include -#include -#include -#include -#include -#include -#include -#include #include -#include #include "drmP.h" #include "drm.h" @@ -45,375 +37,86 @@ #include "radeon_drm.h" #include "radeon.h" +#include "drm_fb_helper.h" + struct radeon_fb_device { - struct radeon_device *rdev; - struct drm_display_mode *mode; + struct drm_fb_helper helper; struct radeon_framebuffer *rfb; - int crtc_count; - /* crtc currently bound to this */ - uint32_t crtc_ids[2]; + struct radeon_device *rdev; }; -static int radeonfb_setcolreg(unsigned regno, - unsigned red, - unsigned green, - unsigned blue, - unsigned transp, - struct fb_info *info) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct drm_crtc *crtc; - int i; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct drm_mode_set *modeset = &radeon_crtc->mode_set; - struct drm_framebuffer *fb = modeset->fb; - - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - if (i == rfbdev->crtc_count) { - continue; - } - if (regno > 255) { - return 1; - } - if (fb->depth == 8) { - radeon_crtc_fb_gamma_set(crtc, red, green, blue, regno); - return 0; - } - - if (regno < 16) { - switch (fb->depth) { - case 15: - fb->pseudo_palette[regno] = ((red & 0xf800) >> 1) | - ((green & 0xf800) >> 6) | - ((blue & 0xf800) >> 11); - break; - case 16: - fb->pseudo_palette[regno] = (red & 0xf800) | - ((green & 0xfc00) >> 5) | - ((blue & 0xf800) >> 11); - break; - case 24: - case 32: - 
fb->pseudo_palette[regno] = - (((red >> 8) & 0xff) << info->var.red.offset) | - (((green >> 8) & 0xff) << info->var.green.offset) | - (((blue >> 8) & 0xff) << info->var.blue.offset); - break; - } - } - } - return 0; -} - -static int radeonfb_check_var(struct fb_var_screeninfo *var, - struct fb_info *info) +static int radeon_fb_check_var(struct fb_var_screeninfo *var, + struct fb_info *info) { - struct radeon_fb_device *rfbdev = info->par; - struct radeon_framebuffer *rfb = rfbdev->rfb; - struct drm_framebuffer *fb = &rfb->base; - int depth; - - if (var->pixclock == -1 || !var->pixclock) { - return -EINVAL; - } - /* Need to resize the fb object !!! */ - if (var->xres > fb->width || var->yres > fb->height) { - DRM_ERROR("Requested width/height is greater than current fb " - "object %dx%d > %dx%d\n", var->xres, var->yres, - fb->width, fb->height); - DRM_ERROR("Need resizing code.\n"); - return -EINVAL; - } - - switch (var->bits_per_pixel) { - case 16: - depth = (var->green.length == 6) ? 16 : 15; - break; - case 32: - depth = (var->transp.length > 0) ? 32 : 24; - break; - default: - depth = var->bits_per_pixel; - break; - } - - switch (depth) { - case 8: - var->red.offset = 0; - var->green.offset = 0; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; -#ifdef __LITTLE_ENDIAN - case 15: - var->red.offset = 10; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 5; - var->blue.length = 5; - var->transp.length = 1; - var->transp.offset = 15; - break; - case 16: - var->red.offset = 11; - var->green.offset = 5; - var->blue.offset = 0; - var->red.length = 5; - var->green.length = 6; - var->blue.length = 5; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 24: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 16; - var->green.offset = 8; - var->blue.offset = 0; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 24; - break; -#else - case 24: - var->red.offset = 8; - var->green.offset = 16; - var->blue.offset = 24; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - var->transp.offset = 0; - break; - case 32: - var->red.offset = 8; - var->green.offset = 16; - var->blue.offset = 24; - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 8; - var->transp.offset = 0; - break; -#endif - default: - return -EINVAL; - } - return 0; -} - -/* this will let fbcon do the mode init */ -static int radeonfb_set_par(struct fb_info *info) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct fb_var_screeninfo *var = &info->var; - struct drm_crtc *crtc; int ret; - int i; - - if (var->pixclock != -1) { - DRM_ERROR("PIXEL CLCOK SET\n"); - return -EINVAL; - } - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - if (i == rfbdev->crtc_count) { - continue; - } - if (crtc->fb == radeon_crtc->mode_set.fb) { - mutex_lock(&dev->mode_config.mutex); - ret = 
crtc->funcs->set_config(&radeon_crtc->mode_set); - mutex_unlock(&dev->mode_config.mutex); - if (ret) { - return ret; - } - } - } - return 0; -} - -static int radeonfb_pan_display(struct fb_var_screeninfo *var, - struct fb_info *info) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct drm_mode_set *modeset; - struct drm_crtc *crtc; - struct radeon_crtc *radeon_crtc; - int ret = 0; - int i; - - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - - if (i == rfbdev->crtc_count) { - continue; - } - - radeon_crtc = to_radeon_crtc(crtc); - modeset = &radeon_crtc->mode_set; - - modeset->x = var->xoffset; - modeset->y = var->yoffset; - - if (modeset->num_connectors) { - mutex_lock(&dev->mode_config.mutex); - ret = crtc->funcs->set_config(modeset); - mutex_unlock(&dev->mode_config.mutex); - if (!ret) { - info->var.xoffset = var->xoffset; - info->var.yoffset = var->yoffset; - } + ret = drm_fb_helper_check_var(var, info); + if (ret) + return ret; + + /* big endian override for radeon endian workaround */ +#ifdef __BIG_ENDIAN + { + int depth; + switch (var->bits_per_pixel) { + case 16: + depth = (var->green.length == 6) ? 16 : 15; + break; + case 32: + depth = (var->transp.length > 0) ? 32 : 24; + break; + default: + depth = var->bits_per_pixel; + break; } - } - return ret; -} - -static void radeonfb_on(struct fb_info *info) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct drm_crtc *crtc; - struct drm_encoder *encoder; - int i; - - /* - * For each CRTC in this fb, find all associated encoders - * and turn them off, then turn off the CRTC. - */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; - - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - - mutex_lock(&dev->mode_config.mutex); - crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON); - mutex_unlock(&dev->mode_config.mutex); - - /* Found a CRTC on this fb, now find encoders */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - struct drm_encoder_helper_funcs *encoder_funcs; - - encoder_funcs = encoder->helper_private; - mutex_lock(&dev->mode_config.mutex); - encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON); - mutex_unlock(&dev->mode_config.mutex); - } - } - } -} - -static void radeonfb_off(struct fb_info *info, int dpms_mode) -{ - struct radeon_fb_device *rfbdev = info->par; - struct drm_device *dev = rfbdev->rdev->ddev; - struct drm_crtc *crtc; - struct drm_encoder *encoder; - int i; - - /* - * For each CRTC in this fb, find all associated encoders - * and turn them off, then turn off the CRTC. 
- */ - list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private; - - for (i = 0; i < rfbdev->crtc_count; i++) { - if (crtc->base.id == rfbdev->crtc_ids[i]) { - break; - } - } - - /* Found a CRTC on this fb, now find encoders */ - list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { - if (encoder->crtc == crtc) { - struct drm_encoder_helper_funcs *encoder_funcs; - - encoder_funcs = encoder->helper_private; - mutex_lock(&dev->mode_config.mutex); - encoder_funcs->dpms(encoder, dpms_mode); - mutex_unlock(&dev->mode_config.mutex); - } - } - if (dpms_mode == DRM_MODE_DPMS_OFF) { - mutex_lock(&dev->mode_config.mutex); - crtc_funcs->dpms(crtc, dpms_mode); - mutex_unlock(&dev->mode_config.mutex); + switch (depth) { + case 8: + var->red.offset = 0; + var->green.offset = 0; + var->blue.offset = 0; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 24: + var->red.offset = 8; + var->green.offset = 16; + var->blue.offset = 24; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; + var->transp.offset = 0; + break; + case 32: + var->red.offset = 8; + var->green.offset = 16; + var->blue.offset = 24; + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 8; + var->transp.offset = 0; + break; + default: + return -EINVAL; } } -} - -int radeonfb_blank(int blank, struct fb_info *info) -{ - switch (blank) { - case FB_BLANK_UNBLANK: - radeonfb_on(info); - break; - case FB_BLANK_NORMAL: - radeonfb_off(info, DRM_MODE_DPMS_STANDBY); - break; - case FB_BLANK_HSYNC_SUSPEND: - radeonfb_off(info, DRM_MODE_DPMS_STANDBY); - break; - case FB_BLANK_VSYNC_SUSPEND: - radeonfb_off(info, DRM_MODE_DPMS_SUSPEND); - break; - case FB_BLANK_POWERDOWN: - radeonfb_off(info, DRM_MODE_DPMS_OFF); - break; - } +#endif return 0; } static struct fb_ops radeonfb_ops = { .owner = THIS_MODULE, - .fb_check_var = radeonfb_check_var, - .fb_set_par = radeonfb_set_par, - .fb_setcolreg = radeonfb_setcolreg, + .fb_check_var = radeon_fb_check_var, + .fb_set_par = drm_fb_helper_set_par, + .fb_setcolreg = drm_fb_helper_setcolreg, .fb_fillrect = cfb_fillrect, .fb_copyarea = cfb_copyarea, .fb_imageblit = cfb_imageblit, - .fb_pan_display = radeonfb_pan_display, - .fb_blank = radeonfb_blank, + .fb_pan_display = drm_fb_helper_pan_display, + .fb_blank = drm_fb_helper_blank, }; /** @@ -456,21 +159,6 @@ int radeonfb_resize(struct drm_device *dev, struct drm_crtc *crtc) } EXPORT_SYMBOL(radeonfb_resize); -static struct drm_mode_set panic_mode; - -int radeonfb_panic(struct notifier_block *n, unsigned long ununsed, - void *panic_str) -{ - DRM_ERROR("panic occurred, switching back to text console\n"); - drm_crtc_helper_set_config(&panic_mode); - return 0; -} -EXPORT_SYMBOL(radeonfb_panic); - -static struct notifier_block paniced = { - .notifier_call = radeonfb_panic, -}; - static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp, bool tiled) { int aligned = width; @@ -495,11 +183,16 @@ static int radeon_align_pitch(struct radeon_device *rdev, int width, int bpp, bo return aligned; } -int radeonfb_create(struct radeon_device *rdev, +static struct drm_fb_helper_funcs radeon_fb_helper_funcs = { + .gamma_set = radeon_crtc_fb_gamma_set, +}; + +int radeonfb_create(struct drm_device *dev, uint32_t fb_width, uint32_t fb_height, uint32_t surface_width, uint32_t surface_height, - struct 
radeon_framebuffer **rfb_p) + struct drm_framebuffer **fb_p) { + struct radeon_device *rdev = dev->dev_private; struct fb_info *info; struct radeon_fb_device *rfbdev; struct drm_framebuffer *fb = NULL; @@ -554,8 +247,8 @@ int radeonfb_create(struct radeon_device *rdev, list_add(&fb->filp_head, &rdev->ddev->mode_config.fb_kernel_list); + *fb_p = fb; rfb = to_radeon_framebuffer(fb); - *rfb_p = rfb; rdev->fbdev_rfb = rfb; rdev->fbdev_robj = robj; @@ -564,7 +257,14 @@ int radeonfb_create(struct radeon_device *rdev, ret = -ENOMEM; goto out_unref; } + rfbdev = info->par; + rfbdev->helper.funcs = &radeon_fb_helper_funcs; + rfbdev->helper.dev = dev; + ret = drm_fb_helper_init_crtc_count(&rfbdev->helper, 2, + RADEONFB_CONN_LIMIT); + if (ret) + goto out_unref; if (fb_tiled) radeon_object_check_tiling(robj, 0, 0); @@ -577,33 +277,19 @@ int radeonfb_create(struct radeon_device *rdev, memset_io(fbptr, 0, aligned_size); strcpy(info->fix.id, "radeondrmfb"); - info->fix.type = FB_TYPE_PACKED_PIXELS; - info->fix.visual = FB_VISUAL_TRUECOLOR; - info->fix.type_aux = 0; - info->fix.xpanstep = 1; /* doing it in hw */ - info->fix.ypanstep = 1; /* doing it in hw */ - info->fix.ywrapstep = 0; - info->fix.accel = FB_ACCEL_NONE; - info->fix.type_aux = 0; + + drm_fb_helper_fill_fix(info, fb->pitch); + info->flags = FBINFO_DEFAULT; info->fbops = &radeonfb_ops; - info->fix.line_length = fb->pitch; + tmp = fb_gpuaddr - rdev->mc.vram_location; info->fix.smem_start = rdev->mc.aper_base + tmp; info->fix.smem_len = size; info->screen_base = fbptr; info->screen_size = size; - info->pseudo_palette = fb->pseudo_palette; - info->var.xres_virtual = fb->width; - info->var.yres_virtual = fb->height; - info->var.bits_per_pixel = fb->bits_per_pixel; - info->var.xoffset = 0; - info->var.yoffset = 0; - info->var.activate = FB_ACTIVATE_NOW; - info->var.height = -1; - info->var.width = -1; - info->var.xres = fb_width; - info->var.yres = fb_height; + + drm_fb_helper_fill_var(info, fb, fb_width, fb_height); /* setup aperture base/size for vesafb takeover */ info->aperture_base = rdev->ddev->mode_config.fb_base; @@ -626,6 +312,9 @@ int radeonfb_create(struct radeon_device *rdev, DRM_INFO("fb depth is %d\n", fb->depth); DRM_INFO(" pitch is %d\n", fb->pitch); +#ifdef __BIG_ENDIAN + /* fill var sets defaults for this stuff - override + on big endian */ switch (fb->depth) { case 8: info->var.red.offset = 0; @@ -637,47 +326,6 @@ int radeonfb_create(struct radeon_device *rdev, info->var.transp.offset = 0; info->var.transp.length = 0; break; -#ifdef __LITTLE_ENDIAN - case 15: - info->var.red.offset = 10; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 5; - info->var.blue.length = 5; - info->var.transp.offset = 15; - info->var.transp.length = 1; - break; - case 16: - info->var.red.offset = 11; - info->var.green.offset = 5; - info->var.blue.offset = 0; - info->var.red.length = 5; - info->var.green.length = 6; - info->var.blue.length = 5; - info->var.transp.offset = 0; - break; - case 24: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 0; - info->var.transp.length = 0; - break; - case 32: - info->var.red.offset = 16; - info->var.green.offset = 8; - info->var.blue.offset = 0; - info->var.red.length = 8; - info->var.green.length = 8; - info->var.blue.length = 8; - info->var.transp.offset = 24; - info->var.transp.length = 8; - break; 
-#else case 24: info->var.red.offset = 8; info->var.green.offset = 16; @@ -699,9 +347,9 @@ int radeonfb_create(struct radeon_device *rdev, info->var.transp.length = 8; break; default: -#endif break; } +#endif fb->fbdev = info; rfbdev->rfb = rfb; @@ -726,145 +374,10 @@ out: return ret; } -static int radeonfb_single_fb_probe(struct radeon_device *rdev) -{ - struct drm_crtc *crtc; - struct drm_connector *connector; - unsigned int fb_width = (unsigned)-1, fb_height = (unsigned)-1; - unsigned int surface_width = 0, surface_height = 0; - int new_fb = 0; - int crtc_count = 0; - int ret, i, conn_count = 0; - struct radeon_framebuffer *rfb; - struct fb_info *info; - struct radeon_fb_device *rfbdev; - struct drm_mode_set *modeset = NULL; - - /* first up get a count of crtcs now in use and new min/maxes width/heights */ - list_for_each_entry(crtc, &rdev->ddev->mode_config.crtc_list, head) { - if (drm_helper_crtc_in_use(crtc)) { - if (crtc->desired_mode) { - if (crtc->desired_mode->hdisplay < fb_width) - fb_width = crtc->desired_mode->hdisplay; - - if (crtc->desired_mode->vdisplay < fb_height) - fb_height = crtc->desired_mode->vdisplay; - - if (crtc->desired_mode->hdisplay > surface_width) - surface_width = crtc->desired_mode->hdisplay; - - if (crtc->desired_mode->vdisplay > surface_height) - surface_height = crtc->desired_mode->vdisplay; - } - crtc_count++; - } - } - - if (crtc_count == 0 || fb_width == -1 || fb_height == -1) { - /* hmm everyone went away - assume VGA cable just fell out - and will come back later. */ - return 0; - } - - /* do we have an fb already? */ - if (list_empty(&rdev->ddev->mode_config.fb_kernel_list)) { - /* create an fb if we don't have one */ - ret = radeonfb_create(rdev, fb_width, fb_height, surface_width, surface_height, &rfb); - if (ret) { - return -EINVAL; - } - new_fb = 1; - } else { - struct drm_framebuffer *fb; - fb = list_first_entry(&rdev->ddev->mode_config.fb_kernel_list, struct drm_framebuffer, filp_head); - rfb = to_radeon_framebuffer(fb); - - /* if someone hotplugs something bigger than we have already allocated, we are pwned. - As really we can't resize an fbdev that is in the wild currently due to fbdev - not really being designed for the lower layers moving stuff around under it. - - so in the grand style of things - punt. 
*/ - if ((fb->width < surface_width) || (fb->height < surface_height)) { - DRM_ERROR("Framebuffer not large enough to scale console onto.\n"); - return -EINVAL; - } - } - - info = rfb->base.fbdev; - rdev->fbdev_info = info; - rfbdev = info->par; - - crtc_count = 0; - /* okay we need to setup new connector sets in the crtcs */ - list_for_each_entry(crtc, &rdev->ddev->mode_config.crtc_list, head) { - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - modeset = &radeon_crtc->mode_set; - modeset->fb = &rfb->base; - conn_count = 0; - list_for_each_entry(connector, &rdev->ddev->mode_config.connector_list, head) { - if (connector->encoder) - if (connector->encoder->crtc == modeset->crtc) { - modeset->connectors[conn_count] = connector; - conn_count++; - if (conn_count > RADEONFB_CONN_LIMIT) - BUG(); - } - } - - for (i = conn_count; i < RADEONFB_CONN_LIMIT; i++) - modeset->connectors[i] = NULL; - - - rfbdev->crtc_ids[crtc_count++] = crtc->base.id; - - modeset->num_connectors = conn_count; - if (modeset->crtc->desired_mode) { - if (modeset->mode) { - drm_mode_destroy(rdev->ddev, modeset->mode); - } - modeset->mode = drm_mode_duplicate(rdev->ddev, - modeset->crtc->desired_mode); - } - } - rfbdev->crtc_count = crtc_count; - - if (new_fb) { - info->var.pixclock = -1; - if (register_framebuffer(info) < 0) - return -EINVAL; - } else { - radeonfb_set_par(info); - } - printk(KERN_INFO "fb%d: %s frame buffer device\n", info->node, - info->fix.id); - - /* Switch back to kernel console on panic */ - panic_mode = *modeset; - atomic_notifier_chain_register(&panic_notifier_list, &paniced); - printk(KERN_INFO "registered panic notifier\n"); - - return 0; -} - int radeonfb_probe(struct drm_device *dev) { int ret; - - /* something has changed in the lower levels of hell - deal with it - here */ - - /* two modes : a) 1 fb to rule all crtcs. - b) one fb per crtc. - two actions 1) new connected device - 2) device removed. - case a/1 : if the fb surface isn't big enough - resize the surface fb. - if the fb size isn't big enough - resize fb into surface. - if everything big enough configure the new crtc/etc. - case a/2 : undo the configuration - possibly resize down the fb to fit the new configuration. - case b/1 : see if it is on a new crtc - setup a new fb and add it. - case b/2 : teardown the new fb. 
- */ - ret = radeonfb_single_fb_probe(dev->dev_private); + ret = drm_fb_helper_single_fb_probe(dev, &radeonfb_create); return ret; } EXPORT_SYMBOL(radeonfb_probe); @@ -880,16 +393,17 @@ int radeonfb_remove(struct drm_device *dev, struct drm_framebuffer *fb) } info = fb->fbdev; if (info) { + struct radeon_fb_device *rfbdev = info->par; robj = rfb->obj->driver_private; unregister_framebuffer(info); radeon_object_kunmap(robj); radeon_object_unpin(robj); + drm_fb_helper_free(&rfbdev->helper); framebuffer_release(info); } printk(KERN_INFO "unregistered panic notifier\n"); - atomic_notifier_chain_unregister(&panic_notifier_list, &paniced); - memset(&panic_mode, 0, sizeof(struct drm_mode_set)); + return 0; } EXPORT_SYMBOL(radeonfb_remove); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 3b09a1f2d8f9..20e9509a7130 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -195,8 +195,6 @@ struct radeon_crtc { bool enabled; bool can_tile; uint32_t crtc_offset; - struct radeon_framebuffer *fbdev_fb; - struct drm_mode_set mode_set; struct drm_gem_object *cursor_bo; uint64_t cursor_addr; int cursor_width; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index db92a83f8ca9..b0427a70fcbd 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -259,6 +259,8 @@ struct drm_framebuffer { void *fbdev; u32 pseudo_palette[17]; struct list_head filp_head; + /* if you are using the helper */ + void *helper_private; }; struct drm_property_blob { diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h new file mode 100644 index 000000000000..88fffbdfa26f --- /dev/null +++ b/include/drm/drm_fb_helper.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2006-2009 Red Hat Inc. + * Copyright (c) 2006-2008 Intel Corporation + * Copyright (c) 2007 Dave Airlie + * + * DRM framebuffer helper functions + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. 
+ * + * Authors: + * Dave Airlie + * Jesse Barnes + */ +#ifndef DRM_FB_HELPER_H +#define DRM_FB_HELPER_H + +struct drm_fb_helper_crtc { + uint32_t crtc_id; + struct drm_mode_set mode_set; +}; + +struct drm_fb_helper_funcs { + void (*gamma_set)(struct drm_crtc *crtc, u16 red, u16 green, + u16 blue, int regno); +}; + +struct drm_fb_helper { + struct drm_framebuffer *fb; + struct drm_device *dev; + struct drm_display_mode *mode; + int crtc_count; + struct drm_fb_helper_crtc *crtc_info; + struct drm_fb_helper_funcs *funcs; + int conn_limit; + struct list_head kernel_fb_list; +}; + +int drm_fb_helper_single_fb_probe(struct drm_device *dev, + int (*fb_create)(struct drm_device *dev, + uint32_t fb_width, + uint32_t fb_height, + uint32_t surface_width, + uint32_t surface_height, + struct drm_framebuffer **fb_ptr)); +int drm_fb_helper_init_crtc_count(struct drm_fb_helper *helper, int crtc_count, + int max_conn); +void drm_fb_helper_free(struct drm_fb_helper *helper); +int drm_fb_helper_blank(int blank, struct fb_info *info); +int drm_fb_helper_pan_display(struct fb_var_screeninfo *var, + struct fb_info *info); +int drm_fb_helper_set_par(struct fb_info *info); +int drm_fb_helper_check_var(struct fb_var_screeninfo *var, + struct fb_info *info); +int drm_fb_helper_setcolreg(unsigned regno, + unsigned red, + unsigned green, + unsigned blue, + unsigned transp, + struct fb_info *info); + +void drm_fb_helper_restore(void); +void drm_fb_helper_fill_var(struct fb_info *info, struct drm_framebuffer *fb, + uint32_t fb_width, uint32_t fb_height); +void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch); + +#endif -- cgit v1.2.3 From 7b3d3e4fc685a7d7e0b4c207ce24dfbab5689eb0 Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Sat, 29 Aug 2009 20:21:21 +0000 Subject: netdevice: Consolidate to use existing macros where available. Patch compiled and 32 simultaneous netperf testing ran fine. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- include/linux/netdevice.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9192cdf5bd2c..60d3aac49ed4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1257,7 +1257,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) { #ifdef CONFIG_NETPOLL_TRAP if (netpoll_trap()) { - clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state); + netif_tx_start_queue(dev_queue); return; } #endif @@ -1363,7 +1363,8 @@ static inline int netif_running(const struct net_device *dev) static inline void netif_start_subqueue(struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); - clear_bit(__QUEUE_STATE_XOFF, &txq->state); + + netif_tx_start_queue(txq); } /** @@ -1380,7 +1381,7 @@ static inline void netif_stop_subqueue(struct net_device *dev, u16 queue_index) if (netpoll_trap()) return; #endif - set_bit(__QUEUE_STATE_XOFF, &txq->state); + netif_tx_stop_queue(txq); } /** @@ -1394,7 +1395,8 @@ static inline int __netif_subqueue_stopped(const struct net_device *dev, u16 queue_index) { struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); - return test_bit(__QUEUE_STATE_XOFF, &txq->state); + + return netif_tx_queue_stopped(txq); } static inline int netif_subqueue_stopped(const struct net_device *dev, @@ -1746,8 +1748,7 @@ static inline void netif_tx_unlock(struct net_device *dev) * force a schedule. 
*/ clear_bit(__QUEUE_STATE_FROZEN, &txq->state); - if (!test_bit(__QUEUE_STATE_XOFF, &txq->state)) - __netif_schedule(txq->qdisc); + netif_schedule_queue(txq); } spin_unlock(&dev->tx_global_lock); } -- cgit v1.2.3 From 8e254c1d183f0225ad21f9049641529e56cce4da Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Mon, 31 Aug 2009 16:49:41 +0800 Subject: tracing/filters: Defer pred allocation init_preds() allocates about 5392 bytes of memory (on x86_32) for a TRACE_EVENT. With my config, at system boot total memory occupied is: 5392 * (642 + 15) == 3459KB 642 == cat available_events | wc -l 15 == number of dirs in events/ftrace That's quite a lot, so we'd better defer memory allocation util it's needed, that's when filter is used. Signed-off-by: Li Zefan Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Tom Zanussi Cc: Masami Hiramatsu LKML-Reference: <4A9B8EA5.6020700@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 1 - include/linux/syscalls.h | 2 -- include/trace/ftrace.h | 1 - kernel/trace/trace_events_filter.c | 50 +++++++++++++++++++++++++++++++------- kernel/trace/trace_export.c | 1 - 5 files changed, 41 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index ace2da9e0a0d..755480484eb6 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -133,7 +133,6 @@ struct ftrace_event_call { #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 128 -extern int init_preds(struct ftrace_event_call *call); extern void destroy_preds(struct ftrace_event_call *call); extern int filter_match_preds(struct ftrace_event_call *call, void *rec); extern int filter_current_check_discard(struct ftrace_event_call *call, diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index f124c8995555..a8e37821cc60 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -177,7 +177,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ event_enter_##sname.id = id; \ set_syscall_enter_id(num, id); \ INIT_LIST_HEAD(&event_enter_##sname.fields); \ - init_preds(&event_enter_##sname); \ return 0; \ } \ TRACE_SYS_ENTER_PROFILE(sname); \ @@ -214,7 +213,6 @@ static void prof_sysexit_disable_##sname(struct ftrace_event_call *event_call) \ event_exit_##sname.id = id; \ set_syscall_exit_id(num, id); \ INIT_LIST_HEAD(&event_exit_##sname.fields); \ - init_preds(&event_exit_##sname); \ return 0; \ } \ TRACE_SYS_EXIT_PROFILE(sname); \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 57c56a998ee6..bfbc842600a1 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -622,7 +622,6 @@ static int ftrace_raw_init_event_##call(void) \ return -ENODEV; \ event_##call.id = id; \ INIT_LIST_HEAD(&event_##call.fields); \ - init_preds(&event_##call); \ return 0; \ } \ \ diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 9f03082c81d8..c6b2edfb7fe9 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -309,7 +309,7 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s) struct event_filter *filter = call->filter; mutex_lock(&event_mutex); - if (filter->filter_string) + if (filter && filter->filter_string) trace_seq_printf(s, "%s\n", filter->filter_string); else trace_seq_printf(s, "none\n"); @@ -322,7 +322,7 @@ void print_subsystem_event_filter(struct event_subsystem *system, struct event_filter *filter = system->filter; 
mutex_lock(&event_mutex); - if (filter->filter_string) + if (filter && filter->filter_string) trace_seq_printf(s, "%s\n", filter->filter_string); else trace_seq_printf(s, "none\n"); @@ -390,6 +390,9 @@ void destroy_preds(struct ftrace_event_call *call) struct event_filter *filter = call->filter; int i; + if (!filter) + return; + for (i = 0; i < MAX_FILTER_PRED; i++) { if (filter->preds[i]) filter_free_pred(filter->preds[i]); @@ -400,7 +403,7 @@ void destroy_preds(struct ftrace_event_call *call) call->filter = NULL; } -int init_preds(struct ftrace_event_call *call) +static int init_preds(struct ftrace_event_call *call) { struct event_filter *filter; struct filter_pred *pred; @@ -410,7 +413,6 @@ int init_preds(struct ftrace_event_call *call) if (!call->filter) return -ENOMEM; - call->filter_active = 0; filter->n_preds = 0; filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL); @@ -432,7 +434,28 @@ oom: return -ENOMEM; } -EXPORT_SYMBOL_GPL(init_preds); + +static int init_subsystem_preds(struct event_subsystem *system) +{ + struct ftrace_event_call *call; + int err; + + list_for_each_entry(call, &ftrace_events, list) { + if (!call->define_fields) + continue; + + if (strcmp(call->system, system->name) != 0) + continue; + + if (!call->filter) { + err = init_preds(call); + if (err) + return err; + } + } + + return 0; +} enum { FILTER_DISABLE_ALL, @@ -449,6 +472,9 @@ static void filter_free_subsystem_preds(struct event_subsystem *system, if (!call->define_fields) continue; + if (strcmp(call->system, system->name) != 0) + continue; + if (flag == FILTER_INIT_NO_RESET) { call->filter->no_reset = false; continue; @@ -457,10 +483,8 @@ static void filter_free_subsystem_preds(struct event_subsystem *system, if (flag == FILTER_SKIP_NO_RESET && call->filter->no_reset) continue; - if (!strcmp(call->system, system->name)) { - filter_disable_preds(call); - remove_filter_string(call->filter); - } + filter_disable_preds(call); + remove_filter_string(call->filter); } } @@ -1094,6 +1118,10 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string) mutex_lock(&event_mutex); + err = init_preds(call); + if (err) + goto out_unlock; + if (!strcmp(strstrip(filter_string), "0")) { filter_disable_preds(call); remove_filter_string(call->filter); @@ -1139,6 +1167,10 @@ int apply_subsystem_event_filter(struct event_subsystem *system, mutex_lock(&event_mutex); + err = init_subsystem_preds(system); + if (err) + goto out_unlock; + if (!strcmp(strstrip(filter_string), "0")) { filter_free_subsystem_preds(system, FILTER_DISABLE_ALL); remove_filter_string(system->filter); diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 029a91f42287..df1bf6e48bb9 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -135,7 +135,6 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static int ftrace_raw_init_event_##call(void) \ { \ INIT_LIST_HEAD(&event_##call.fields); \ - init_preds(&event_##call); \ return 0; \ } \ -- cgit v1.2.3 From 69d0ee7377eef808e34ba5542b554ec97244b871 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:36 +0200 Subject: locking: Move spinlock function bodies to header file Move spinlock function bodies to header file by creating a static inline version of each variant. Use the inline version on the out-of-line code. This shouldn't make any difference besides that the spinlock code can now be used to generate inlined spinlock code. 
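The pattern, reduced to a minimal standalone sketch (hypothetical lock type and function names, with C11 atomics standing in for the real raw_* primitives; this is not the kernel code itself): the body moves into the header as a static inline, and the exported out-of-line symbol becomes a thin wrapper around it, so a later change can point call sites at the inline body without duplicating any logic.

#include <stdatomic.h>

/* hypothetical lock type; initialise with { ATOMIC_FLAG_INIT } before use */
struct mylock { atomic_flag taken; };

/* header side: the body is now a static inline and can be expanded at call sites */
static inline int __my_trylock(struct mylock *l)
{
	return !atomic_flag_test_and_set_explicit(&l->taken, memory_order_acquire);
}

static inline void __my_unlock(struct mylock *l)
{
	atomic_flag_clear_explicit(&l->taken, memory_order_release);
}

/* .c side: the out-of-line symbols kept for existing callers shrink to wrappers */
int my_trylock(struct mylock *l)
{
	return __my_trylock(l);
}

void my_unlock(struct mylock *l)
{
	__my_unlock(l);
}
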
Signed-off-by: Heiko Carstens Acked-by: Arnd Bergmann Acked-by: Peter Zijlstra Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124417.859022429@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 18 +-- include/linux/spinlock_api_smp.h | 263 +++++++++++++++++++++++++++++++++++++++ kernel/spinlock.c | 174 +++++--------------------- 3 files changed, 300 insertions(+), 155 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 4be57ab03478..da76a06556bd 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -143,15 +143,6 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } */ #define spin_unlock_wait(lock) __raw_spin_unlock_wait(&(lock)->raw_lock) -/* - * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: - */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -# include -#else -# include -#endif - #ifdef CONFIG_DEBUG_SPINLOCK extern void _raw_spin_lock(spinlock_t *lock); #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) @@ -380,4 +371,13 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); */ #define spin_can_lock(lock) (!spin_is_locked(lock)) +/* + * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: + */ +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +# include +#else +# include +#endif + #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index d79845d034b5..6b108f5fb149 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,4 +60,267 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); +static inline int __spin_trylock(spinlock_t *lock) +{ + preempt_disable(); + if (_raw_spin_trylock(lock)) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable(); + return 0; +} + +static inline int __read_trylock(rwlock_t *lock) +{ + preempt_disable(); + if (_raw_read_trylock(lock)) { + rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable(); + return 0; +} + +static inline int __write_trylock(rwlock_t *lock) +{ + preempt_disable(); + if (_raw_write_trylock(lock)) { + rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable(); + return 0; +} + +/* + * If lockdep is enabled then we use the non-preemption spin-ops + * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are + * not re-enabled during lock-acquire (which the preempt-spin-ops do): + */ +#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) + +static inline void __read_lock(rwlock_t *lock) +{ + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} + +static inline unsigned long __spin_lock_irqsave(spinlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + /* + * On lockdep we dont want the hand-coded irq-enable of + * _raw_spin_lock_flags() code, because lockdep assumes + * that interrupts are not re-enabled during lock-acquire: + */ +#ifdef CONFIG_LOCKDEP + LOCK_CONTENDED(lock, 
_raw_spin_trylock, _raw_spin_lock); +#else + _raw_spin_lock_flags(lock, &flags); +#endif + return flags; +} + +static inline void __spin_lock_irq(spinlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} + +static inline void __spin_lock_bh(spinlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} + +static inline unsigned long __read_lock_irqsave(rwlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock, + _raw_read_lock_flags, &flags); + return flags; +} + +static inline void __read_lock_irq(rwlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} + +static inline void __read_lock_bh(rwlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); +} + +static inline unsigned long __write_lock_irqsave(rwlock_t *lock) +{ + unsigned long flags; + + local_irq_save(flags); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock, + _raw_write_lock_flags, &flags); + return flags; +} + +static inline void __write_lock_irq(rwlock_t *lock) +{ + local_irq_disable(); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +static inline void __write_lock_bh(rwlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +static inline void __spin_lock(spinlock_t *lock) +{ + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +} + +static inline void __write_lock(rwlock_t *lock) +{ + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); +} + +#endif /* CONFIG_PREEMPT */ + +static inline void __spin_unlock(spinlock_t *lock) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + preempt_enable(); +} + +static inline void __write_unlock(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + preempt_enable(); +} + +static inline void __read_unlock(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + preempt_enable(); +} + +static inline void __spin_unlock_irqrestore(spinlock_t *lock, + unsigned long flags) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} + +static inline void __spin_unlock_irq(spinlock_t *lock) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + local_irq_enable(); + preempt_enable(); +} + +static inline void __spin_unlock_bh(spinlock_t *lock) +{ + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} + +static inline void 
__read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} + +static inline void __read_unlock_irq(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + local_irq_enable(); + preempt_enable(); +} + +static inline void __read_unlock_bh(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} + +static inline void __write_unlock_irqrestore(rwlock_t *lock, + unsigned long flags) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + local_irq_restore(flags); + preempt_enable(); +} + +static inline void __write_unlock_irq(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + local_irq_enable(); + preempt_enable(); +} + +static inline void __write_unlock_bh(rwlock_t *lock) +{ + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +} + +static inline int __spin_trylock_bh(spinlock_t *lock) +{ + local_bh_disable(); + preempt_disable(); + if (_raw_spin_trylock(lock)) { + spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); + return 1; + } + preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + return 0; +} + #endif /* __LINUX_SPINLOCK_API_SMP_H */ diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 7932653c4ebd..2c000f5c070b 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -23,40 +23,19 @@ int __lockfunc _spin_trylock(spinlock_t *lock) { - preempt_disable(); - if (_raw_spin_trylock(lock)) { - spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable(); - return 0; + return __spin_trylock(lock); } EXPORT_SYMBOL(_spin_trylock); int __lockfunc _read_trylock(rwlock_t *lock) { - preempt_disable(); - if (_raw_read_trylock(lock)) { - rwlock_acquire_read(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable(); - return 0; + return __read_trylock(lock); } EXPORT_SYMBOL(_read_trylock); int __lockfunc _write_trylock(rwlock_t *lock) { - preempt_disable(); - if (_raw_write_trylock(lock)) { - rwlock_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable(); - return 0; + return __write_trylock(lock); } EXPORT_SYMBOL(_write_trylock); @@ -69,129 +48,74 @@ EXPORT_SYMBOL(_write_trylock); void __lockfunc _read_lock(rwlock_t *lock) { - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + __read_lock(lock); } EXPORT_SYMBOL(_read_lock); unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) { - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - /* - * On lockdep we dont want the hand-coded irq-enable of - * _raw_spin_lock_flags() code, because lockdep assumes - * that interrupts are not re-enabled during lock-acquire: - */ -#ifdef CONFIG_LOCKDEP - LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); -#else - _raw_spin_lock_flags(lock, &flags); -#endif - return flags; + return __spin_lock_irqsave(lock); } EXPORT_SYMBOL(_spin_lock_irqsave); void __lockfunc _spin_lock_irq(spinlock_t *lock) { - local_irq_disable(); - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); 
- LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + __spin_lock_irq(lock); } EXPORT_SYMBOL(_spin_lock_irq); void __lockfunc _spin_lock_bh(spinlock_t *lock) { - local_bh_disable(); - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + __spin_lock_bh(lock); } EXPORT_SYMBOL(_spin_lock_bh); unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) { - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, _raw_read_trylock, _raw_read_lock, - _raw_read_lock_flags, &flags); - return flags; + return __read_lock_irqsave(lock); } EXPORT_SYMBOL(_read_lock_irqsave); void __lockfunc _read_lock_irq(rwlock_t *lock) { - local_irq_disable(); - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + __read_lock_irq(lock); } EXPORT_SYMBOL(_read_lock_irq); void __lockfunc _read_lock_bh(rwlock_t *lock) { - local_bh_disable(); - preempt_disable(); - rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + __read_lock_bh(lock); } EXPORT_SYMBOL(_read_lock_bh); unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) { - unsigned long flags; - - local_irq_save(flags); - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED_FLAGS(lock, _raw_write_trylock, _raw_write_lock, - _raw_write_lock_flags, &flags); - return flags; + return __write_lock_irqsave(lock); } EXPORT_SYMBOL(_write_lock_irqsave); void __lockfunc _write_lock_irq(rwlock_t *lock) { - local_irq_disable(); - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + __write_lock_irq(lock); } EXPORT_SYMBOL(_write_lock_irq); void __lockfunc _write_lock_bh(rwlock_t *lock) { - local_bh_disable(); - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + __write_lock_bh(lock); } EXPORT_SYMBOL(_write_lock_bh); void __lockfunc _spin_lock(spinlock_t *lock) { - preempt_disable(); - spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + __spin_lock(lock); } - EXPORT_SYMBOL(_spin_lock); void __lockfunc _write_lock(rwlock_t *lock) { - preempt_disable(); - rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); - LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + __write_lock(lock); } - EXPORT_SYMBOL(_write_lock); #else /* CONFIG_PREEMPT: */ @@ -320,121 +244,79 @@ EXPORT_SYMBOL(_spin_lock_nest_lock); void __lockfunc _spin_unlock(spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); - _raw_spin_unlock(lock); - preempt_enable(); + __spin_unlock(lock); } EXPORT_SYMBOL(_spin_unlock); void __lockfunc _write_unlock(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - preempt_enable(); + __write_unlock(lock); } EXPORT_SYMBOL(_write_unlock); void __lockfunc _read_unlock(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - preempt_enable(); + __read_unlock(lock); } EXPORT_SYMBOL(_read_unlock); void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { - spin_release(&lock->dep_map, 1, _RET_IP_); - _raw_spin_unlock(lock); - local_irq_restore(flags); - preempt_enable(); + __spin_unlock_irqrestore(lock, flags); } 
EXPORT_SYMBOL(_spin_unlock_irqrestore); void __lockfunc _spin_unlock_irq(spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); - _raw_spin_unlock(lock); - local_irq_enable(); - preempt_enable(); + __spin_unlock_irq(lock); } EXPORT_SYMBOL(_spin_unlock_irq); void __lockfunc _spin_unlock_bh(spinlock_t *lock) { - spin_release(&lock->dep_map, 1, _RET_IP_); - _raw_spin_unlock(lock); - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + __spin_unlock_bh(lock); } EXPORT_SYMBOL(_spin_unlock_bh); void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - local_irq_restore(flags); - preempt_enable(); + __read_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_read_unlock_irqrestore); void __lockfunc _read_unlock_irq(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - local_irq_enable(); - preempt_enable(); + __read_unlock_irq(lock); } EXPORT_SYMBOL(_read_unlock_irq); void __lockfunc _read_unlock_bh(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_read_unlock(lock); - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + __read_unlock_bh(lock); } EXPORT_SYMBOL(_read_unlock_bh); void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - local_irq_restore(flags); - preempt_enable(); + __write_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_write_unlock_irqrestore); void __lockfunc _write_unlock_irq(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - local_irq_enable(); - preempt_enable(); + __write_unlock_irq(lock); } EXPORT_SYMBOL(_write_unlock_irq); void __lockfunc _write_unlock_bh(rwlock_t *lock) { - rwlock_release(&lock->dep_map, 1, _RET_IP_); - _raw_write_unlock(lock); - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + __write_unlock_bh(lock); } EXPORT_SYMBOL(_write_unlock_bh); int __lockfunc _spin_trylock_bh(spinlock_t *lock) { - local_bh_disable(); - preempt_disable(); - if (_raw_spin_trylock(lock)) { - spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); - return 1; - } - - preempt_enable_no_resched(); - local_bh_enable_ip((unsigned long)__builtin_return_address(0)); - return 0; + return __spin_trylock_bh(lock); } EXPORT_SYMBOL(_spin_trylock_bh); -- cgit v1.2.3 From 892a7c67c12da63fa4b51728bbe5b982356a090a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:37 +0200 Subject: locking: Allow arch-inlined spinlocks This allows an architecture to specify per lock variant if the locking code should be kept out-of-line or inlined. If an architecure wants out-of-line locking code no change is needed. To force inlining of e.g. spin_lock() the line: #define __always_inline__spin_lock needs to be added to arch/<...>/include/asm/spinlock.h If CONFIG_DEBUG_SPINLOCK or CONFIG_GENERIC_LOCKBREAK are defined the per architecture defines are (partly) ignored and still out-of-line spinlock code will be generated. 
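As a minimal sketch of the opt-in described above (the architecture name "foo" is a placeholder, not a real port), an architecture that wanted, say, spin_lock() and spin_unlock() generated inline would add the matching markers to its own spinlock header:

	/* arch/foo/include/asm/spinlock.h -- hypothetical opt-in */
	#define __always_inline__spin_lock
	#define __always_inline__spin_unlock

On a build without CONFIG_DEBUG_SPINLOCK and CONFIG_GENERIC_LOCKBREAK these defines make _spin_lock()/_spin_unlock() resolve to the inline __spin_lock()/__spin_unlock() helpers from spinlock_api_smp.h, while the matching out-of-line definitions in kernel/spinlock.c drop out through the new #ifndef guards.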
Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra Cc: Arnd Bergmann Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124418.375299024@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/spinlock_api_smp.h | 119 +++++++++++++++++++++++++++++++++++++++ kernel/spinlock.c | 56 ++++++++++++++++++ 2 files changed, 175 insertions(+) (limited to 'include') diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 6b108f5fb149..1a411e3fab95 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,6 +60,125 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); +#ifndef CONFIG_DEBUG_SPINLOCK +#ifndef CONFIG_GENERIC_LOCKBREAK + +#ifdef __always_inline__spin_lock +#define _spin_lock(lock) __spin_lock(lock) +#endif + +#ifdef __always_inline__read_lock +#define _read_lock(lock) __read_lock(lock) +#endif + +#ifdef __always_inline__write_lock +#define _write_lock(lock) __write_lock(lock) +#endif + +#ifdef __always_inline__spin_lock_bh +#define _spin_lock_bh(lock) __spin_lock_bh(lock) +#endif + +#ifdef __always_inline__read_lock_bh +#define _read_lock_bh(lock) __read_lock_bh(lock) +#endif + +#ifdef __always_inline__write_lock_bh +#define _write_lock_bh(lock) __write_lock_bh(lock) +#endif + +#ifdef __always_inline__spin_lock_irq +#define _spin_lock_irq(lock) __spin_lock_irq(lock) +#endif + +#ifdef __always_inline__read_lock_irq +#define _read_lock_irq(lock) __read_lock_irq(lock) +#endif + +#ifdef __always_inline__write_lock_irq +#define _write_lock_irq(lock) __write_lock_irq(lock) +#endif + +#ifdef __always_inline__spin_lock_irqsave +#define _spin_lock_irqsave(lock) __spin_lock_irqsave(lock) +#endif + +#ifdef __always_inline__read_lock_irqsave +#define _read_lock_irqsave(lock) __read_lock_irqsave(lock) +#endif + +#ifdef __always_inline__write_lock_irqsave +#define _write_lock_irqsave(lock) __write_lock_irqsave(lock) +#endif + +#endif /* !CONFIG_GENERIC_LOCKBREAK */ + +#ifdef __always_inline__spin_trylock +#define _spin_trylock(lock) __spin_trylock(lock) +#endif + +#ifdef __always_inline__read_trylock +#define _read_trylock(lock) __read_trylock(lock) +#endif + +#ifdef __always_inline__write_trylock +#define _write_trylock(lock) __write_trylock(lock) +#endif + +#ifdef __always_inline__spin_trylock_bh +#define _spin_trylock_bh(lock) __spin_trylock_bh(lock) +#endif + +#ifdef __always_inline__spin_unlock +#define _spin_unlock(lock) __spin_unlock(lock) +#endif + +#ifdef __always_inline__read_unlock +#define _read_unlock(lock) __read_unlock(lock) +#endif + +#ifdef __always_inline__write_unlock +#define _write_unlock(lock) __write_unlock(lock) +#endif + +#ifdef __always_inline__spin_unlock_bh +#define _spin_unlock_bh(lock) __spin_unlock_bh(lock) +#endif + +#ifdef __always_inline__read_unlock_bh +#define _read_unlock_bh(lock) __read_unlock_bh(lock) +#endif + +#ifdef __always_inline__write_unlock_bh +#define _write_unlock_bh(lock) __write_unlock_bh(lock) +#endif + +#ifdef __always_inline__spin_unlock_irq +#define _spin_unlock_irq(lock) __spin_unlock_irq(lock) +#endif + +#ifdef __always_inline__read_unlock_irq +#define _read_unlock_irq(lock) __read_unlock_irq(lock) +#endif + +#ifdef __always_inline__write_unlock_irq +#define 
_write_unlock_irq(lock) __write_unlock_irq(lock) +#endif + +#ifdef __always_inline__spin_unlock_irqrestore +#define _spin_unlock_irqrestore(lock, flags) __spin_unlock_irqrestore(lock, flags) +#endif + +#ifdef __always_inline__read_unlock_irqrestore +#define _read_unlock_irqrestore(lock, flags) __read_unlock_irqrestore(lock, flags) +#endif + +#ifdef __always_inline__write_unlock_irqrestore +#define _write_unlock_irqrestore(lock, flags) __write_unlock_irqrestore(lock, flags) +#endif + +#endif /* CONFIG_DEBUG_SPINLOCK */ + static inline int __spin_trylock(spinlock_t *lock) { preempt_disable(); diff --git a/kernel/spinlock.c b/kernel/spinlock.c index 2c000f5c070b..5ddab730cb2f 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -21,23 +21,29 @@ #include #include +#ifndef _spin_trylock int __lockfunc _spin_trylock(spinlock_t *lock) { return __spin_trylock(lock); } EXPORT_SYMBOL(_spin_trylock); +#endif +#ifndef _read_trylock int __lockfunc _read_trylock(rwlock_t *lock) { return __read_trylock(lock); } EXPORT_SYMBOL(_read_trylock); +#endif +#ifndef _write_trylock int __lockfunc _write_trylock(rwlock_t *lock) { return __write_trylock(lock); } EXPORT_SYMBOL(_write_trylock); +#endif /* * If lockdep is enabled then we use the non-preemption spin-ops @@ -46,77 +52,101 @@ EXPORT_SYMBOL(_write_trylock); */ #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) +#ifndef _read_lock void __lockfunc _read_lock(rwlock_t *lock) { __read_lock(lock); } EXPORT_SYMBOL(_read_lock); +#endif +#ifndef _spin_lock_irqsave unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) { return __spin_lock_irqsave(lock); } EXPORT_SYMBOL(_spin_lock_irqsave); +#endif +#ifndef _spin_lock_irq void __lockfunc _spin_lock_irq(spinlock_t *lock) { __spin_lock_irq(lock); } EXPORT_SYMBOL(_spin_lock_irq); +#endif +#ifndef _spin_lock_bh void __lockfunc _spin_lock_bh(spinlock_t *lock) { __spin_lock_bh(lock); } EXPORT_SYMBOL(_spin_lock_bh); +#endif +#ifndef _read_lock_irqsave unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) { return __read_lock_irqsave(lock); } EXPORT_SYMBOL(_read_lock_irqsave); +#endif +#ifndef _read_lock_irq void __lockfunc _read_lock_irq(rwlock_t *lock) { __read_lock_irq(lock); } EXPORT_SYMBOL(_read_lock_irq); +#endif +#ifndef _read_lock_bh void __lockfunc _read_lock_bh(rwlock_t *lock) { __read_lock_bh(lock); } EXPORT_SYMBOL(_read_lock_bh); +#endif +#ifndef _write_lock_irqsave unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) { return __write_lock_irqsave(lock); } EXPORT_SYMBOL(_write_lock_irqsave); +#endif +#ifndef _write_lock_irq void __lockfunc _write_lock_irq(rwlock_t *lock) { __write_lock_irq(lock); } EXPORT_SYMBOL(_write_lock_irq); +#endif +#ifndef _write_lock_bh void __lockfunc _write_lock_bh(rwlock_t *lock) { __write_lock_bh(lock); } EXPORT_SYMBOL(_write_lock_bh); +#endif +#ifndef _spin_lock void __lockfunc _spin_lock(spinlock_t *lock) { __spin_lock(lock); } EXPORT_SYMBOL(_spin_lock); +#endif +#ifndef _write_lock void __lockfunc _write_lock(rwlock_t *lock) { __write_lock(lock); } EXPORT_SYMBOL(_write_lock); +#endif #else /* CONFIG_PREEMPT: */ @@ -242,83 +272,109 @@ EXPORT_SYMBOL(_spin_lock_nest_lock); #endif +#ifndef _spin_unlock void __lockfunc _spin_unlock(spinlock_t *lock) { __spin_unlock(lock); } EXPORT_SYMBOL(_spin_unlock); +#endif +#ifndef _write_unlock void __lockfunc _write_unlock(rwlock_t *lock) { __write_unlock(lock); } EXPORT_SYMBOL(_write_unlock); +#endif +#ifndef _read_unlock void __lockfunc _read_unlock(rwlock_t *lock) { __read_unlock(lock); } 
EXPORT_SYMBOL(_read_unlock); +#endif +#ifndef _spin_unlock_irqrestore void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) { __spin_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_spin_unlock_irqrestore); +#endif +#ifndef _spin_unlock_irq void __lockfunc _spin_unlock_irq(spinlock_t *lock) { __spin_unlock_irq(lock); } EXPORT_SYMBOL(_spin_unlock_irq); +#endif +#ifndef _spin_unlock_bh void __lockfunc _spin_unlock_bh(spinlock_t *lock) { __spin_unlock_bh(lock); } EXPORT_SYMBOL(_spin_unlock_bh); +#endif +#ifndef _read_unlock_irqrestore void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { __read_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_read_unlock_irqrestore); +#endif +#ifndef _read_unlock_irq void __lockfunc _read_unlock_irq(rwlock_t *lock) { __read_unlock_irq(lock); } EXPORT_SYMBOL(_read_unlock_irq); +#endif +#ifndef _read_unlock_bh void __lockfunc _read_unlock_bh(rwlock_t *lock) { __read_unlock_bh(lock); } EXPORT_SYMBOL(_read_unlock_bh); +#endif +#ifndef _write_unlock_irqrestore void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) { __write_unlock_irqrestore(lock, flags); } EXPORT_SYMBOL(_write_unlock_irqrestore); +#endif +#ifndef _write_unlock_irq void __lockfunc _write_unlock_irq(rwlock_t *lock) { __write_unlock_irq(lock); } EXPORT_SYMBOL(_write_unlock_irq); +#endif +#ifndef _write_unlock_bh void __lockfunc _write_unlock_bh(rwlock_t *lock) { __write_unlock_bh(lock); } EXPORT_SYMBOL(_write_unlock_bh); +#endif +#ifndef _spin_trylock_bh int __lockfunc _spin_trylock_bh(spinlock_t *lock) { return __spin_trylock_bh(lock); } EXPORT_SYMBOL(_spin_trylock_bh); +#endif notrace int in_lock_functions(unsigned long addr) { -- cgit v1.2.3 From bb7bed082500179519c7caf0678ba3bed9752658 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 31 Aug 2009 14:43:38 +0200 Subject: locking: Simplify spinlock inlining For !DEBUG_SPINLOCK && !PREEMPT && SMP the spin_unlock() functions were always inlined by using special defines which would call the __raw* functions. The out-of-line variants for these functions would be generated anyway. Use the new per unlock/locking variant mechanism to force inlining of the unlock functions like before. This is not a functional change, we just get rid of one additional way to force inlining. 
Signed-off-by: Heiko Carstens Acked-by: Peter Zijlstra Cc: Arnd Bergmann Cc: Nick Piggin Cc: Martin Schwidefsky Cc: Horst Hartmann Cc: Christian Ehrhardt Cc: Andrew Morton Cc: Linus Torvalds Cc: David Miller Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Geert Uytterhoeven Cc: Roman Zippel Cc: LKML-Reference: <20090831124418.848735034@de.ibm.com> Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 46 ++++++---------------------------------- include/linux/spinlock_api_smp.h | 12 +++++++++++ 2 files changed, 18 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index da76a06556bd..f0ca7a7a1757 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -259,50 +259,16 @@ static inline void smp_mb__after_lock(void) { smp_mb(); } #define spin_lock_irq(lock) _spin_lock_irq(lock) #define spin_lock_bh(lock) _spin_lock_bh(lock) - #define read_lock_irq(lock) _read_lock_irq(lock) #define read_lock_bh(lock) _read_lock_bh(lock) - #define write_lock_irq(lock) _write_lock_irq(lock) #define write_lock_bh(lock) _write_lock_bh(lock) - -/* - * We inline the unlock functions in the nondebug case: - */ -#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \ - !defined(CONFIG_SMP) -# define spin_unlock(lock) _spin_unlock(lock) -# define read_unlock(lock) _read_unlock(lock) -# define write_unlock(lock) _write_unlock(lock) -# define spin_unlock_irq(lock) _spin_unlock_irq(lock) -# define read_unlock_irq(lock) _read_unlock_irq(lock) -# define write_unlock_irq(lock) _write_unlock_irq(lock) -#else -# define spin_unlock(lock) \ - do {__raw_spin_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define read_unlock(lock) \ - do {__raw_read_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define write_unlock(lock) \ - do {__raw_write_unlock(&(lock)->raw_lock); __release(lock); } while (0) -# define spin_unlock_irq(lock) \ -do { \ - __raw_spin_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -# define read_unlock_irq(lock) \ -do { \ - __raw_read_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -# define write_unlock_irq(lock) \ -do { \ - __raw_write_unlock(&(lock)->raw_lock); \ - __release(lock); \ - local_irq_enable(); \ -} while (0) -#endif +#define spin_unlock(lock) _spin_unlock(lock) +#define read_unlock(lock) _read_unlock(lock) +#define write_unlock(lock) _write_unlock(lock) +#define spin_unlock_irq(lock) _spin_unlock_irq(lock) +#define read_unlock_irq(lock) _read_unlock_irq(lock) +#define write_unlock_irq(lock) _write_unlock_irq(lock) #define spin_unlock_irqrestore(lock, flags) \ do { \ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 1a411e3fab95..7a7e18fc2415 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -60,6 +60,18 @@ void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); +/* + * We inline the unlock functions in the nondebug case: + */ +#if !defined(CONFIG_DEBUG_SPINLOCK) && !defined(CONFIG_PREEMPT) +#define __always_inline__spin_unlock +#define __always_inline__read_unlock +#define __always_inline__write_unlock +#define __always_inline__spin_unlock_irq +#define __always_inline__read_unlock_irq +#define __always_inline__write_unlock_irq +#endif + #ifndef CONFIG_DEBUG_SPINLOCK #ifndef CONFIG_GENERIC_LOCKBREAK -- cgit v1.2.3 
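Taken together, the two locking patches above change how a plain spin_unlock() expands: spinlock.h now maps it unconditionally to _spin_unlock(), and with __always_inline__spin_unlock defined for !CONFIG_DEBUG_SPINLOCK && !CONFIG_PREEMPT builds, _spin_unlock(lock) is in turn a macro for the inline helper. A sketch of that helper, matching the body the earlier patch in this series moved out of kernel/spinlock.c into spinlock_api_smp.h:

	static inline void __spin_unlock(spinlock_t *lock)
	{
		spin_release(&lock->dep_map, 1, _RET_IP_);
		_raw_spin_unlock(lock);
		preempt_enable();
	}

The same define also trips the #ifndef _spin_unlock guard in kernel/spinlock.c, so the out-of-line copy is not emitted for this configuration, which is what the old hand-rolled __raw_spin_unlock() wrappers in spinlock.h used to provide.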
From 98a56ab382079f777e261e14512cbd4fb2107af4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Sep 2009 08:48:28 -0400 Subject: ext4: Fix spelling typo in the trace format for trace_ext4_da_writepages() Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 8d433c4e3709..15051d2d1219 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -243,7 +243,7 @@ TRACE_EVENT(ext4_da_writepages, __entry->range_cyclic = wbc->range_cyclic; ), - TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", + TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->nonblocking, -- cgit v1.2.3 From 2b980dbd77d229eb60588802162c9659726b11f4 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 28 Aug 2009 18:12:43 -0400 Subject: lsm: Add hooks to the TUN driver The TUN driver lacks any LSM hooks which makes it difficult for LSM modules, such as SELinux, to enforce access controls on network traffic generated by TUN users; this is particularly problematic for virtualization apps such as QEMU and KVM. This patch adds three new LSM hooks designed to control the creation and attachment of TUN devices, the hooks are: * security_tun_dev_create() Provides access control for the creation of new TUN devices * security_tun_dev_post_create() Provides the ability to create the necessary socket LSM state for newly created TUN devices * security_tun_dev_attach() Provides access control for attaching to existing, persistent TUN devices and the ability to update the TUN device's socket LSM state as necessary Signed-off-by: Paul Moore Acked-by: Eric Paris Acked-by: Serge Hallyn Acked-by: David S. 
Miller Signed-off-by: James Morris --- drivers/net/tun.c | 22 +++++++++++++++------- include/linux/security.h | 31 +++++++++++++++++++++++++++++++ security/capability.c | 19 +++++++++++++++++++ security/security.c | 18 ++++++++++++++++++ 4 files changed, 83 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 42b6c6319bc2..87214a257d2a 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -130,17 +130,10 @@ static inline struct tun_sock *tun_sk(struct sock *sk) static int tun_attach(struct tun_struct *tun, struct file *file) { struct tun_file *tfile = file->private_data; - const struct cred *cred = current_cred(); int err; ASSERT_RTNL(); - /* Check permissions */ - if (((tun->owner != -1 && cred->euid != tun->owner) || - (tun->group != -1 && !in_egroup_p(tun->group))) && - !capable(CAP_NET_ADMIN)) - return -EPERM; - netif_tx_lock_bh(tun->dev); err = -EINVAL; @@ -926,6 +919,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) dev = __dev_get_by_name(net, ifr->ifr_name); if (dev) { + const struct cred *cred = current_cred(); + if (ifr->ifr_flags & IFF_TUN_EXCL) return -EBUSY; if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) @@ -935,6 +930,14 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else return -EINVAL; + if (((tun->owner != -1 && cred->euid != tun->owner) || + (tun->group != -1 && !in_egroup_p(tun->group))) && + !capable(CAP_NET_ADMIN)) + return -EPERM; + err = security_tun_dev_attach(tun->sk); + if (err < 0) + return err; + err = tun_attach(tun, file); if (err < 0) return err; @@ -947,6 +950,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) if (!capable(CAP_NET_ADMIN)) return -EPERM; + err = security_tun_dev_create(); + if (err < 0) + return err; /* Set dev type */ if (ifr->ifr_flags & IFF_TUN) { @@ -989,6 +995,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) tun->sk = sk; container_of(sk, struct tun_sock, sk)->tun = tun; + security_tun_dev_post_create(sk); + tun_net_init(dev); if (strchr(dev->name, '%')) { diff --git a/include/linux/security.h b/include/linux/security.h index a16d6b7c4ebe..40ba39ea68ce 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -998,6 +998,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * Sets the connection's peersid to the secmark on skb. * @req_classify_flow: * Sets the flow's sid to the openreq sid. + * @tun_dev_create: + * Check permissions prior to creating a new TUN device. + * @tun_dev_post_create: + * This hook allows a module to update or allocate a per-socket security + * structure. + * @sk contains the newly created sock structure. + * @tun_dev_attach: + * Check permissions prior to attaching to a persistent TUN device. This + * hook can also be used by the module to update any security state + * associated with the TUN device's sock structure. + * @sk contains the existing sock structure. * * Security hooks for XFRM operations. 
* @@ -1597,6 +1608,9 @@ struct security_operations { void (*inet_csk_clone) (struct sock *newsk, const struct request_sock *req); void (*inet_conn_established) (struct sock *sk, struct sk_buff *skb); void (*req_classify_flow) (const struct request_sock *req, struct flowi *fl); + int (*tun_dev_create)(void); + void (*tun_dev_post_create)(struct sock *sk); + int (*tun_dev_attach)(struct sock *sk); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2586,6 +2600,9 @@ void security_inet_csk_clone(struct sock *newsk, const struct request_sock *req); void security_inet_conn_established(struct sock *sk, struct sk_buff *skb); +int security_tun_dev_create(void); +void security_tun_dev_post_create(struct sock *sk); +int security_tun_dev_attach(struct sock *sk); #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket *sock, @@ -2736,6 +2753,20 @@ static inline void security_inet_conn_established(struct sock *sk, struct sk_buff *skb) { } + +static inline int security_tun_dev_create(void) +{ + return 0; +} + +static inline void security_tun_dev_post_create(struct sock *sk) +{ +} + +static inline int security_tun_dev_attach(struct sock *sk) +{ + return 0; +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/security/capability.c b/security/capability.c index 1b943f54b2ea..06400cf07757 100644 --- a/security/capability.c +++ b/security/capability.c @@ -706,10 +706,26 @@ static void cap_inet_conn_established(struct sock *sk, struct sk_buff *skb) { } + + static void cap_req_classify_flow(const struct request_sock *req, struct flowi *fl) { } + +static int cap_tun_dev_create(void) +{ + return 0; +} + +static void cap_tun_dev_post_create(struct sock *sk) +{ +} + +static int cap_tun_dev_attach(struct sock *sk) +{ + return 0; +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1026,6 +1042,9 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, inet_csk_clone); set_to_cap_if_null(ops, inet_conn_established); set_to_cap_if_null(ops, req_classify_flow); + set_to_cap_if_null(ops, tun_dev_create); + set_to_cap_if_null(ops, tun_dev_post_create); + set_to_cap_if_null(ops, tun_dev_attach); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_cap_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/security.c b/security/security.c index 0e993f42ce3d..f88eaf6b14cc 100644 --- a/security/security.c +++ b/security/security.c @@ -1117,6 +1117,24 @@ void security_inet_conn_established(struct sock *sk, security_ops->inet_conn_established(sk, skb); } +int security_tun_dev_create(void) +{ + return security_ops->tun_dev_create(); +} +EXPORT_SYMBOL(security_tun_dev_create); + +void security_tun_dev_post_create(struct sock *sk) +{ + return security_ops->tun_dev_post_create(sk); +} +EXPORT_SYMBOL(security_tun_dev_post_create); + +int security_tun_dev_attach(struct sock *sk) +{ + return security_ops->tun_dev_attach(sk); +} +EXPORT_SYMBOL(security_tun_dev_attach); + #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM -- cgit v1.2.3 From b3a3ca8ca0c3c29abc5b2bfe94bb14f3f4590df9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 31 Aug 2009 23:13:11 -0400 Subject: ext4: Add new tracepoint: trace_ext4_da_write_pages() Add a new tracepoint which shows the pages that will be written using write_cache_pages() by ext4_da_writepages(). 
Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 15 +++++++++++++++ fs/ext4/inode.c | 13 +------------ include/trace/events/ext4.h | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 41a76e163b99..81014f4ed22d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -113,6 +113,21 @@ struct ext4_allocation_request { unsigned int flags; }; +/* + * For delayed allocation tracking + */ +struct mpage_da_data { + struct inode *inode; + sector_t b_blocknr; /* start block number of extent */ + size_t b_size; /* size of extent */ + unsigned long b_state; /* state of the extent */ + unsigned long first_page, next_page; /* extent of pages */ + struct writeback_control *wbc; + int io_done; + int pages_written; + int retval; +}; + /* * Special inodes numbers */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ff659e757578..17802a96af9f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1875,18 +1875,6 @@ static void ext4_da_page_release_reservation(struct page *page, * Delayed allocation stuff */ -struct mpage_da_data { - struct inode *inode; - sector_t b_blocknr; /* start block number of extent */ - size_t b_size; /* size of extent */ - unsigned long b_state; /* state of the extent */ - unsigned long first_page, next_page; /* extent of pages */ - struct writeback_control *wbc; - int io_done; - int pages_written; - int retval; -}; - /* * mpage_da_submit_io - walks through extent of pages and try to write * them with writepage() call back @@ -2863,6 +2851,7 @@ retry: mpd.io_done = 1; ret = MPAGE_DA_EXTENT_TAIL; } + trace_ext4_da_write_pages(inode, &mpd); wbc->nr_to_write -= mpd.pages_written; ext4_journal_stop(handle); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 15051d2d1219..dd43399288ea 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -251,6 +251,40 @@ TRACE_EVENT(ext4_da_writepages, __entry->range_cyclic) ); +TRACE_EVENT(ext4_da_write_pages, + TP_PROTO(struct inode *inode, struct mpage_da_data *mpd), + + TP_ARGS(inode, mpd), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u64, b_blocknr ) + __field( __u32, b_size ) + __field( __u32, b_state ) + __field( unsigned long, first_page ) + __field( int, io_done ) + __field( int, pages_written ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->b_blocknr = mpd->b_blocknr; + __entry->b_size = mpd->b_size; + __entry->b_state = mpd->b_state; + __entry->first_page = mpd->first_page; + __entry->io_done = mpd->io_done; + __entry->pages_written = mpd->pages_written; + ), + + TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", + jbd2_dev_to_name(__entry->dev), __entry->ino, + __entry->b_blocknr, __entry->b_size, + __entry->b_state, __entry->first_page, + __entry->io_done, __entry->pages_written) +); + TRACE_EVENT(ext4_da_writepages_result, TP_PROTO(struct inode *inode, struct writeback_control *wbc, int ret, int pages_written), -- cgit v1.2.3 From 1a37f184fa7824982a5f434c06981ec46a66cef7 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 31 Aug 2009 13:48:16 +1000 Subject: lmb: Also remove __init from lmb_end_of_RAM() declaration in lmb.h My previous patch (commit 4f8ee2c9cc: "lmb: Remove __init from lmb_end_of_DRAM()") removed __init in lmb.c but missed the fact that it was also marked as such in the .h Signed-off-by: Benjamin 
Herrenschmidt Signed-off-by: Linus Torvalds --- include/linux/lmb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lmb.h b/include/linux/lmb.h index c46c89505dac..2442e3f3d033 100644 --- a/include/linux/lmb.h +++ b/include/linux/lmb.h @@ -51,7 +51,7 @@ extern u64 __init lmb_alloc_base(u64 size, extern u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr); extern u64 __init lmb_phys_mem_size(void); -extern u64 __init lmb_end_of_DRAM(void); +extern u64 lmb_end_of_DRAM(void); extern void __init lmb_enforce_memory_limit(u64 memory_limit); extern int __init lmb_is_reserved(u64 addr); extern int lmb_find(struct lmb_property *res); -- cgit v1.2.3 From dc1f8bf68b311b1537cb65893430b6796118498a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 31 Aug 2009 19:50:40 +0000 Subject: netdev: change transmit to limited range type The transmit function should only return one of three possible values, some drivers got confused and returned errno's or other values. This changes the definition so that this can be caught at compile time. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/netdevice.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 60d3aac49ed4..376a2e1ac00d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -79,17 +79,19 @@ struct wireless_dev; #define net_xmit_eval(e) ((e) == NET_XMIT_CN? 0 : (e)) #define net_xmit_errno(e) ((e) != NET_XMIT_CN ? -ENOBUFS : 0) +/* Driver transmit return codes */ +enum netdev_tx { + NETDEV_TX_OK = 0, /* driver took care of packet */ + NETDEV_TX_BUSY, /* driver tx path was busy*/ + NETDEV_TX_LOCKED = -1, /* driver tx lock was already taken */ +}; +typedef enum netdev_tx netdev_tx_t; + #endif #define MAX_ADDR_LEN 32 /* Largest hardware address length */ -/* Driver transmit return codes */ -#define NETDEV_TX_OK 0 /* driver took care of packet */ -#define NETDEV_TX_BUSY 1 /* driver tx path was busy*/ -#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */ - #ifdef __KERNEL__ - /* * Compute the worst case header length according to the protocols * used. @@ -507,9 +509,11 @@ struct netdev_queue { * This function is called when network device transistions to the down * state. * - * int (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev); + * netdev_tx_t (*ndo_start_xmit)(struct sk_buff *skb, + * struct net_device *dev); * Called when a packet needs to be transmitted. - * Must return NETDEV_TX_OK , NETDEV_TX_BUSY, or NETDEV_TX_LOCKED, + * Must return NETDEV_TX_OK , NETDEV_TX_BUSY. + * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); @@ -580,7 +584,7 @@ struct net_device_ops { void (*ndo_uninit)(struct net_device *dev); int (*ndo_open)(struct net_device *dev); int (*ndo_stop)(struct net_device *dev); - int (*ndo_start_xmit) (struct sk_buff *skb, + netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb, struct net_device *dev); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); -- cgit v1.2.3 From 25a79c41ce0ce88a4288adf278e9b0e00f228383 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 31 Aug 2009 19:50:45 +0000 Subject: usbnet: convert to netdev_tx_t Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/net/usb/catc.c | 3 ++- drivers/net/usb/cdc-phonet.c | 6 +++--- drivers/net/usb/hso.c | 3 ++- drivers/net/usb/kaweth.c | 3 ++- drivers/net/usb/pegasus.c | 3 ++- drivers/net/usb/rtl8150.c | 3 ++- drivers/net/usb/usbnet.c | 8 ++++---- drivers/usb/gadget/u_ether.c | 3 ++- include/linux/usb/usbnet.h | 3 ++- 9 files changed, 21 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 7abdc4abbe07..0ffc0c6d03be 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -411,7 +411,8 @@ static void catc_tx_done(struct urb *urb) spin_unlock_irqrestore(&catc->tx_lock, flags); } -static int catc_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t catc_start_xmit(struct sk_buff *skb, + struct net_device *netdev) { struct catc *catc = netdev_priv(netdev); unsigned long flags; diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c index 792af72da8ac..0ca5916ca8df 100644 --- a/drivers/net/usb/cdc-phonet.c +++ b/drivers/net/usb/cdc-phonet.c @@ -55,7 +55,7 @@ static void rx_complete(struct urb *req); /* * Network device callbacks */ -static int usbpn_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t usbpn_xmit(struct sk_buff *skb, struct net_device *dev) { struct usbpn_dev *pnd = netdev_priv(dev); struct urb *req = NULL; @@ -82,12 +82,12 @@ static int usbpn_xmit(struct sk_buff *skb, struct net_device *dev) if (pnd->tx_queue >= dev->tx_queue_len) netif_stop_queue(dev); spin_unlock_irqrestore(&pnd->tx_lock, flags); - return 0; + return NETDEV_TX_OK; drop: dev_kfree_skb(skb); dev->stats.tx_dropped++; - return 0; + return NETDEV_TX_OK; } static void tx_complete(struct urb *req) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index ffe410635735..123f9b84dd29 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -771,7 +771,8 @@ static void write_bulk_callback(struct urb *urb) } /* called by kernel when we need to transmit a packet */ -static int hso_net_start_xmit(struct sk_buff *skb, struct net_device *net) +static netdev_tx_t hso_net_start_xmit(struct sk_buff *skb, + struct net_device *net) { struct hso_net *odev = netdev_priv(net); int result; diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 200fe3d525ca..7f397365b437 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -803,7 +803,8 @@ static void kaweth_usb_transmit_complete(struct urb *urb) /**************************************************************** * kaweth_start_xmit ****************************************************************/ -static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) +static netdev_tx_t kaweth_start_xmit(struct sk_buff *skb, + struct net_device *net) { struct kaweth_device *kaweth = netdev_priv(net); __le16 *private_header; diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 69d2df95ac86..7b935b846424 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -876,7 +876,8 @@ static void pegasus_tx_timeout(struct net_device *net) pegasus->stats.tx_errors++; } -static int pegasus_start_xmit(struct sk_buff *skb, struct net_device *net) +static netdev_tx_t pegasus_start_xmit(struct sk_buff *skb, + struct net_device *net) { pegasus_t *pegasus = netdev_priv(net); int count = ((skb->len + 2) & 0x3f) ? 
skb->len + 2 : skb->len + 3; diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c index bac8b77fb25e..d9f84f22fbc7 100644 --- a/drivers/net/usb/rtl8150.c +++ b/drivers/net/usb/rtl8150.c @@ -727,7 +727,8 @@ static void rtl8150_set_multicast(struct net_device *netdev) netif_wake_queue(netdev); } -static int rtl8150_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t rtl8150_start_xmit(struct sk_buff *skb, + struct net_device *netdev) { rtl8150_t *dev = netdev_priv(netdev); int count, res; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 7d471fca2743..d166e3385c64 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1007,15 +1007,16 @@ EXPORT_SYMBOL_GPL(usbnet_tx_timeout); /*-------------------------------------------------------------------------*/ -int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net) +netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, + struct net_device *net) { struct usbnet *dev = netdev_priv(net); int length; - int retval = NET_XMIT_SUCCESS; struct urb *urb = NULL; struct skb_data *entry; struct driver_info *info = dev->driver_info; unsigned long flags; + int retval; // some devices want funky USB-level framing, for // win32 driver (usually) and/or hardware quirks @@ -1079,7 +1080,6 @@ int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net) if (netif_msg_tx_err (dev)) devdbg (dev, "drop, code %d", retval); drop: - retval = NET_XMIT_SUCCESS; dev->net->stats.tx_dropped++; if (skb) dev_kfree_skb_any (skb); @@ -1088,7 +1088,7 @@ drop: devdbg (dev, "> tx, len %d, type 0x%x", length, skb->protocol); } - return retval; + return NETDEV_TX_OK; } EXPORT_SYMBOL_GPL(usbnet_start_xmit); diff --git a/drivers/usb/gadget/u_ether.c b/drivers/usb/gadget/u_ether.c index aac69b591aeb..dc3ebd1e68ca 100644 --- a/drivers/usb/gadget/u_ether.c +++ b/drivers/usb/gadget/u_ether.c @@ -465,7 +465,8 @@ static inline int is_promisc(u16 cdc_filter) return cdc_filter & USB_CDC_PACKET_TYPE_PROMISCUOUS; } -static int eth_start_xmit(struct sk_buff *skb, struct net_device *net) +static netdev_tx_t eth_start_xmit(struct sk_buff *skb, + struct net_device *net) { struct eth_dev *dev = netdev_priv(net); int length = skb->len; diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 09514252d84e..bb69e256cd16 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -182,7 +182,8 @@ struct skb_data { /* skb->cb is one of these */ extern int usbnet_open (struct net_device *net); extern int usbnet_stop (struct net_device *net); -extern int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net); +extern netdev_tx_t usbnet_start_xmit (struct sk_buff *skb, + struct net_device *net); extern void usbnet_tx_timeout (struct net_device *net); extern int usbnet_change_mtu (struct net_device *net, int new_mtu); -- cgit v1.2.3 From 4c5d502d8b2db8947c44dc44bdc67dbe55cce2b9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 31 Aug 2009 19:50:48 +0000 Subject: hdlc: convert to netdev_tx_t Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/char/pcmcia/synclink_cs.c | 7 +++---- drivers/char/synclink.c | 7 +++---- drivers/char/synclink_gt.c | 7 +++---- drivers/char/synclinkmp.c | 7 +++---- include/linux/hdlc.h | 8 ++++---- 5 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c index 77b364889224..caf6e4d19469 100644 --- a/drivers/char/pcmcia/synclink_cs.c +++ b/drivers/char/pcmcia/synclink_cs.c @@ -4005,10 +4005,9 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, * * skb socket buffer containing HDLC frame * dev pointer to network device structure - * - * returns 0 if success, otherwise error code */ -static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hdlcdev_xmit(struct sk_buff *skb, + struct net_device *dev) { MGSLPC_INFO *info = dev_to_port(dev); unsigned long flags; @@ -4043,7 +4042,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) } spin_unlock_irqrestore(&info->lock,flags); - return 0; + return NETDEV_TX_OK; } /** diff --git a/drivers/char/synclink.c b/drivers/char/synclink.c index 813552f14884..4846b73ef28d 100644 --- a/drivers/char/synclink.c +++ b/drivers/char/synclink.c @@ -7697,10 +7697,9 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, * * skb socket buffer containing HDLC frame * dev pointer to network device structure - * - * returns 0 if success, otherwise error code */ -static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hdlcdev_xmit(struct sk_buff *skb, + struct net_device *dev) { struct mgsl_struct *info = dev_to_port(dev); unsigned long flags; @@ -7731,7 +7730,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) usc_start_transmitter(info); spin_unlock_irqrestore(&info->irq_spinlock,flags); - return 0; + return NETDEV_TX_OK; } /** diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 91f20a92fddf..8678f0c8699d 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -1497,10 +1497,9 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, * * skb socket buffer containing HDLC frame * dev pointer to network device structure - * - * returns 0 if success, otherwise error code */ -static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hdlcdev_xmit(struct sk_buff *skb, + struct net_device *dev) { struct slgt_info *info = dev_to_port(dev); unsigned long flags; @@ -1529,7 +1528,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) update_tx_timer(info); spin_unlock_irqrestore(&info->lock,flags); - return 0; + return NETDEV_TX_OK; } /** diff --git a/drivers/char/synclinkmp.c b/drivers/char/synclinkmp.c index 8d4a2a8a0a70..2b18adc4ee19 100644 --- a/drivers/char/synclinkmp.c +++ b/drivers/char/synclinkmp.c @@ -1608,10 +1608,9 @@ static int hdlcdev_attach(struct net_device *dev, unsigned short encoding, * * skb socket buffer containing HDLC frame * dev pointer to network device structure - * - * returns 0 if success, otherwise error code */ -static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hdlcdev_xmit(struct sk_buff *skb, + struct net_device *dev) { SLMP_INFO *info = dev_to_port(dev); unsigned long flags; @@ -1642,7 +1641,7 @@ static int hdlcdev_xmit(struct sk_buff *skb, struct net_device *dev) tx_start(info); spin_unlock_irqrestore(&info->lock,flags); - return 0; + return 
NETDEV_TX_OK; } /** diff --git a/include/linux/hdlc.h b/include/linux/hdlc.h index 6a6e701f1631..ee275c8b3df1 100644 --- a/include/linux/hdlc.h +++ b/include/linux/hdlc.h @@ -38,7 +38,7 @@ struct hdlc_proto { int (*ioctl)(struct net_device *dev, struct ifreq *ifr); __be16 (*type_trans)(struct sk_buff *skb, struct net_device *dev); int (*netif_rx)(struct sk_buff *skb); - int (*xmit)(struct sk_buff *skb, struct net_device *dev); + netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev); struct module *module; struct hdlc_proto *next; /* next protocol in the list */ }; @@ -51,7 +51,7 @@ typedef struct hdlc_device { unsigned short encoding, unsigned short parity); /* hardware driver must handle this instead of dev->hard_start_xmit */ - int (*xmit)(struct sk_buff *skb, struct net_device *dev); + netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev); /* Things below are for HDLC layer internal use only */ const struct hdlc_proto *proto; @@ -60,7 +60,7 @@ typedef struct hdlc_device { spinlock_t state_lock; void *state; void *priv; -}hdlc_device; +} hdlc_device; @@ -106,7 +106,7 @@ void hdlc_close(struct net_device *dev); /* May be used by hardware driver */ int hdlc_change_mtu(struct net_device *dev, int new_mtu); /* Must be pointed to by hw driver's dev->netdev_ops->ndo_start_xmit */ -int hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t hdlc_start_xmit(struct sk_buff *skb, struct net_device *dev); int attach_hdlc_protocol(struct net_device *dev, struct hdlc_proto *proto, size_t size); -- cgit v1.2.3 From 61357325f377889a1daffa14962d705dc814dd0e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 31 Aug 2009 19:50:58 +0000 Subject: netdev: convert bulk of drivers to netdev_tx_t In a couple of cases collapse some extra code like: int retval = NETDEV_TX_OK; ... return retval; into return NETDEV_TX_OK; Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- drivers/ieee802154/fakehard.c | 3 ++- drivers/net/8139cp.c | 3 ++- drivers/net/8139too.c | 7 ++++--- drivers/net/82596.c | 5 ++--- drivers/net/8390.c | 2 +- drivers/net/8390.h | 4 ++-- drivers/net/8390p.c | 2 +- drivers/net/a2065.c | 3 ++- drivers/net/acenic.c | 3 ++- drivers/net/acenic.h | 3 ++- drivers/net/amd8111e.c | 3 ++- drivers/net/arcnet/arcnet.c | 3 ++- drivers/net/ariadne.c | 6 ++++-- drivers/net/at1700.c | 6 ++++-- drivers/net/atl1c/atl1c_main.c | 3 ++- drivers/net/atl1e/atl1e_main.c | 3 ++- drivers/net/atlx/atl1.c | 3 ++- drivers/net/atlx/atl2.c | 3 ++- drivers/net/atp.c | 6 ++++-- drivers/net/au1000_eth.c | 2 +- drivers/net/b44.c | 2 +- drivers/net/benet/be_main.c | 3 ++- drivers/net/bnx2.c | 2 +- drivers/net/bnx2x_main.c | 2 +- drivers/net/can/sja1000/sja1000.c | 3 ++- drivers/net/cassini.c | 2 +- drivers/net/chelsio/sge.c | 2 +- drivers/net/chelsio/sge.h | 2 +- drivers/net/cs89x0.c | 4 ++-- drivers/net/cxgb3/adapter.h | 2 +- drivers/net/cxgb3/sge.c | 2 +- drivers/net/defxx.c | 10 ++++------ drivers/net/depca.c | 6 ++++-- drivers/net/dl2k.c | 4 ++-- drivers/net/dnet.c | 2 +- drivers/net/eepro.c | 6 ++++-- drivers/net/eexpress.c | 5 +++-- drivers/net/enc28j60.c | 3 ++- drivers/net/enic/enic_main.c | 3 ++- drivers/net/epic100.c | 5 +++-- drivers/net/eth16i.c | 4 ++-- drivers/net/ethoc.c | 2 +- drivers/net/ewrk3.c | 4 ++-- drivers/net/fealnx.c | 4 ++-- drivers/net/forcedeth.c | 5 +++-- drivers/net/hamachi.c | 6 ++++-- drivers/net/hp100.c | 13 ++++++++----- drivers/net/ibmlana.c | 2 +- drivers/net/ibmveth.c | 3 ++- drivers/net/ipg.c | 3 ++- drivers/net/jme.c | 2 +- drivers/net/ks8842.c | 3 ++- drivers/net/ks8851.c | 5 +++-- drivers/net/lance.c | 6 ++++-- drivers/net/lib8390.c | 3 ++- drivers/net/loopback.c | 3 ++- drivers/net/lp486e.c | 4 ++-- drivers/net/mlx4/en_tx.c | 2 +- drivers/net/mlx4/mlx4_en.h | 2 +- drivers/net/myri10ge/myri10ge.c | 11 +++++++---- drivers/net/natsemi.c | 4 ++-- drivers/net/netxen/netxen_nic_main.c | 5 +++-- drivers/net/ni52.c | 5 +++-- drivers/net/ni65.c | 6 ++++-- drivers/net/niu.c | 3 ++- drivers/net/ns83820.c | 3 ++- drivers/net/pcnet32.c | 6 ++++-- drivers/net/qla3xxx.c | 3 ++- drivers/net/qlge/qlge_main.c | 2 +- drivers/net/r6040.c | 10 +++++----- drivers/net/r8169.c | 12 ++++++------ drivers/net/rrunner.c | 3 ++- drivers/net/rrunner.h | 3 ++- drivers/net/s2io.c | 2 +- drivers/net/sb1000.c | 5 +++-- drivers/net/sc92031.c | 3 ++- drivers/net/seeq8005.c | 6 ++++-- drivers/net/sfc/efx.h | 5 +++-- drivers/net/sfc/selftest.c | 3 ++- drivers/net/sfc/tx.c | 18 ++++++++---------- drivers/net/sfc/tx.h | 3 ++- drivers/net/sis190.c | 3 ++- drivers/net/sis900.c | 5 +++-- drivers/net/skfp/skfddi.c | 6 ++++-- drivers/net/skge.c | 3 ++- drivers/net/sky2.c | 3 ++- drivers/net/smc9194.c | 6 ++++-- drivers/net/smsc9420.c | 3 ++- drivers/net/starfire.c | 4 ++-- drivers/net/sundance.c | 4 ++-- drivers/net/sungem.c | 3 ++- drivers/net/sunhme.c | 3 ++- drivers/net/tehuti.c | 3 ++- drivers/net/tg3.c | 9 ++++++--- drivers/net/tlan.c | 4 ++-- drivers/net/typhoon.c | 2 +- drivers/net/via-rhine.c | 6 ++++-- drivers/net/via-velocity.c | 3 ++- drivers/net/vxge/vxge-main.c | 2 +- drivers/net/yellowfin.c | 6 ++++-- drivers/net/znet.c | 5 +++-- include/linux/arcdevice.h | 3 ++- 102 files changed, 253 insertions(+), 173 deletions(-) (limited to 'include') diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c index c1c9697f9fde..96a2959ce877 100644 --- a/drivers/ieee802154/fakehard.c +++ b/drivers/ieee802154/fakehard.c @@ -257,7 +257,8 @@ static int 
ieee802154_fake_close(struct net_device *dev) return 0; } -static int ieee802154_fake_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ieee802154_fake_xmit(struct sk_buff *skb, + struct net_device *dev) { skb->iif = dev->ifindex; skb->dev = dev; diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index 4a8995aaeca3..462d9f59c53a 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -736,7 +736,8 @@ static void cp_tx (struct cp_private *cp) netif_wake_queue(cp->dev); } -static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t cp_start_xmit (struct sk_buff *skb, + struct net_device *dev) { struct cp_private *cp = netdev_priv(dev); unsigned entry; diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index b39ec98345ea..4a3628755026 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -628,8 +628,8 @@ static void mdio_write (struct net_device *dev, int phy_id, int location, static void rtl8139_start_thread(struct rtl8139_private *tp); static void rtl8139_tx_timeout (struct net_device *dev); static void rtl8139_init_ring (struct net_device *dev); -static int rtl8139_start_xmit (struct sk_buff *skb, - struct net_device *dev); +static netdev_tx_t rtl8139_start_xmit (struct sk_buff *skb, + struct net_device *dev); #ifdef CONFIG_NET_POLL_CONTROLLER static void rtl8139_poll_controller(struct net_device *dev); #endif @@ -1687,7 +1687,8 @@ static void rtl8139_tx_timeout (struct net_device *dev) } } -static int rtl8139_start_xmit (struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t rtl8139_start_xmit (struct sk_buff *skb, + struct net_device *dev) { struct rtl8139_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; diff --git a/drivers/net/82596.c b/drivers/net/82596.c index 996cc9102215..ea6b139b812c 100644 --- a/drivers/net/82596.c +++ b/drivers/net/82596.c @@ -356,7 +356,7 @@ static char init_setup[] = 0x7f /* *multi IA */ }; static int i596_open(struct net_device *dev); -static int i596_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); @@ -1054,8 +1054,7 @@ static void i596_tx_timeout (struct net_device *dev) netif_wake_queue (dev); } - -static int i596_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct i596_private *lp = dev->ml_priv; struct tx_cmd *tx_cmd; diff --git a/drivers/net/8390.c b/drivers/net/8390.c index 21153dea8ebe..7c7518be1756 100644 --- a/drivers/net/8390.c +++ b/drivers/net/8390.c @@ -17,7 +17,7 @@ int ei_close(struct net_device *dev) } EXPORT_SYMBOL(ei_close); -int ei_start_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t ei_start_xmit(struct sk_buff *skb, struct net_device *dev) { return __ei_start_xmit(skb, dev); } diff --git a/drivers/net/8390.h b/drivers/net/8390.h index 3c61d6d2748a..3d9e8fb4fbee 100644 --- a/drivers/net/8390.h +++ b/drivers/net/8390.h @@ -40,7 +40,7 @@ extern int ei_open(struct net_device *dev); extern int ei_close(struct net_device *dev); extern irqreturn_t ei_interrupt(int irq, void *dev_id); extern void ei_tx_timeout(struct net_device *dev); -extern int ei_start_xmit(struct sk_buff *skb, struct net_device *dev); +extern netdev_tx_t ei_start_xmit(struct sk_buff *skb, 
struct net_device *dev); extern void ei_set_multicast_list(struct net_device *dev); extern struct net_device_stats *ei_get_stats(struct net_device *dev); @@ -58,7 +58,7 @@ extern int eip_open(struct net_device *dev); extern int eip_close(struct net_device *dev); extern irqreturn_t eip_interrupt(int irq, void *dev_id); extern void eip_tx_timeout(struct net_device *dev); -extern int eip_start_xmit(struct sk_buff *skb, struct net_device *dev); +extern netdev_tx_t eip_start_xmit(struct sk_buff *skb, struct net_device *dev); extern void eip_set_multicast_list(struct net_device *dev); extern struct net_device_stats *eip_get_stats(struct net_device *dev); diff --git a/drivers/net/8390p.c b/drivers/net/8390p.c index d225c291fd93..a2a64ea0b691 100644 --- a/drivers/net/8390p.c +++ b/drivers/net/8390p.c @@ -22,7 +22,7 @@ int eip_close(struct net_device *dev) } EXPORT_SYMBOL(eip_close); -int eip_start_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t eip_start_xmit(struct sk_buff *skb, struct net_device *dev) { return __ei_start_xmit(skb, dev); } diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c index 174a81187a95..b7ec0368d7e8 100644 --- a/drivers/net/a2065.c +++ b/drivers/net/a2065.c @@ -547,7 +547,8 @@ static void lance_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int lance_start_xmit (struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t lance_start_xmit (struct sk_buff *skb, + struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 08419ee10290..5f0b05c2d71f 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -2464,7 +2464,8 @@ ace_load_tx_bd(struct ace_private *ap, struct tx_desc *desc, u64 addr, } -static int ace_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ace_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; diff --git a/drivers/net/acenic.h b/drivers/net/acenic.h index c987c9b5a137..17079b927ffa 100644 --- a/drivers/net/acenic.h +++ b/drivers/net/acenic.h @@ -775,7 +775,8 @@ static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs); static irqreturn_t ace_interrupt(int irq, void *dev_id); static int ace_load_firmware(struct net_device *dev); static int ace_open(struct net_device *dev); -static int ace_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t ace_start_xmit(struct sk_buff *skb, + struct net_device *dev); static int ace_close(struct net_device *dev); static void ace_tasklet(unsigned long dev); static void ace_dump_trace(struct ace_private *ap); diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c index 61ac671f97bf..98b5f462c092 100644 --- a/drivers/net/amd8111e.c +++ b/drivers/net/amd8111e.c @@ -1300,7 +1300,8 @@ static int amd8111e_tx_queue_avail(struct amd8111e_priv* lp ) This function will queue the transmit packets to the descriptors and will trigger the send operation. It also initializes the transmit descriptors with buffer physical address, byte count, ownership to hardware etc. 
*/ -static int amd8111e_start_xmit(struct sk_buff *skb, struct net_device * dev) +static netdev_tx_t amd8111e_start_xmit(struct sk_buff *skb, + struct net_device * dev) { struct amd8111e_priv *lp = netdev_priv(dev); int tx_index; diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c index 7d227cdab9f8..75a572560909 100644 --- a/drivers/net/arcnet/arcnet.c +++ b/drivers/net/arcnet/arcnet.c @@ -591,7 +591,8 @@ static int arcnet_rebuild_header(struct sk_buff *skb) /* Called by the kernel in order to transmit a packet. */ -int arcnet_send_packet(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t arcnet_send_packet(struct sk_buff *skb, + struct net_device *dev) { struct arcnet_local *lp = netdev_priv(dev); struct archdr *pkt; diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c index 47d976cc3d7d..c35af3e106b1 100644 --- a/drivers/net/ariadne.c +++ b/drivers/net/ariadne.c @@ -115,7 +115,8 @@ struct lancedata { static int ariadne_open(struct net_device *dev); static void ariadne_init_ring(struct net_device *dev); -static int ariadne_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t ariadne_start_xmit(struct sk_buff *skb, + struct net_device *dev); static void ariadne_tx_timeout(struct net_device *dev); static int ariadne_rx(struct net_device *dev); static void ariadne_reset(struct net_device *dev); @@ -589,7 +590,8 @@ static void ariadne_tx_timeout(struct net_device *dev) } -static int ariadne_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ariadne_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct ariadne_private *priv = netdev_priv(dev); volatile struct Am79C960 *lance = (struct Am79C960*)dev->base_addr; diff --git a/drivers/net/at1700.c b/drivers/net/at1700.c index 5349c58d1fae..544d5af6950e 100644 --- a/drivers/net/at1700.c +++ b/drivers/net/at1700.c @@ -159,7 +159,8 @@ struct net_local { static int at1700_probe1(struct net_device *dev, int ioaddr); static int read_eeprom(long ioaddr, int location); static int net_open(struct net_device *dev); -static int net_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t net_send_packet(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t net_interrupt(int irq, void *dev_id); static void net_rx(struct net_device *dev); static int net_close(struct net_device *dev); @@ -595,7 +596,8 @@ static void net_tx_timeout (struct net_device *dev) } -static int net_send_packet (struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t net_send_packet (struct sk_buff *skb, + struct net_device *dev) { struct net_local *lp = netdev_priv(dev); int ioaddr = dev->base_addr; diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c index 1d601ce7d5b2..bf7cc83e9836 100644 --- a/drivers/net/atl1c/atl1c_main.c +++ b/drivers/net/atl1c/atl1c_main.c @@ -2055,7 +2055,8 @@ static void atl1c_tx_queue(struct atl1c_adapter *adapter, struct sk_buff *skb, AT_WRITE_REG(&adapter->hw, REG_MB_PRIO_PROD_IDX, prod_data); } -static int atl1c_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) { struct atl1c_adapter *adapter = netdev_priv(netdev); unsigned long flags; diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c index 4570749e3d3b..bca127e65f95 100644 --- a/drivers/net/atl1e/atl1e_main.c +++ b/drivers/net/atl1e/atl1e_main.c @@ -1839,7 +1839,8 @@ static void atl1e_tx_queue(struct atl1e_adapter *adapter, 
u16 count, AT_WRITE_REG(&adapter->hw, REG_MB_TPD_PROD_IDX, tx_ring->next_to_use); } -static int atl1e_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) { struct atl1e_adapter *adapter = netdev_priv(netdev); unsigned long flags; diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 8bca12f71390..00569dc1313c 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -2349,7 +2349,8 @@ static void atl1_tx_queue(struct atl1_adapter *adapter, u16 count, atomic_set(&tpd_ring->next_to_use, next_to_use); } -static int atl1_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) { struct atl1_adapter *adapter = netdev_priv(netdev); struct atl1_tpd_ring *tpd_ring = &adapter->tpd_ring; diff --git a/drivers/net/atlx/atl2.c b/drivers/net/atlx/atl2.c index 204db961029e..d0bcb572d51e 100644 --- a/drivers/net/atlx/atl2.c +++ b/drivers/net/atlx/atl2.c @@ -821,7 +821,8 @@ static inline int TxdFreeBytes(struct atl2_adapter *adapter) (int) (txd_read_ptr - adapter->txd_write_ptr - 1); } -static int atl2_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t atl2_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) { struct atl2_adapter *adapter = netdev_priv(netdev); struct tx_pkt_header *txph; diff --git a/drivers/net/atp.c b/drivers/net/atp.c index 4beacc9c909f..9043294fe617 100644 --- a/drivers/net/atp.c +++ b/drivers/net/atp.c @@ -199,7 +199,8 @@ static int net_open(struct net_device *dev); static void hardware_init(struct net_device *dev); static void write_packet(long ioaddr, int length, unsigned char *packet, int pad, int mode); static void trigger_send(long ioaddr, int length); -static int atp_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t atp_send_packet(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t atp_interrupt(int irq, void *dev_id); static void net_rx(struct net_device *dev); static void read_block(long ioaddr, int length, unsigned char *buffer, int data_mode); @@ -552,7 +553,8 @@ static void tx_timeout(struct net_device *dev) dev->stats.tx_errors++; } -static int atp_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t atp_send_packet(struct sk_buff *skb, + struct net_device *dev) { struct net_local *lp = netdev_priv(dev); long ioaddr = dev->base_addr; diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c index 2aab1ebc6cd1..407fd45f56a1 100644 --- a/drivers/net/au1000_eth.c +++ b/drivers/net/au1000_eth.c @@ -937,7 +937,7 @@ static int au1000_close(struct net_device *dev) /* * Au1000 transmit routine. 
*/ -static int au1000_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t au1000_tx(struct sk_buff *skb, struct net_device *dev) { struct au1000_private *aup = netdev_priv(dev); struct net_device_stats *ps = &dev->stats; diff --git a/drivers/net/b44.c b/drivers/net/b44.c index 07c92f34d8d8..bee510177a3f 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -946,7 +946,7 @@ static void b44_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int b44_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t b44_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct b44 *bp = netdev_priv(dev); int rc = NETDEV_TX_OK; diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index 7b9efee890f8..e19fe1dcd144 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -427,7 +427,8 @@ static int make_tx_wrbs(struct be_adapter *adapter, return copied; } -static int be_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t be_xmit(struct sk_buff *skb, + struct net_device *netdev) { struct be_adapter *adapter = netdev_priv(netdev); struct be_tx_obj *tx_obj = &adapter->tx_obj; diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index c4e85f694272..fcaf3bc8277e 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -6283,7 +6283,7 @@ bnx2_vlan_rx_register(struct net_device *dev, struct vlan_group *vlgrp) * bnx2_tx_int() runs without netif_tx_lock unless it needs to call * netif_wake_queue(). */ -static int +static netdev_tx_t bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bnx2 *bp = netdev_priv(dev); diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index b318d16a4d91..e2e50267cc64 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -10936,7 +10936,7 @@ exit_lbl: * bnx2x_tx_int() runs without netif_tx_lock unless it needs to call * netif_wake_queue() */ -static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bnx2x *bp = netdev_priv(dev); struct bnx2x_fastpath *fp, *fp_stat; diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index b3004de1e5e4..9ce3ddac4e9b 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -238,7 +238,8 @@ static void chipset_init(struct net_device *dev) * xx xx xx xx ff ll 00 11 22 33 44 55 66 77 * [ can-id ] [flags] [len] [can data (up to 8 bytes] */ -static int sja1000_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t sja1000_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct sja1000_priv *priv = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index 299a33b914f8..7517dc1da650 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -2918,7 +2918,7 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring, return 0; } -static int cas_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t cas_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct cas *cp = netdev_priv(dev); diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index 3711d64e45ef..8c658cf6f62f 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1776,7 +1776,7 @@ static inline int eth_hdr_len(const void *data) /* * Adds the CPL header to the sk_buff and passes it to 
t1_sge_tx. */ -int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct adapter *adapter = dev->ml_priv; struct sge *sge = adapter->sge; diff --git a/drivers/net/chelsio/sge.h b/drivers/net/chelsio/sge.h index 8c9405123992..00cc37fc1f6f 100644 --- a/drivers/net/chelsio/sge.h +++ b/drivers/net/chelsio/sge.h @@ -78,7 +78,7 @@ void t1_sge_destroy(struct sge *); irqreturn_t t1_interrupt(int irq, void *cookie); int t1_poll(struct napi_struct *, int); -int t1_start_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev); void t1_set_vlan_accel(struct adapter *adapter, int on_off); void t1_sge_start(struct sge *); void t1_sge_stop(struct sge *); diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c index 839cac5fd8a5..0c54219960e2 100644 --- a/drivers/net/cs89x0.c +++ b/drivers/net/cs89x0.c @@ -246,7 +246,7 @@ struct net_local { static int cs89x0_probe1(struct net_device *dev, int ioaddr, int modular); static int net_open(struct net_device *dev); -static int net_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t net_send_packet(struct sk_buff *skb, struct net_device *dev); static irqreturn_t net_interrupt(int irq, void *dev_id); static void set_multicast_list(struct net_device *dev); static void net_timeout(struct net_device *dev); @@ -1518,7 +1518,7 @@ static void net_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int net_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t net_send_packet(struct sk_buff *skb,struct net_device *dev) { struct net_local *lp = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h index 74723f2e7431..2b1aea6aa558 100644 --- a/drivers/net/cxgb3/adapter.h +++ b/drivers/net/cxgb3/adapter.h @@ -309,7 +309,7 @@ void t3_stop_sge_timers(struct adapter *adap); void t3_free_sge_resources(struct adapter *adap); void t3_sge_err_intr_handler(struct adapter *adapter); irq_handler_t t3_intr_handler(struct adapter *adap, int polling); -int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev); int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb); void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p); int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 29c79eb43beb..f86612857a73 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -1216,7 +1216,7 @@ static inline void t3_stop_tx_queue(struct netdev_queue *txq, * * Add a packet to an SGE Tx queue. Runs with softirqs disabled. 
*/ -int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) { int qidx; unsigned int ndesc, pidx, credits, gen, compl; diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c index b2e0a8fc21d7..6a6ea038d7a3 100644 --- a/drivers/net/defxx.c +++ b/drivers/net/defxx.c @@ -300,7 +300,8 @@ static int dfx_rcv_init(DFX_board_t *bp, int get_buffers); static void dfx_rcv_queue_process(DFX_board_t *bp); static void dfx_rcv_flush(DFX_board_t *bp); -static int dfx_xmt_queue_pkt(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t dfx_xmt_queue_pkt(struct sk_buff *skb, + struct net_device *dev); static int dfx_xmt_done(DFX_board_t *bp); static void dfx_xmt_flush(DFX_board_t *bp); @@ -3188,11 +3189,8 @@ static void dfx_rcv_queue_process( * None */ -static int dfx_xmt_queue_pkt( - struct sk_buff *skb, - struct net_device *dev - ) - +static netdev_tx_t dfx_xmt_queue_pkt(struct sk_buff *skb, + struct net_device *dev) { DFX_board_t *bp = netdev_priv(dev); u8 prod; /* local transmit producer index */ diff --git a/drivers/net/depca.c b/drivers/net/depca.c index adb997c5bb5d..9686c1fa28f1 100644 --- a/drivers/net/depca.c +++ b/drivers/net/depca.c @@ -516,7 +516,8 @@ struct depca_private { ** Public Functions */ static int depca_open(struct net_device *dev); -static int depca_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t depca_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t depca_interrupt(int irq, void *dev_id); static int depca_close(struct net_device *dev); static int depca_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); @@ -928,7 +929,8 @@ static void depca_tx_timeout(struct net_device *dev) /* ** Writes a socket buffer to TX descriptor ring and starts transmission */ -static int depca_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t depca_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct depca_private *lp = netdev_priv(dev); u_long ioaddr = dev->base_addr; diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c index 4b6a219fecea..7fa7a907f134 100644 --- a/drivers/net/dl2k.c +++ b/drivers/net/dl2k.c @@ -59,7 +59,7 @@ static int rio_open (struct net_device *dev); static void rio_timer (unsigned long data); static void rio_tx_timeout (struct net_device *dev); static void alloc_list (struct net_device *dev); -static int start_xmit (struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev); static irqreturn_t rio_interrupt (int irq, void *dev_instance); static void rio_free_tx (struct net_device *dev, int irq); static void tx_error (struct net_device *dev, int tx_status); @@ -600,7 +600,7 @@ alloc_list (struct net_device *dev) return; } -static int +static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); diff --git a/drivers/net/dnet.c b/drivers/net/dnet.c index 2818d5de3940..234685213f1a 100644 --- a/drivers/net/dnet.c +++ b/drivers/net/dnet.c @@ -541,7 +541,7 @@ static inline void dnet_print_skb(struct sk_buff *skb) #define dnet_print_skb(skb) do {} while (0) #endif -static int dnet_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t dnet_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct dnet *bp = netdev_priv(dev); diff --git a/drivers/net/eepro.c b/drivers/net/eepro.c index 53317a83857a..1e934160062c 100644 --- a/drivers/net/eepro.c +++ 
b/drivers/net/eepro.c @@ -309,7 +309,8 @@ struct eepro_local { static int eepro_probe1(struct net_device *dev, int autoprobe); static int eepro_open(struct net_device *dev); -static int eepro_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t eepro_send_packet(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t eepro_interrupt(int irq, void *dev_id); static void eepro_rx(struct net_device *dev); static void eepro_transmit_interrupt(struct net_device *dev); @@ -1133,7 +1134,8 @@ static void eepro_tx_timeout (struct net_device *dev) } -static int eepro_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t eepro_send_packet(struct sk_buff *skb, + struct net_device *dev) { struct eepro_local *lp = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index d1b6368faacd..592de8f1668a 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c @@ -246,7 +246,8 @@ static char mca_irqmap[] = { 12, 9, 3, 4, 5, 10, 11, 15 }; static int eexp_open(struct net_device *dev); static int eexp_close(struct net_device *dev); static void eexp_timeout(struct net_device *dev); -static int eexp_xmit(struct sk_buff *buf, struct net_device *dev); +static netdev_tx_t eexp_xmit(struct sk_buff *buf, + struct net_device *dev); static irqreturn_t eexp_irq(int irq, void *dev_addr); static void eexp_set_multicast(struct net_device *dev); @@ -650,7 +651,7 @@ static void eexp_timeout(struct net_device *dev) * Called to transmit a packet, or to allow us to right ourselves * if the kernel thinks we've died. */ -static int eexp_xmit(struct sk_buff *buf, struct net_device *dev) +static netdev_tx_t eexp_xmit(struct sk_buff *buf, struct net_device *dev) { short length = buf->len; #ifdef CONFIG_SMP diff --git a/drivers/net/enc28j60.c b/drivers/net/enc28j60.c index 372d6c6a4e7f..117fc6c12e34 100644 --- a/drivers/net/enc28j60.c +++ b/drivers/net/enc28j60.c @@ -1276,7 +1276,8 @@ static void enc28j60_hw_tx(struct enc28j60_net *priv) locked_reg_bfset(priv, ECON1, ECON1_TXRTS); } -static int enc28j60_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t enc28j60_send_packet(struct sk_buff *skb, + struct net_device *dev) { struct enc28j60_net *priv = netdev_priv(dev); diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c index 8005b602f776..49912eb2a338 100644 --- a/drivers/net/enic/enic_main.c +++ b/drivers/net/enic/enic_main.c @@ -622,7 +622,8 @@ static inline void enic_queue_wq_skb(struct enic *enic, } /* netif_tx_lock held, process context with BHs disabled, or BH */ -static int enic_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t enic_hard_start_xmit(struct sk_buff *skb, + struct net_device *netdev) { struct enic *enic = netdev_priv(netdev); struct vnic_wq *wq = &enic->wq[0]; diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c index d668ff2af6e3..641a10d2e843 100644 --- a/drivers/net/epic100.c +++ b/drivers/net/epic100.c @@ -298,7 +298,8 @@ static void epic_restart(struct net_device *dev); static void epic_timer(unsigned long data); static void epic_tx_timeout(struct net_device *dev); static void epic_init_ring(struct net_device *dev); -static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t epic_start_xmit(struct sk_buff *skb, + struct net_device *dev); static int epic_rx(struct net_device *dev, int budget); static int epic_poll(struct napi_struct *napi, int budget); static irqreturn_t 
epic_interrupt(int irq, void *dev_instance); @@ -961,7 +962,7 @@ static void epic_init_ring(struct net_device *dev) return; } -static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t epic_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct epic_private *ep = netdev_priv(dev); int entry, free_count; diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c index 97d5205edc8f..71bfeec33a0b 100644 --- a/drivers/net/eth16i.c +++ b/drivers/net/eth16i.c @@ -405,7 +405,7 @@ static int eth16i_read_eeprom_word(int ioaddr); static void eth16i_eeprom_cmd(int ioaddr, unsigned char command); static int eth16i_open(struct net_device *dev); static int eth16i_close(struct net_device *dev); -static int eth16i_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t eth16i_tx(struct sk_buff *skb, struct net_device *dev); static void eth16i_rx(struct net_device *dev); static void eth16i_timeout(struct net_device *dev); static irqreturn_t eth16i_interrupt(int irq, void *dev_id); @@ -1053,7 +1053,7 @@ static void eth16i_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int eth16i_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t eth16i_tx(struct sk_buff *skb, struct net_device *dev) { struct eth16i_local *lp = netdev_priv(dev); int ioaddr = dev->base_addr; diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c index 4dbe5f173273..b871aefed9c6 100644 --- a/drivers/net/ethoc.c +++ b/drivers/net/ethoc.c @@ -802,7 +802,7 @@ static struct net_device_stats *ethoc_stats(struct net_device *dev) return &priv->stats; } -static int ethoc_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ethoc_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ethoc *priv = netdev_priv(dev); struct ethoc_bd bd; diff --git a/drivers/net/ewrk3.c b/drivers/net/ewrk3.c index 9c51bc813ad3..b2a5ec8f3721 100644 --- a/drivers/net/ewrk3.c +++ b/drivers/net/ewrk3.c @@ -298,7 +298,7 @@ struct ewrk3_private { ** Public Functions */ static int ewrk3_open(struct net_device *dev); -static int ewrk3_queue_pkt(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t ewrk3_queue_pkt(struct sk_buff *skb, struct net_device *dev); static irqreturn_t ewrk3_interrupt(int irq, void *dev_id); static int ewrk3_close(struct net_device *dev); static void set_multicast_list(struct net_device *dev); @@ -764,7 +764,7 @@ static void ewrk3_timeout(struct net_device *dev) /* ** Writes a socket buffer to the free page queue */ -static int ewrk3_queue_pkt (struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ewrk3_queue_pkt(struct sk_buff *skb, struct net_device *dev) { struct ewrk3_private *lp = netdev_priv(dev); u_long iobase = dev->base_addr; diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c index f66da84a9398..18d5fbb9673e 100644 --- a/drivers/net/fealnx.c +++ b/drivers/net/fealnx.c @@ -433,7 +433,7 @@ static void netdev_timer(unsigned long data); static void reset_timer(unsigned long data); static void fealnx_tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); -static int start_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); static int netdev_rx(struct net_device *dev); static void set_rx_mode(struct net_device *dev); @@ -1305,7 +1305,7 @@ static void init_ring(struct net_device *dev) } -static int start_tx(struct sk_buff *skb, struct net_device 
*dev) +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 3b4e0766c7b2..0a1c2bb27d4d 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -2137,7 +2137,7 @@ static void nv_gear_backoff_reseed(struct net_device *dev) * nv_start_xmit: dev->hard_start_xmit function * Called with netif_tx_lock held. */ -static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); u32 tx_flags = 0; @@ -2257,7 +2257,8 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static int nv_start_xmit_optimized(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, + struct net_device *dev) { struct fe_priv *np = netdev_priv(dev); u32 tx_flags = 0; diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index 635341d6a028..1d5064a09aca 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -557,7 +557,8 @@ static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static void hamachi_timer(unsigned long data); static void hamachi_tx_timeout(struct net_device *dev); static void hamachi_init_ring(struct net_device *dev); -static int hamachi_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t hamachi_interrupt(int irq, void *dev_instance); static int hamachi_rx(struct net_device *dev); static inline int hamachi_tx(struct net_device *dev); @@ -1263,7 +1264,8 @@ do { \ } while (0) #endif -static int hamachi_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct hamachi_private *hmp = netdev_priv(dev); unsigned entry; diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c index d1b63387e9bc..a9a1a99f02dd 100644 --- a/drivers/net/hp100.c +++ b/drivers/net/hp100.c @@ -240,9 +240,10 @@ static int hp100_probe1(struct net_device *dev, int ioaddr, u_char bus, static int hp100_open(struct net_device *dev); static int hp100_close(struct net_device *dev); -static int hp100_start_xmit(struct sk_buff *skb, struct net_device *dev); -static int hp100_start_xmit_bm(struct sk_buff *skb, - struct net_device *dev); +static netdev_tx_t hp100_start_xmit(struct sk_buff *skb, + struct net_device *dev); +static netdev_tx_t hp100_start_xmit_bm(struct sk_buff *skb, + struct net_device *dev); static void hp100_rx(struct net_device *dev); static struct net_device_stats *hp100_get_stats(struct net_device *dev); static void hp100_misc_interrupt(struct net_device *dev); @@ -1483,7 +1484,8 @@ static int hp100_check_lan(struct net_device *dev) */ /* tx function for busmaster mode */ -static int hp100_start_xmit_bm(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hp100_start_xmit_bm(struct sk_buff *skb, + struct net_device *dev) { unsigned long flags; int i, ok_flag; @@ -1635,7 +1637,8 @@ static void hp100_clean_txring(struct net_device *dev) } /* tx function for slave modes */ -static int hp100_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hp100_start_xmit(struct sk_buff *skb, + struct net_device *dev) { unsigned long flags; int i, ok_flag; diff --git a/drivers/net/ibmlana.c 
b/drivers/net/ibmlana.c index 448098d3b39b..090a6d3af112 100644 --- a/drivers/net/ibmlana.c +++ b/drivers/net/ibmlana.c @@ -812,7 +812,7 @@ static int ibmlana_close(struct net_device *dev) /* transmit a block. */ -static int ibmlana_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ibmlana_tx(struct sk_buff *skb, struct net_device *dev) { ibmlana_priv *priv = netdev_priv(dev); int tmplen, addr; diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index 76b295a18185..5862282ab2fe 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -887,7 +887,8 @@ static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) #define page_offset(v) ((unsigned long)(v) & ((1 << 12) - 1)) -static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t ibmveth_start_xmit(struct sk_buff *skb, + struct net_device *netdev) { struct ibmveth_adapter *adapter = netdev_priv(netdev); union ibmveth_buf_desc desc; diff --git a/drivers/net/ipg.c b/drivers/net/ipg.c index 43019461b776..382c5532e6c5 100644 --- a/drivers/net/ipg.c +++ b/drivers/net/ipg.c @@ -1858,7 +1858,8 @@ static int ipg_nic_stop(struct net_device *dev) return 0; } -static int ipg_nic_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ipg_nic_hard_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct ipg_nic_private *sp = netdev_priv(dev); void __iomem *ioaddr = sp->ioaddr; diff --git a/drivers/net/jme.c b/drivers/net/jme.c index e7068c7cd627..1d2a32544ed2 100644 --- a/drivers/net/jme.c +++ b/drivers/net/jme.c @@ -1931,7 +1931,7 @@ jme_stop_queue_if_full(struct jme_adapter *jme) * This function is already protected by netif_tx_lock() */ -static int +static netdev_tx_t jme_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct jme_adapter *jme = netdev_priv(netdev); diff --git a/drivers/net/ks8842.c b/drivers/net/ks8842.c index 39b0aea2aab3..6e74aa9eea44 100644 --- a/drivers/net/ks8842.c +++ b/drivers/net/ks8842.c @@ -551,7 +551,8 @@ static int ks8842_close(struct net_device *netdev) return 0; } -static int ks8842_xmit_frame(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t ks8842_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) { int ret; struct ks8842_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ks8851.c b/drivers/net/ks8851.c index 9a1dea60c1c4..547ac7c7479c 100644 --- a/drivers/net/ks8851.c +++ b/drivers/net/ks8851.c @@ -868,11 +868,12 @@ static int ks8851_net_stop(struct net_device *dev) * and secondly so we can round up more than one packet to transmit which * means we can try and avoid generating too many transmit done interrupts. 
*/ -static int ks8851_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); unsigned needed = calc_txlen(skb->len); - int ret = NETDEV_TX_OK; + netdev_tx_t ret = NETDEV_TX_OK; if (netif_msg_tx_queued(ks)) ks_dbg(ks, "%s: skb %p, %d@%p\n", __func__, diff --git a/drivers/net/lance.c b/drivers/net/lance.c index 30fd4f5f1d5a..dcda30338b65 100644 --- a/drivers/net/lance.c +++ b/drivers/net/lance.c @@ -300,7 +300,8 @@ static unsigned char lance_need_isa_bounce_buffers = 1; static int lance_open(struct net_device *dev); static void lance_init_ring(struct net_device *dev, gfp_t mode); -static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t lance_start_xmit(struct sk_buff *skb, + struct net_device *dev); static int lance_rx(struct net_device *dev); static irqreturn_t lance_interrupt(int irq, void *dev_id); static int lance_close(struct net_device *dev); @@ -949,7 +950,8 @@ static void lance_tx_timeout (struct net_device *dev) } -static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t lance_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct lance_private *lp = dev->ml_priv; int ioaddr = dev->base_addr; diff --git a/drivers/net/lib8390.c b/drivers/net/lib8390.c index d6be36000c5c..256119882b1e 100644 --- a/drivers/net/lib8390.c +++ b/drivers/net/lib8390.c @@ -299,7 +299,8 @@ static void __ei_tx_timeout(struct net_device *dev) * Sends a packet to an 8390 network device. */ -static int __ei_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t __ei_start_xmit(struct sk_buff *skb, + struct net_device *dev) { unsigned long e8390_base = dev->base_addr; struct ei_device *ei_local = (struct ei_device *) netdev_priv(dev); diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 51bbce72bede..1bc654a73c47 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -69,7 +69,8 @@ struct pcpu_lstats { * The higher levels take care of making this non-reentrant (it's * called with bh's disabled). 
*/ -static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t loopback_xmit(struct sk_buff *skb, + struct net_device *dev) { struct pcpu_lstats *pcpu_lstats, *lb_stats; int len; diff --git a/drivers/net/lp486e.c b/drivers/net/lp486e.c index c292bad411ee..cc3ed9cf28be 100644 --- a/drivers/net/lp486e.c +++ b/drivers/net/lp486e.c @@ -377,7 +377,7 @@ static char init_setup[14] = { }; static int i596_open(struct net_device *dev); -static int i596_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev); static irqreturn_t i596_interrupt(int irq, void *dev_id); static int i596_close(struct net_device *dev); static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd); @@ -863,7 +863,7 @@ static int i596_open(struct net_device *dev) return 0; /* Always succeed */ } -static int i596_start_xmit (struct sk_buff *skb, struct net_device *dev) { +static netdev_tx_t i596_start_xmit (struct sk_buff *skb, struct net_device *dev) { struct tx_cmd *tx_cmd; short length; diff --git a/drivers/net/mlx4/en_tx.c b/drivers/net/mlx4/en_tx.c index 0ecc1e1b013e..d3d6e991065b 100644 --- a/drivers/net/mlx4/en_tx.c +++ b/drivers/net/mlx4/en_tx.c @@ -588,7 +588,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb) return skb_tx_hash(dev, skb); } -int mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) +netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h index 4513fb4960dc..4376147b0ea0 100644 --- a/drivers/net/mlx4/mlx4_en.h +++ b/drivers/net/mlx4/mlx4_en.h @@ -518,7 +518,7 @@ int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); void mlx4_en_poll_tx_cq(unsigned long data); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb); -int mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring, u32 size, u16 stride); diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 75deef35b1e0..6930c87f362e 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -360,7 +360,8 @@ MODULE_PARM_DESC(myri10ge_dca, "Enable DCA if possible"); #define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8) static void myri10ge_set_multicast_list(struct net_device *dev); -static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, + struct net_device *dev); static inline void put_be32(__be32 val, __be32 __iomem * p) { @@ -2656,7 +2657,8 @@ myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src, * it and try again. 
*/ -static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t myri10ge_xmit(struct sk_buff *skb, + struct net_device *dev) { struct myri10ge_priv *mgp = netdev_priv(dev); struct myri10ge_slice_state *ss; @@ -2947,12 +2949,13 @@ drop: } -static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb, + struct net_device *dev) { struct sk_buff *segs, *curr; struct myri10ge_priv *mgp = netdev_priv(dev); struct myri10ge_slice_state *ss; - int status; + netdev_tx_t status; segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6); if (IS_ERR(segs)) diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c index 32db12a27342..bd41351e4e26 100644 --- a/drivers/net/natsemi.c +++ b/drivers/net/natsemi.c @@ -621,7 +621,7 @@ static void drain_ring(struct net_device *dev); static void free_ring(struct net_device *dev); static void reinit_ring(struct net_device *dev); static void init_registers(struct net_device *dev); -static int start_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); static void netdev_error(struct net_device *dev, int intr_status); static int natsemi_poll(struct napi_struct *napi, int budget); @@ -2079,7 +2079,7 @@ static void reinit_ring(struct net_device *dev) reinit_rx(dev); } -static int start_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); void __iomem * ioaddr = ns_ioaddr(dev); diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index fab51d16f5fb..f824a392bf56 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -63,7 +63,8 @@ static int __devinit netxen_nic_probe(struct pci_dev *pdev, static void __devexit netxen_nic_remove(struct pci_dev *pdev); static int netxen_nic_open(struct net_device *netdev); static int netxen_nic_close(struct net_device *netdev); -static int netxen_nic_xmit_frame(struct sk_buff *, struct net_device *); +static netdev_tx_t netxen_nic_xmit_frame(struct sk_buff *, + struct net_device *); static void netxen_tx_timeout(struct net_device *netdev); static void netxen_reset_task(struct work_struct *work); static void netxen_watchdog(unsigned long); @@ -1599,7 +1600,7 @@ netxen_clear_cmddesc(u64 *desc) desc[2] = 0ULL; } -static int +static netdev_tx_t netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct netxen_adapter *adapter = netdev_priv(netdev); diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c index a0ac5d4f27d3..bd0ac690d12c 100644 --- a/drivers/net/ni52.c +++ b/drivers/net/ni52.c @@ -170,7 +170,7 @@ static int ni52_probe1(struct net_device *dev, int ioaddr); static irqreturn_t ni52_interrupt(int irq, void *dev_id); static int ni52_open(struct net_device *dev); static int ni52_close(struct net_device *dev); -static int ni52_send_packet(struct sk_buff *, struct net_device *); +static netdev_tx_t ni52_send_packet(struct sk_buff *, struct net_device *); static struct net_device_stats *ni52_get_stats(struct net_device *dev); static void set_multicast_list(struct net_device *dev); static void ni52_timeout(struct net_device *dev); @@ -1173,7 +1173,8 @@ static void ni52_timeout(struct net_device *dev) * send frame */ -static int ni52_send_packet(struct sk_buff *skb, struct net_device *dev) +static 
netdev_tx_t ni52_send_packet(struct sk_buff *skb, + struct net_device *dev) { int len, i; #ifndef NO_NOPCOMMANDS diff --git a/drivers/net/ni65.c b/drivers/net/ni65.c index 81a061785898..752c2e4d9cf4 100644 --- a/drivers/net/ni65.c +++ b/drivers/net/ni65.c @@ -252,7 +252,8 @@ static void ni65_xmit_intr(struct net_device *dev,int); static int ni65_open(struct net_device *dev); static int ni65_lance_reinit(struct net_device *dev); static void ni65_init_lance(struct priv *p,unsigned char*,int,int); -static int ni65_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t ni65_send_packet(struct sk_buff *skb, + struct net_device *dev); static void ni65_timeout(struct net_device *dev); static int ni65_close(struct net_device *dev); static int ni65_alloc_buffer(struct net_device *dev); @@ -1157,7 +1158,8 @@ static void ni65_timeout(struct net_device *dev) * Send a packet */ -static int ni65_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t ni65_send_packet(struct sk_buff *skb, + struct net_device *dev) { struct priv *p = dev->ml_priv; diff --git a/drivers/net/niu.c b/drivers/net/niu.c index 3ada7ea2abca..119fd4e04141 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -6657,7 +6657,8 @@ static u64 niu_compute_tx_flags(struct sk_buff *skb, struct ethhdr *ehdr, return ret; } -static int niu_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t niu_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct niu *np = netdev_priv(dev); unsigned long align, headroom; diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index 1576ac07216e..c594e1946476 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -1077,7 +1077,8 @@ static void ns83820_cleanup_tx(struct ns83820 *dev) * while trying to track down a bug in either the zero copy code or * the tx fifo (hence the MAX_FRAG_LEN). */ -static int ns83820_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t ns83820_hard_start_xmit(struct sk_buff *skb, + struct net_device *ndev) { struct ns83820 *dev = PRIV(ndev); u32 free_idx, cmdsts, extsts; diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 6859442f1862..6d28b18e7e28 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -303,7 +303,8 @@ static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *); static int pcnet32_probe1(unsigned long, int, struct pci_dev *); static int pcnet32_open(struct net_device *); static int pcnet32_init_ring(struct net_device *); -static int pcnet32_start_xmit(struct sk_buff *, struct net_device *); +static netdev_tx_t pcnet32_start_xmit(struct sk_buff *, + struct net_device *); static void pcnet32_tx_timeout(struct net_device *dev); static irqreturn_t pcnet32_interrupt(int, void *); static int pcnet32_close(struct net_device *); @@ -2481,7 +2482,8 @@ static void pcnet32_tx_timeout(struct net_device *dev) spin_unlock_irqrestore(&lp->lock, flags); } -static int pcnet32_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t pcnet32_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct pcnet32_private *lp = netdev_priv(dev); unsigned long ioaddr = dev->base_addr; diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c index 3e4b67aaa6ea..4c610511eb40 100644 --- a/drivers/net/qla3xxx.c +++ b/drivers/net/qla3xxx.c @@ -2572,7 +2572,8 @@ map_error: * The IOCB is always the top of the chain followed by one or more * OALs (when necessary). 
*/ -static int ql3xxx_send(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t ql3xxx_send(struct sk_buff *skb, + struct net_device *ndev) { struct ql3_adapter *qdev = (struct ql3_adapter *)netdev_priv(ndev); struct ql3xxx_port_registers __iomem *port_regs = qdev->mem_map_registers; diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c index 8dd266befdc7..220529257828 100644 --- a/drivers/net/qlge/qlge_main.c +++ b/drivers/net/qlge/qlge_main.c @@ -2103,7 +2103,7 @@ static void ql_hw_csum_setup(struct sk_buff *skb, iph->daddr, len, iph->protocol, 0); } -static int qlge_send(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t qlge_send(struct sk_buff *skb, struct net_device *ndev) { struct tx_ring_desc *tx_ring_desc; struct ob_mac_iocb_req *mac_iocb_ptr; diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c index 8068a07eb2b3..7dfcb58b0eb4 100644 --- a/drivers/net/r6040.c +++ b/drivers/net/r6040.c @@ -883,13 +883,13 @@ static int r6040_open(struct net_device *dev) return 0; } -static int r6040_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t r6040_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct r6040_private *lp = netdev_priv(dev); struct r6040_descriptor *descptr; void __iomem *ioaddr = lp->base; unsigned long flags; - int ret = NETDEV_TX_OK; /* Critical Section */ spin_lock_irqsave(&lp->lock, flags); @@ -899,8 +899,7 @@ static int r6040_start_xmit(struct sk_buff *skb, struct net_device *dev) spin_unlock_irqrestore(&lp->lock, flags); netif_stop_queue(dev); printk(KERN_ERR DRV_NAME ": no tx descriptor\n"); - ret = NETDEV_TX_BUSY; - return ret; + return NETDEV_TX_BUSY; } /* Statistic Counter */ @@ -928,7 +927,8 @@ static int r6040_start_xmit(struct sk_buff *skb, struct net_device *dev) dev->trans_start = jiffies; spin_unlock_irqrestore(&lp->lock, flags); - return ret; + + return NETDEV_TX_OK; } static void r6040_multicast_list(struct net_device *dev) diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 8cd85309c675..ec0092affd5d 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -507,7 +507,8 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(RTL8169_VERSION); static int rtl8169_open(struct net_device *dev); -static int rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance); static int rtl8169_init_ring(struct net_device *dev); static void rtl_hw_start(struct net_device *dev); @@ -3357,7 +3358,8 @@ static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev) return 0; } -static int rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct rtl8169_private *tp = netdev_priv(dev); unsigned int frags, entry = tp->cur_tx % NUM_TX_DESC; @@ -3366,7 +3368,6 @@ static int rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev) dma_addr_t mapping; u32 status, len; u32 opts1; - int ret = NETDEV_TX_OK; if (unlikely(TX_BUFFS_AVAIL(tp) < skb_shinfo(skb)->nr_frags)) { if (netif_msg_drv(tp)) { @@ -3418,13 +3419,12 @@ static int rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev) } out: - return ret; + return NETDEV_TX_OK; err_stop: netif_stop_queue(dev); - ret = NETDEV_TX_BUSY; dev->stats.tx_dropped++; - goto out; + return NETDEV_TX_BUSY; } static void rtl8169_pcierr_interrupt(struct net_device *dev) diff 
--git a/drivers/net/rrunner.c b/drivers/net/rrunner.c index d95534655911..20a71749154a 100644 --- a/drivers/net/rrunner.c +++ b/drivers/net/rrunner.c @@ -1401,7 +1401,8 @@ static int rr_close(struct net_device *dev) } -static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t rr_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct rr_private *rrpriv = netdev_priv(dev); struct rr_regs __iomem *regs = rrpriv->regs; diff --git a/drivers/net/rrunner.h b/drivers/net/rrunner.h index 6173f11218df..28169043ae49 100644 --- a/drivers/net/rrunner.h +++ b/drivers/net/rrunner.h @@ -831,7 +831,8 @@ static int rr_init1(struct net_device *dev); static irqreturn_t rr_interrupt(int irq, void *dev_id); static int rr_open(struct net_device *dev); -static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t rr_start_xmit(struct sk_buff *skb, + struct net_device *dev); static int rr_close(struct net_device *dev); static int rr_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); static unsigned int rr_read_eeprom(struct rr_private *rrpriv, diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 3138df5773ee..ddccf5fa56b6 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -4072,7 +4072,7 @@ static int s2io_close(struct net_device *dev) * 0 on success & 1 on failure. */ -static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) { struct s2io_nic *sp = netdev_priv(dev); u16 frg_cnt, frg_len, i, queue, queue_len, put_off, get_off; diff --git a/drivers/net/sb1000.c b/drivers/net/sb1000.c index 6a81aec645d9..ee366c5a8fa3 100644 --- a/drivers/net/sb1000.c +++ b/drivers/net/sb1000.c @@ -82,7 +82,8 @@ struct sb1000_private { extern int sb1000_probe(struct net_device *dev); static int sb1000_open(struct net_device *dev); static int sb1000_dev_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd); -static int sb1000_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t sb1000_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t sb1000_interrupt(int irq, void *dev_id); static int sb1000_close(struct net_device *dev); @@ -1080,7 +1081,7 @@ static int sb1000_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } /* transmit function: do nothing since SB1000 can't send anything out */ -static int +static netdev_tx_t sb1000_start_xmit(struct sk_buff *skb, struct net_device *dev) { printk(KERN_WARNING "%s: trying to transmit!!!\n", dev->name); diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c index e3156c97bb58..8d6030022d14 100644 --- a/drivers/net/sc92031.c +++ b/drivers/net/sc92031.c @@ -941,7 +941,8 @@ static struct net_device_stats *sc92031_get_stats(struct net_device *dev) return &dev->stats; } -static int sc92031_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t sc92031_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct sc92031_priv *priv = netdev_priv(dev); void __iomem *port_base = priv->port_base; diff --git a/drivers/net/seeq8005.c b/drivers/net/seeq8005.c index 7cc8bb814137..39246d457ac2 100644 --- a/drivers/net/seeq8005.c +++ b/drivers/net/seeq8005.c @@ -81,7 +81,8 @@ struct net_local { static int seeq8005_probe1(struct net_device *dev, int ioaddr); static int seeq8005_open(struct net_device *dev); static void seeq8005_timeout(struct net_device *dev); -static int seeq8005_send_packet(struct sk_buff *skb, struct net_device *dev); 
+static netdev_tx_t seeq8005_send_packet(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t seeq8005_interrupt(int irq, void *dev_id); static void seeq8005_rx(struct net_device *dev); static int seeq8005_close(struct net_device *dev); @@ -394,7 +395,8 @@ static void seeq8005_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int seeq8005_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t seeq8005_send_packet(struct sk_buff *skb, + struct net_device *dev) { short length = skb->len; unsigned char *buf; diff --git a/drivers/net/sfc/efx.h b/drivers/net/sfc/efx.h index da157aa74b83..aecaf62f4929 100644 --- a/drivers/net/sfc/efx.h +++ b/drivers/net/sfc/efx.h @@ -20,8 +20,9 @@ #define FALCON_B_P_DEVID 0x0710 /* TX */ -extern int efx_xmit(struct efx_nic *efx, - struct efx_tx_queue *tx_queue, struct sk_buff *skb); +extern netdev_tx_t efx_xmit(struct efx_nic *efx, + struct efx_tx_queue *tx_queue, + struct sk_buff *skb); extern void efx_stop_queue(struct efx_nic *efx); extern void efx_wake_queue(struct efx_nic *efx); diff --git a/drivers/net/sfc/selftest.c b/drivers/net/sfc/selftest.c index b67ccca3fc1a..817c7efc11e0 100644 --- a/drivers/net/sfc/selftest.c +++ b/drivers/net/sfc/selftest.c @@ -400,7 +400,8 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue) struct efx_loopback_state *state = efx->loopback_selftest; struct efx_loopback_payload *payload; struct sk_buff *skb; - int i, rc; + int i; + netdev_tx_t rc; /* Transmit N copies of buffer */ for (i = 0; i < state->packet_count; i++) { diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c index 14a14788566c..489c4de31447 100644 --- a/drivers/net/sfc/tx.c +++ b/drivers/net/sfc/tx.c @@ -138,8 +138,8 @@ static void efx_tsoh_free(struct efx_tx_queue *tx_queue, * Returns NETDEV_TX_OK or NETDEV_TX_BUSY * You must hold netif_tx_lock() to call this function. */ -static int efx_enqueue_skb(struct efx_tx_queue *tx_queue, - struct sk_buff *skb) +static netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, + struct sk_buff *skb) { struct efx_nic *efx = tx_queue->efx; struct pci_dev *pci_dev = efx->pci_dev; @@ -152,7 +152,7 @@ static int efx_enqueue_skb(struct efx_tx_queue *tx_queue, unsigned int dma_len; bool unmap_single; int q_space, i = 0; - int rc = NETDEV_TX_OK; + netdev_tx_t rc = NETDEV_TX_OK; EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); @@ -353,14 +353,11 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue, * * Context: netif_tx_lock held */ -inline int efx_xmit(struct efx_nic *efx, - struct efx_tx_queue *tx_queue, struct sk_buff *skb) +inline netdev_tx_t efx_xmit(struct efx_nic *efx, + struct efx_tx_queue *tx_queue, struct sk_buff *skb) { - int rc; - /* Map fragments for DMA and add to TX queue */ - rc = efx_enqueue_skb(tx_queue, skb); - return rc; + return efx_enqueue_skb(tx_queue, skb); } /* Initiate a packet transmission. We use one channel per CPU @@ -372,7 +369,8 @@ inline int efx_xmit(struct efx_nic *efx, * Note that returning anything other than NETDEV_TX_OK will cause the * OS to free the skb. 
*/ -int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev) +netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, + struct net_device *net_dev) { struct efx_nic *efx = netdev_priv(net_dev); struct efx_tx_queue *tx_queue; diff --git a/drivers/net/sfc/tx.h b/drivers/net/sfc/tx.h index 5e1cc234e42f..e3678962a5b4 100644 --- a/drivers/net/sfc/tx.h +++ b/drivers/net/sfc/tx.h @@ -18,7 +18,8 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue); void efx_init_tx_queue(struct efx_tx_queue *tx_queue); void efx_fini_tx_queue(struct efx_tx_queue *tx_queue); -int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev); +netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb, + struct net_device *net_dev); void efx_release_tx_buffers(struct efx_tx_queue *tx_queue); #endif /* EFX_TX_H */ diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 1f040e8a000b..7cc9898f4e00 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -1168,7 +1168,8 @@ static int sis190_close(struct net_device *dev) return 0; } -static int sis190_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t sis190_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct sis190_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp->mmio_addr; diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c index 61ceeaaf104d..d8827323507a 100644 --- a/drivers/net/sis900.c +++ b/drivers/net/sis900.c @@ -214,7 +214,8 @@ static void sis900_check_mode (struct net_device *net_dev, struct mii_phy *mii_p static void sis900_tx_timeout(struct net_device *net_dev); static void sis900_init_tx_ring(struct net_device *net_dev); static void sis900_init_rx_ring(struct net_device *net_dev); -static int sis900_start_xmit(struct sk_buff *skb, struct net_device *net_dev); +static netdev_tx_t sis900_start_xmit(struct sk_buff *skb, + struct net_device *net_dev); static int sis900_rx(struct net_device *net_dev); static void sis900_finish_xmit (struct net_device *net_dev); static irqreturn_t sis900_interrupt(int irq, void *dev_instance); @@ -1571,7 +1572,7 @@ static void sis900_tx_timeout(struct net_device *net_dev) * tell upper layer if the buffer is full */ -static int +static netdev_tx_t sis900_start_xmit(struct sk_buff *skb, struct net_device *net_dev) { struct sis900_private *sis_priv = netdev_priv(net_dev); diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c index 888a14a045ef..38a508b4aad9 100644 --- a/drivers/net/skfp/skfddi.c +++ b/drivers/net/skfp/skfddi.c @@ -107,7 +107,8 @@ static void skfp_ctl_set_multicast_list(struct net_device *dev); static void skfp_ctl_set_multicast_list_wo_lock(struct net_device *dev); static int skfp_ctl_set_mac_address(struct net_device *dev, void *addr); static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); -static int skfp_send_pkt(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t skfp_send_pkt(struct sk_buff *skb, + struct net_device *dev); static void send_queued_packets(struct s_smc *smc); static void CheckSourceAddress(unsigned char *frame, unsigned char *hw_addr); static void ResetAdapter(struct s_smc *smc); @@ -1056,7 +1057,8 @@ static int skfp_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) * Side Effects: * None */ -static int skfp_send_pkt(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t skfp_send_pkt(struct sk_buff *skb, + struct net_device *dev) { struct s_smc *smc = netdev_priv(dev); skfddi_priv *bp = &smc->os; diff --git a/drivers/net/skge.c b/drivers/net/skge.c 
index 543af2044f40..1a1e68549f5c 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -2746,7 +2746,8 @@ static inline int skge_avail(const struct skge_ring *ring) + (ring->to_clean - ring->to_use) - 1; } -static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t skge_xmit_frame(struct sk_buff *skb, + struct net_device *dev) { struct skge_port *skge = netdev_priv(dev); struct skge_hw *hw = skge->hw; diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index dd630cf18b4d..c7c0a5b7b53a 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -1574,7 +1574,8 @@ static void sky2_tx_unmap(struct pci_dev *pdev, * the number of ring elements will probably be less than the number * of list elements used. */ -static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb, + struct net_device *dev) { struct sky2_port *sky2 = netdev_priv(dev); struct sky2_hw *hw = sky2->hw; diff --git a/drivers/net/smc9194.c b/drivers/net/smc9194.c index 0a1b6f401087..934a12012829 100644 --- a/drivers/net/smc9194.c +++ b/drivers/net/smc9194.c @@ -299,7 +299,8 @@ static void smc_hardware_send_packet( struct net_device * dev ); . to store the packet, I call this routine, which either sends it . now, or generates an interrupt when the card is ready for the . packet */ -static int smc_wait_to_send_packet( struct sk_buff * skb, struct net_device *dev ); +static netdev_tx_t smc_wait_to_send_packet( struct sk_buff * skb, + struct net_device *dev ); /* this does a soft reset on the device */ static void smc_reset( int ioaddr ); @@ -487,7 +488,8 @@ static void smc_setmulticast( int ioaddr, int count, struct dev_mc_list * addrs . o (NO): Enable interrupts and let the interrupt handler deal with it. . o (YES):Send it now. 
*/ -static int smc_wait_to_send_packet( struct sk_buff * skb, struct net_device * dev ) +static netdev_tx_t smc_wait_to_send_packet(struct sk_buff *skb, + struct net_device *dev) { struct smc_local *lp = netdev_priv(dev); unsigned int ioaddr = dev->base_addr; diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c index 60abdb1081ad..514311d67b36 100644 --- a/drivers/net/smsc9420.c +++ b/drivers/net/smsc9420.c @@ -968,7 +968,8 @@ static void smsc9420_complete_tx(struct net_device *dev) } } -static int smsc9420_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t smsc9420_hard_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct smsc9420_pdata *pd = netdev_priv(dev); dma_addr_t mapping; diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c index 5e7645ee8ab0..a36e2b51e88c 100644 --- a/drivers/net/starfire.c +++ b/drivers/net/starfire.c @@ -595,7 +595,7 @@ static int netdev_open(struct net_device *dev); static void check_duplex(struct net_device *dev); static void tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); -static int start_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); static void netdev_error(struct net_device *dev, int intr_status); static int __netdev_rx(struct net_device *dev, int *quota); @@ -1223,7 +1223,7 @@ static void init_ring(struct net_device *dev) } -static int start_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); unsigned int entry; diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c index d09be481bcc4..e13685a570f4 100644 --- a/drivers/net/sundance.c +++ b/drivers/net/sundance.c @@ -415,7 +415,7 @@ static void check_duplex(struct net_device *dev); static void netdev_timer(unsigned long data); static void tx_timeout(struct net_device *dev); static void init_ring(struct net_device *dev); -static int start_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); static int reset_tx (struct net_device *dev); static irqreturn_t intr_handler(int irq, void *dev_instance); static void rx_poll(unsigned long data); @@ -1053,7 +1053,7 @@ static void tx_poll (unsigned long data) return; } -static int +static netdev_tx_t start_tx (struct sk_buff *skb, struct net_device *dev) { struct netdev_private *np = netdev_priv(dev); diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index d2dfe0ab5106..e0dfdd246470 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -1015,7 +1015,8 @@ static __inline__ int gem_intme(int entry) return 0; } -static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t gem_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct gem *gp = netdev_priv(dev); int entry; diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index 008bd59fc64b..37d721bbdb35 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -2252,7 +2252,8 @@ static void happy_meal_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int happy_meal_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t happy_meal_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct happy_meal *hp = netdev_priv(dev); int entry; diff --git a/drivers/net/tehuti.c 
b/drivers/net/tehuti.c index 3c2679cd196b..918d4c9e49b3 100644 --- a/drivers/net/tehuti.c +++ b/drivers/net/tehuti.c @@ -1622,7 +1622,8 @@ static inline int bdx_tx_space(struct bdx_priv *priv) * the driver. Note: the driver must NOT put the skb in its DMA ring. * o NETDEV_TX_LOCKED Locking failed, please retry quickly. */ -static int bdx_tx_transmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb, + struct net_device *ndev) { struct bdx_priv *priv = netdev_priv(ndev); struct txd_fifo *f = &priv->txd_fifo0; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index a7d14aae258a..9d5c1786c664 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -5135,7 +5135,8 @@ static void tg3_set_txd(struct tg3_napi *tnapi, int entry, /* hard_start_xmit for devices that don't have any bugs and * support TG3_FLG2_HW_TSO_2 only. */ -static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); u32 len, entry, base_flags, mss; @@ -5251,7 +5252,8 @@ out_unlock: return NETDEV_TX_OK; } -static int tg3_start_xmit_dma_bug(struct sk_buff *, struct net_device *); +static netdev_tx_t tg3_start_xmit_dma_bug(struct sk_buff *, + struct net_device *); /* Use GSO to workaround a rare TSO bug that may be triggered when the * TSO header is greater than 80 bytes. @@ -5290,7 +5292,8 @@ tg3_tso_bug_end: /* hard_start_xmit for devices that have the 4G bug and/or 40-bit bug and * support TG3_FLG2_HW_TSO_1 or firmware TSO only. */ -static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t tg3_start_xmit_dma_bug(struct sk_buff *skb, + struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); u32 len, entry, base_flags, mss; diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c index 70c9ec45d8fb..49e273bceed6 100644 --- a/drivers/net/tlan.c +++ b/drivers/net/tlan.c @@ -289,7 +289,7 @@ static void TLan_EisaProbe( void ); static void TLan_Eisa_Cleanup( void ); static int TLan_Init( struct net_device * ); static int TLan_Open( struct net_device *dev ); -static int TLan_StartTx( struct sk_buff *, struct net_device *); +static netdev_tx_t TLan_StartTx( struct sk_buff *, struct net_device *); static irqreturn_t TLan_HandleInterrupt( int, void *); static int TLan_Close( struct net_device *); static struct net_device_stats *TLan_GetStats( struct net_device *); @@ -1083,7 +1083,7 @@ static void TLan_tx_timeout_work(struct work_struct *work) * **************************************************************/ -static int TLan_StartTx( struct sk_buff *skb, struct net_device *dev ) +static netdev_tx_t TLan_StartTx( struct sk_buff *skb, struct net_device *dev ) { TLanPrivateInfo *priv = netdev_priv(dev); dma_addr_t tail_list_phys; diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index 2c26b4577e8a..d6d345229fe9 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -762,7 +762,7 @@ typhoon_tso_fill(struct sk_buff *skb, struct transmit_ring *txRing, tcpd->status = 0; } -static int +static netdev_tx_t typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) { struct typhoon *tp = netdev_priv(dev); diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c index 46eb618bbc90..081402cb05fd 100644 --- a/drivers/net/via-rhine.c +++ b/drivers/net/via-rhine.c @@ -408,7 +408,8 @@ static int mdio_read(struct net_device *dev, int phy_id, int location); static void mdio_write(struct net_device *dev, int 
phy_id, int location, int value); static int rhine_open(struct net_device *dev); static void rhine_tx_timeout(struct net_device *dev); -static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t rhine_start_tx(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t rhine_interrupt(int irq, void *dev_instance); static void rhine_tx(struct net_device *dev); static int rhine_rx(struct net_device *dev, int limit); @@ -1213,7 +1214,8 @@ static void rhine_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t rhine_start_tx(struct sk_buff *skb, + struct net_device *dev) { struct rhine_private *rp = netdev_priv(dev); void __iomem *ioaddr = rp->base; diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 47be41a39d35..e56cf6b548d6 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -2465,7 +2465,8 @@ static int velocity_close(struct net_device *dev) * Called by the networ layer to request a packet is queued to * the velocity. Returns zero on success. */ -static int velocity_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t velocity_xmit(struct sk_buff *skb, + struct net_device *dev) { struct velocity_info *vptr = netdev_priv(dev); int qnum = 0; diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c index 094d15548a2b..41dccba50c46 100644 --- a/drivers/net/vxge/vxge-main.c +++ b/drivers/net/vxge/vxge-main.c @@ -812,7 +812,7 @@ static int vxge_learn_mac(struct vxgedev *vdev, u8 *mac_header) * NOTE: when device cant queue the pkt, just the trans_start variable will * not be upadted. */ -static int +static netdev_tx_t vxge_xmit(struct sk_buff *skb, struct net_device *dev) { struct vxge_fifo *fifo = NULL; diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c index 76764237cde6..9509477f61f4 100644 --- a/drivers/net/yellowfin.c +++ b/drivers/net/yellowfin.c @@ -347,7 +347,8 @@ static int yellowfin_open(struct net_device *dev); static void yellowfin_timer(unsigned long data); static void yellowfin_tx_timeout(struct net_device *dev); static void yellowfin_init_ring(struct net_device *dev); -static int yellowfin_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t yellowfin_interrupt(int irq, void *dev_instance); static int yellowfin_rx(struct net_device *dev); static void yellowfin_error(struct net_device *dev, int intr_status); @@ -808,7 +809,8 @@ static void yellowfin_init_ring(struct net_device *dev) return; } -static int yellowfin_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb, + struct net_device *dev) { struct yellowfin_private *yp = netdev_priv(dev); unsigned entry; diff --git a/drivers/net/znet.c b/drivers/net/znet.c index 7f9e14131a5c..a0384b6f09b6 100644 --- a/drivers/net/znet.c +++ b/drivers/net/znet.c @@ -156,7 +156,8 @@ struct netidblk { }; static int znet_open(struct net_device *dev); -static int znet_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t znet_send_packet(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t znet_interrupt(int irq, void *dev_id); static void znet_rx(struct net_device *dev); static int znet_close(struct net_device *dev); @@ -534,7 +535,7 @@ static void znet_tx_timeout (struct net_device *dev) netif_wake_queue (dev); } 
-static int znet_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t znet_send_packet(struct sk_buff *skb, struct net_device *dev) { int ioaddr = dev->base_addr; struct znet_private *znet = netdev_priv(dev); diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index cd4bcb6989ce..7d650a0e3d8f 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -337,7 +337,8 @@ struct net_device *alloc_arcdev(const char *name); int arcnet_open(struct net_device *dev); int arcnet_close(struct net_device *dev); -int arcnet_send_packet(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t arcnet_send_packet(struct sk_buff *skb, + struct net_device *dev); void arcnet_timeout(struct net_device *dev); #endif /* __KERNEL__ */ -- cgit v1.2.3 From cb45439977d3602b91dd0aca2d94fa3b32aebba6 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Mon, 31 Aug 2009 12:31:36 +0000 Subject: net: Add ndo_fcoe_enable/ndo_fcoe_disable to net_device_ops Add ndo_fcoe_enable/_disable to net_device_ops so the corresponding HW can initialize itself for FCoE traffic or clean up after FCoE traffic is done. This is expected to be called by the kernel FCoE stack upon receiving a request for creating an FCoE instance on the corresponding netdev interface. When implemented by the actual HW, the HW driver check the op code to perform corresponding initialization or clean up for FCoE. The initialization normally includes allocating extra queues for FCoE, setting corresponding HW registers for FCoE, indicating FCoE offload features via netdev, etc. The clean-up would include releasing the resources allocated for FCoE. Signed-off-by: Yi Zou Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 376a2e1ac00d..121cbad0aae5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -627,6 +627,8 @@ struct net_device_ops { void (*ndo_poll_controller)(struct net_device *dev); #endif #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) + int (*ndo_fcoe_enable)(struct net_device *dev); + int (*ndo_fcoe_disable)(struct net_device *dev); int (*ndo_fcoe_ddp_setup)(struct net_device *dev, u16 xid, struct scatterlist *sgl, -- cgit v1.2.3 From 0f6f290259896afdca30e1ff4a28aff8edd79a14 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Mon, 31 Aug 2009 12:32:34 +0000 Subject: dcbnl: Add support for setapp/getapp commands to dcbnl This patch adds dcbnl command definitions to support setapp/getapp functionality from the IEEE 802.1Qaz Data Center Bridging Capability Exchange protocol (DCBX) specification. Section 3.3 defines the application protocol and its 802.1p user priority in DCBX, which is implemented here as a pair of setapp/getapp commands in the kernel dcbnl for setting and retrieving the user priority for an given application protocol. The protocol is identified by the combination of an id and an idtype. Currently, when idtype is 0, the corresponding id gives the ether type of this protocol, e.g., for FCoE, it will be 0x8906; when idtype is 1, then the corresponding id gives the TCP or UDP port number. For more information regarding DCBX spec., please refer to the following: http://www.ieee802.org/1/files/public/docs2008/ az-wadekar-dcbx-capability-exchange-discovery-protocol-1108-v1.01.pdf Signed-off-by: Yi Zou Acked-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- include/linux/dcbnl.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index 7d2e10006188..b7cdbb4373df 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -50,6 +50,8 @@ struct dcbmsg { * @DCB_CMD_SNUMTCS: set the number of traffic classes * @DCB_CMD_GBCN: set backward congestion notification configuration * @DCB_CMD_SBCN: get backward congestion notification configration. + * @DCB_CMD_GAPP: get application protocol configuration + * @DCB_CMD_SAPP: set application protocol configuration */ enum dcbnl_commands { DCB_CMD_UNDEFINED, @@ -80,6 +82,9 @@ enum dcbnl_commands { DCB_CMD_BCN_GCFG, DCB_CMD_BCN_SCFG, + DCB_CMD_GAPP, + DCB_CMD_SAPP, + __DCB_CMD_ENUM_MAX, DCB_CMD_MAX = __DCB_CMD_ENUM_MAX - 1, }; @@ -114,6 +119,7 @@ enum dcbnl_attrs { DCB_ATTR_CAP, DCB_ATTR_NUMTCS, DCB_ATTR_BCN, + DCB_ATTR_APP, __DCB_ATTR_ENUM_MAX, DCB_ATTR_MAX = __DCB_ATTR_ENUM_MAX - 1, @@ -338,5 +344,17 @@ enum dcb_general_attr_values { DCB_ATTR_VALUE_UNDEFINED = 0xff }; +#define DCB_APP_IDTYPE_ETHTYPE 0x00 +#define DCB_APP_IDTYPE_PORTNUM 0x01 +enum dcbnl_app_attrs { + DCB_APP_ATTR_UNDEFINED, + + DCB_APP_ATTR_IDTYPE, + DCB_APP_ATTR_ID, + DCB_APP_ATTR_PRIORITY, + + __DCB_APP_ATTR_ENUM_MAX, + DCB_APP_ATTR_MAX = __DCB_APP_ATTR_ENUM_MAX - 1, +}; #endif /* __LINUX_DCBNL_H__ */ -- cgit v1.2.3 From 7114323b1761bdf787ed79323a4a13f787878295 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Mon, 31 Aug 2009 12:32:55 +0000 Subject: dcbnl: Add support for setapp/getapp to netdev dcbnl_rtnl_ops Adds support of dcbnl setapp/getapp to dcbnl_rtnl_ops in netdev to allow LLDs to implement their corresponding dcbnl setapp/getapp ops to support the IEEE 802.1Q DCBX setapp/getapp commands. Signed-off-by: Yi Zou Acked-by: Peter P Waskiewicz Jr Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/net/dcbnl.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 775cfc8055be..b36ac7e0914d 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -48,6 +48,8 @@ struct dcbnl_rtnl_ops { void (*setbcncfg)(struct net_device *, int, u32); void (*getbcnrp)(struct net_device *, int, u8 *); void (*setbcnrp)(struct net_device *, int, u8); + u8 (*setapp)(struct net_device *, u8, u16, u8); + u8 (*getapp)(struct net_device *, u8, u16); }; #endif /* __NET_DCBNL_H__ */ -- cgit v1.2.3 From f1ecd5d9e7366609d640ff4040304ea197fbc618 Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Wed, 26 Aug 2009 00:16:31 +0000 Subject: Revert Backoff [v3]: Revert RTO on ICMP destination unreachable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here, an ICMP host/network unreachable message, whose payload fits to TCP's SND.UNA, is taken as an indication that the RTO retransmission has not been lost due to congestion, but because of a route failure somewhere along the path. With true congestion, a router won't trigger such a message and the patched TCP will operate as standard TCP. This patch reverts one RTO backoff, if an ICMP host/network unreachable message, whose payload fits to TCP's SND.UNA, arrives. Based on the new RTO, the retransmission timer is reset to reflect the remaining time, or - if the revert clocked out the timer - a retransmission is sent out immediately. Backoffs are only reverted, if TCP is in RTO loss recovery, i.e. if there have been retransmissions and reversible backoffs, already. 
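In essence the revert is a small piece of timer arithmetic performed in tcp_v4_err(): drop one backoff, recompute the RTO from the un-backed-off estimate, and compare it against how long the head-of-queue segment has already been waiting. The stand-alone sketch below restates that arithmetic as an ordinary user-space program; the tick rate, RTT figures and elapsed time are illustrative assumptions, not kernel defaults, and the real implementation is in the tcp_ipv4.c hunk further down.

#include <stdio.h>

#define HZ          1000          /* assumed: 1 tick == 1 ms, illustration only */
#define TCP_RTO_MAX (120 * HZ)    /* same 120 s cap the kernel applies */

/* mirrors __tcp_set_rto(): smoothed RTT/8 plus RTT variance */
static unsigned int base_rto(unsigned int srtt, unsigned int rttvar)
{
	return (srtt >> 3) + rttvar;
}

int main(void)
{
	unsigned int srtt    = 200 << 3; /* 200 ms smoothed RTT, stored <<3 as in tcp_sock */
	unsigned int rttvar  = 50;       /* 50 ms variance */
	unsigned int backoff = 3;        /* three exponential backoffs taken so far */
	unsigned int elapsed = 400;      /* ms since the head-of-queue skb was sent */

	/* an ICMP net/host unreachable matching SND.UNA reverts one backoff */
	backoff--;

	unsigned int rto = base_rto(srtt, rttvar) << backoff;
	if (rto > TCP_RTO_MAX)
		rto = TCP_RTO_MAX;

	if (elapsed < rto)
		printf("re-arm retransmit timer for %u ms\n", rto - elapsed);
	else
		printf("revert clocked the timer out: retransmit immediately\n");
	return 0;
}

With these example numbers the reverted RTO is 1000 ms and 400 ms have already elapsed, so the timer is simply re-armed for the remaining 600 ms; only when the elapsed time exceeds the reverted RTO does the retransmission go out at once, matching the branches added to the ICMP handler.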
Changes from v2: 1) Renaming of skb in tcp_v4_err() moved to another patch. 2) Reintroduced tcp_bound_rto() and __tcp_set_rto(). 3) Fixed code comments. Signed-off-by: Damian Lukowski Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 12 ++++++++++++ net/ipv4/tcp_input.c | 5 ++--- net/ipv4/tcp_ipv4.c | 37 +++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_timer.c | 2 +- 4 files changed, 52 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index cbb2a4889fc9..54f212ce8aaf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -469,6 +469,7 @@ extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); extern int tcp_may_send_now(struct sock *sk); extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); +extern void tcp_retransmit_timer(struct sock *sk); extern void tcp_xmit_retransmit_queue(struct sock *); extern void tcp_simple_retransmit(struct sock *); extern int tcp_trim_head(struct sock *, struct sk_buff *, u32); @@ -521,6 +522,17 @@ extern int tcp_mtu_to_mss(struct sock *sk, int pmtu); extern int tcp_mss_to_mtu(struct sock *sk, int mss); extern void tcp_mtup_init(struct sock *sk); +static inline void tcp_bound_rto(const struct sock *sk) +{ + if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) + inet_csk(sk)->icsk_rto = TCP_RTO_MAX; +} + +static inline u32 __tcp_set_rto(const struct tcp_sock *tp) +{ + return (tp->srtt >> 3) + tp->rttvar; +} + static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) { tp->pred_flags = htonl((tp->tcp_header_len << 26) | diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2bdb0da237e6..af6d6fa00db1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -685,7 +685,7 @@ static inline void tcp_set_rto(struct sock *sk) * is invisible. Actually, Linux-2.4 also generates erratic * ACKs in some circumstances. */ - inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; + inet_csk(sk)->icsk_rto = __tcp_set_rto(tp); /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, @@ -696,8 +696,7 @@ static inline void tcp_set_rto(struct sock *sk) /* NOTE: clamping at TCP_RTO_MIN is not required, current algo * guarantees that rto is higher. */ - if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) - inet_csk(sk)->icsk_rto = TCP_RTO_MAX; + tcp_bound_rto(sk); } /* Save metrics learned by this TCP session. 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 6ca1bc8c3025..6755e29a6dd3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -332,12 +332,15 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) { struct iphdr *iph = (struct iphdr *)icmp_skb->data; struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2)); + struct inet_connection_sock *icsk; struct tcp_sock *tp; struct inet_sock *inet; const int type = icmp_hdr(icmp_skb)->type; const int code = icmp_hdr(icmp_skb)->code; struct sock *sk; + struct sk_buff *skb; __u32 seq; + __u32 remaining; int err; struct net *net = dev_net(icmp_skb->dev); @@ -367,6 +370,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) if (sk->sk_state == TCP_CLOSE) goto out; + icsk = inet_csk(sk); tp = tcp_sk(sk); seq = ntohl(th->seq); if (sk->sk_state != TCP_LISTEN && @@ -393,6 +397,39 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) } err = icmp_err_convert[code].errno; + /* check if icmp_skb allows revert of backoff + * (see draft-zimmermann-tcp-lcd) */ + if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH) + break; + if (seq != tp->snd_una || !icsk->icsk_retransmits || + !icsk->icsk_backoff) + break; + + icsk->icsk_backoff--; + inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) << + icsk->icsk_backoff; + tcp_bound_rto(sk); + + skb = tcp_write_queue_head(sk); + BUG_ON(!skb); + + remaining = icsk->icsk_rto - min(icsk->icsk_rto, + tcp_time_stamp - TCP_SKB_CB(skb)->when); + + if (remaining) { + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + remaining, TCP_RTO_MAX); + } else if (sock_owned_by_user(sk)) { + /* RTO revert clocked out retransmission, + * but socket is locked. Will defer. */ + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + HZ/20, TCP_RTO_MAX); + } else { + /* RTO revert clocked out retransmission. + * Will retransmit now */ + tcp_retransmit_timer(sk); + } + break; case ICMP_TIME_EXCEEDED: err = EHOSTUNREACH; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c520fb6e06d9..408fa4b7b9ba 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -279,7 +279,7 @@ static void tcp_probe_timer(struct sock *sk) * The TCP retransmit timer. */ -static void tcp_retransmit_timer(struct sock *sk) +void tcp_retransmit_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); -- cgit v1.2.3 From 6fa12c85031485dff38ce550c24f10da23b0adaa Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Wed, 26 Aug 2009 00:16:34 +0000 Subject: Revert Backoff [v3]: Calculate TCP's connection close threshold as a time value. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RFC 1122 specifies two threshold values R1 and R2 for connection timeouts, which may represent a number of allowed retransmissions or a timeout value. Currently linux uses sysctl_tcp_retries{1,2} to specify the thresholds in number of allowed retransmissions. For any desired threshold R2 (by means of time) one can specify tcp_retries2 (by means of number of retransmissions) such that TCP will not time out earlier than R2. This is the case, because the RTO schedule follows a fixed pattern, namely exponential backoff. However, the RTO behaviour is not predictable any more if RTO backoffs can be reverted, as it is the case in the draft "Make TCP more Robust to Long Connectivity Disruptions" (http://tools.ietf.org/html/draft-zimmermann-tcp-lcd). 
In the worst case TCP would time out a connection after 3.2 seconds, if the initial RTO equaled MIN_RTO and each backoff has been reverted. This patch introduces a function retransmits_timed_out(N), which calculates the timeout of a TCP connection, assuming an initial RTO of MIN_RTO and N unsuccessful, exponentially backed-off retransmissions. Whenever timeout decisions are made by comparing the retransmission counter to some value N, this function can be used, instead. The meaning of tcp_retries2 will be changed, as many more RTO retransmissions can occur than the value indicates. However, it yields a timeout which is similar to the one of an unpatched, exponentially backing off TCP in the same scenario. As no application could rely on an RTO greater than MIN_RTO, there should be no risk of a regression. Signed-off-by: Damian Lukowski Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 18 ++++++++++++++++++ net/ipv4/tcp_timer.c | 11 +++++++---- 2 files changed, 25 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 54f212ce8aaf..e5319495f15e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1252,6 +1252,24 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) +static inline bool retransmits_timed_out(const struct sock *sk, + unsigned int boundary) +{ + int limit, K; + if (!inet_csk(sk)->icsk_retransmits) + return false; + + K = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); + + if (boundary <= K) + limit = ((2 << boundary) - 1) * TCP_RTO_MIN; + else + limit = ((2 << K) - 1) * TCP_RTO_MIN + + (boundary - K) * TCP_RTO_MAX; + + return (tcp_time_stamp - tcp_sk(sk)->retrans_stamp) >= limit; +} + static inline struct sk_buff *tcp_send_head(struct sock *sk) { return sk->sk_send_head; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 408fa4b7b9ba..cdb2ca7684d4 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -137,13 +137,14 @@ static int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); int retry_until; + bool do_reset; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) dst_negative_advice(&sk->sk_dst_cache); retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; } else { - if (icsk->icsk_retransmits >= sysctl_tcp_retries1) { + if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); @@ -155,13 +156,15 @@ static int tcp_write_timeout(struct sock *sk) const int alive = (icsk->icsk_rto < TCP_RTO_MAX); retry_until = tcp_orphan_retries(sk, alive); + do_reset = alive || + !retransmits_timed_out(sk, retry_until); - if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until)) + if (tcp_out_of_resources(sk, do_reset)) return 1; } } - if (icsk->icsk_retransmits >= retry_until) { + if (retransmits_timed_out(sk, retry_until)) { /* Has it gone just too far? 
*/ tcp_write_err(sk); return 1; @@ -385,7 +388,7 @@ void tcp_retransmit_timer(struct sock *sk) out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); - if (icsk->icsk_retransmits > sysctl_tcp_retries1) + if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) __sk_dst_reset(sk); out:; -- cgit v1.2.3 From 8aa84ad8d6c740a04386f599694609ee4998e82e Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 24 Jul 2009 15:25:05 +0200 Subject: [CPUFREQ] Introduce global, not per core: /sys/devices/system/cpu/cpufreq Currently everything in the cpufreq layer is per core based. This does not reflect reality, for example ondemand on conservative governors have global sysfs variables. Introduce a global cpufreq directory and add the kobject to the governor struct, so that governors can easily access it. The directory is initialized in the cpufreq_core_init initcall and thus will always be created if cpufreq is compiled in, even if no cpufreq driver is active later. Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- drivers/cpufreq/cpufreq.c | 9 ++++++++- include/linux/cpufreq.h | 10 ++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index bbd5c2164ab6..4da28444b235 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -686,6 +686,9 @@ static struct attribute *default_attrs[] = { NULL }; +struct kobject *cpufreq_global_kobject; +EXPORT_SYMBOL(cpufreq_global_kobject); + #define to_policy(k) container_of(k, struct cpufreq_policy, kobj) #define to_attr(a) container_of(a, struct freq_attr, attr) @@ -1935,7 +1938,11 @@ static int __init cpufreq_core_init(void) per_cpu(policy_cpu, cpu) = -1; init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); } + + cpufreq_global_kobject = kobject_create_and_add("cpufreq", + &cpu_sysdev_class.kset.kobj); + BUG_ON(!cpufreq_global_kobject); + return 0; } - core_initcall(cpufreq_core_init); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 161042746afc..44717eb47639 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,6 +65,9 @@ static inline int cpufreq_unregister_notifier(struct notifier_block *nb, struct cpufreq_governor; +/* /sys/devices/system/cpu/cpufreq: entry point for global variables */ +extern struct kobject *cpufreq_global_kobject; + #define CPUFREQ_ETERNAL (-1) struct cpufreq_cpuinfo { unsigned int max_freq; @@ -274,6 +277,13 @@ struct freq_attr { ssize_t (*store)(struct cpufreq_policy *, const char *, size_t count); }; +struct global_attr { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, + struct attribute *attr, char *buf); + ssize_t (*store)(struct kobject *a, struct attribute *b, + const char *c, size_t count); +}; /********************************************************************* * CPUFREQ 2.6. INTERFACE * -- cgit v1.2.3 From 8bc11b491b6cad75e737f1d38bb4b261bd95b291 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Aug 2009 18:13:17 +0200 Subject: rfkill: relicense header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This header file is copied into userspace tools that need not be GPLv2 licensed, make that easier. 
Signed-off-by: Johannes Berg Acked-by: Alan Jenkins Acked-by: Henrique de Moraes Holschuh Acked-by: Iñaky Pérez-González Acked-by: Ivo van Doorn Acked-by: Jaswinder Singh Rajput Acked-by: Michael Buesch Acked-by: Tomas Winkler Signed-off-by: John W. Linville --- include/linux/rfkill.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 21ca51bf4dd2..3392c59d2706 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -6,20 +6,17 @@ * Copyright (C) 2007 Dmitry Torokhov * Copyright 2009 Johannes Berg * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the - * Free Software Foundation, Inc., - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include -- cgit v1.2.3 From fa8a123855e20068204982596b8fafceb1a67f0b Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Wed, 26 Aug 2009 13:13:37 +1000 Subject: drm/mm: add ability to dump mm lists via debugfs This adds code to the drm_mm to talk to debugfs, and adds support to radeon to add the VRAM and GTT mm lists to debugfs. I tested with spinlock debugging and it doesn't give out. Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_mm.c | 21 ++++++++++++++ drivers/gpu/drm/radeon/radeon_ttm.c | 56 +++++++++++++++++++++++++++++++++++++ include/drm/drm_mm.h | 4 +++ 3 files changed, 81 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 3e47869d6dae..c861d80fd779 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -44,6 +44,7 @@ #include "drmP.h" #include "drm_mm.h" #include +#include #define MM_UNUSED_TARGET 4 @@ -370,3 +371,23 @@ void drm_mm_takedown(struct drm_mm * mm) BUG_ON(mm->num_unused != 0); } EXPORT_SYMBOL(drm_mm_takedown); + +#if defined(CONFIG_DEBUG_FS) +int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm) +{ + struct drm_mm_node *entry; + int total_used = 0, total_free = 0, total = 0; + + list_for_each_entry(entry, &mm->ml_entry, ml_entry) { + seq_printf(m, "0x%08lx-0x%08lx: 0x%08lx: %s\n", entry->start, entry->start + entry->size, entry->size, entry->free ? 
"free" : "used"); + total += entry->size; + if (entry->free) + total_free += entry->size; + else + total_used += entry->size; + } + seq_printf(m, "total: %d, used %d free %d\n", total, total_free, total_used); + return 0; +} +EXPORT_SYMBOL(drm_mm_dump_table); +#endif diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 0a85e7b5d592..dc7a44274ea8 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -35,11 +35,14 @@ #include #include #include +#include #include "radeon_reg.h" #include "radeon.h" #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) +static int radeon_ttm_debugfs_init(struct radeon_device *rdev); + static struct radeon_device *radeon_get_rdev(struct ttm_bo_device *bdev) { struct radeon_mman *mman; @@ -504,6 +507,12 @@ int radeon_ttm_init(struct radeon_device *rdev) if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) { rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; } + + r = radeon_ttm_debugfs_init(rdev); + if (r) { + DRM_ERROR("Failed to init debugfs\n"); + return r; + } return 0; } @@ -678,3 +687,50 @@ struct ttm_backend *radeon_ttm_backend_create(struct radeon_device *rdev) gtt->bound = false; return >t->backend; } + +#define RADEON_DEBUGFS_MEM_TYPES 2 + +static struct drm_info_list radeon_mem_types_list[RADEON_DEBUGFS_MEM_TYPES]; +static char radeon_mem_types_names[RADEON_DEBUGFS_MEM_TYPES][32]; + +#if defined(CONFIG_DEBUG_FS) +static int radeon_mm_dump_table(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_mm *mm = (struct drm_mm *)node->info_ent->data; + struct drm_device *dev = node->minor->dev; + struct radeon_device *rdev = dev->dev_private; + int ret; + struct ttm_bo_global *glob = rdev->mman.bdev.glob; + + spin_lock(&glob->lru_lock); + ret = drm_mm_dump_table(m, mm); + spin_unlock(&glob->lru_lock); + return ret; +} +#endif + +static int radeon_ttm_debugfs_init(struct radeon_device *rdev) +{ + unsigned i; + +#if defined(CONFIG_DEBUG_FS) + for (i = 0; i < RADEON_DEBUGFS_MEM_TYPES; i++) { + if (i == 0) + sprintf(radeon_mem_types_names[i], "radeon_vram_mm"); + else + sprintf(radeon_mem_types_names[i], "radeon_gtt_mm"); + radeon_mem_types_list[i].name = radeon_mem_types_names[i]; + radeon_mem_types_list[i].show = &radeon_mm_dump_table; + radeon_mem_types_list[i].driver_features = 0; + if (i == 0) + radeon_mem_types_list[i].data = &rdev->mman.bdev.man[TTM_PL_VRAM].manager; + else + radeon_mem_types_list[i].data = &rdev->mman.bdev.man[TTM_PL_TT].manager; + + } + return radeon_debugfs_add_files(rdev, radeon_mem_types_list, RADEON_DEBUGFS_MEM_TYPES); + +#endif + return 0; +} diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index f8332073d277..bc5a87e8aeea 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -96,4 +96,8 @@ static inline struct drm_mm *drm_get_mm(struct drm_mm_node *block) return block->mm; } +#ifdef CONFIG_DEBUG_FS +int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm); +#endif + #endif -- cgit v1.2.3 From a3a0544b2c84e1d7a2022b558ecf66d8c6a8dd93 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 31 Aug 2009 15:16:30 +1000 Subject: drm/kms: add explicit encoder disable function and detach harder. For shared tv-out and VGA encoders, we really need to know if the encoder is just being switched off temporarily in blanking or if we are really disabling it hard. Also we need to try harder to disconnect encoders from unused connectors so we can share more efficently. 
(shared encoders stuff is coming in radeon tv-out support) Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 24 ++++++++++++++++++++---- include/drm/drm_crtc_helper.h | 2 ++ 2 files changed, 22 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index 205349ea1075..eea5e6c4099c 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -260,13 +260,27 @@ EXPORT_SYMBOL(drm_helper_crtc_in_use); void drm_helper_disable_unused_functions(struct drm_device *dev) { struct drm_encoder *encoder; + struct drm_connector *connector; struct drm_encoder_helper_funcs *encoder_funcs; struct drm_crtc *crtc; + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + if (!connector->encoder) + continue; + if (connector->status == connector_status_disconnected) + connector->encoder = NULL; + } + list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) { encoder_funcs = encoder->helper_private; - if (!drm_helper_encoder_in_use(encoder)) - (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF); + if (!drm_helper_encoder_in_use(encoder)) { + if (encoder_funcs->disable) + (*encoder_funcs->disable)(encoder); + else + (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF); + } + /* disconnector encoder from any connector */ + encoder->crtc = NULL; } list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { @@ -411,7 +425,7 @@ static int drm_pick_crtcs(struct drm_device *dev, c = 0; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - if ((connector->encoder->possible_crtcs & (1 << c)) == 0) { + if ((encoder->possible_crtcs & (1 << c)) == 0) { c++; continue; } @@ -496,8 +510,10 @@ static void drm_setup_crtcs(struct drm_device *dev) mode->name, crtc->base.id); crtc->desired_mode = mode; connector->encoder->crtc = crtc; - } else + } else { connector->encoder->crtc = NULL; + connector->encoder = NULL; + } i++; } diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index e44a4f87303c..4c8dacaf4f58 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -79,6 +79,8 @@ struct drm_encoder_helper_funcs { /* detect for DAC style encoders */ enum drm_connector_status (*detect)(struct drm_encoder *encoder, struct drm_connector *connector); + /* disable encoder when not in use - more explicit than dpms off */ + void (*disable)(struct drm_encoder *encoder); }; struct drm_connector_helper_funcs { -- cgit v1.2.3 From 388539f3ff0cf1de926b03f94e1eec112358f74d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 27 Jul 2009 09:24:35 +0800 Subject: [libata] add DMA setup FIS auto-activate feature Hopefully results in fewer on-the-wire FIS's and no breakage. We'll see! 
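Auto-Activate is only switched on when three things line up: the controller sets ATA_FLAG_FPDMA_AA (AHCI does so whenever it has NCQ, per the ahci.c hunk below), the device advertises the feature in its IDENTIFY data, and no ATA_HORKAGE_BROKEN_FPDMA_AA quirk is set; only then is SET FEATURES issued with the SATA_FPDMA_AA (0x02) subcommand. The gating test on the IDENTIFY words is small enough to restate on its own; the following is a user-space illustration with made-up IDENTIFY contents, mirroring the new ata_id_has_fpdma_aa() check.

#include <stdio.h>
#include <stdint.h>

/*
 * Word 76 carries the SATA capabilities; 0x0000 or 0xffff means the
 * device reports none. Word 78 bit 2 advertises DMA Setup FIS
 * Auto-Activate support.
 */
static int id_has_fpdma_aa(const uint16_t *id)
{
	return id[76] != 0x0000 && id[76] != 0xffff && (id[78] & (1 << 2));
}

int main(void)
{
	uint16_t id[256] = { 0 };	/* fake IDENTIFY DEVICE data, illustrative only */

	id[76] = 0x0100;		/* pretend the device reports SATA capabilities (e.g. NCQ) */
	id[78] = 1 << 2;		/* pretend Auto-Activate is supported */

	if (id_has_fpdma_aa(id))
		printf("enable AA: SET FEATURES, subcommand 0x02\n");
	else
		printf("leave Auto-Activate disabled\n");
	return 0;
}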
Signed-off-by: Shaohua Li Signed-off-by: Jeff Garzik --- drivers/ata/ahci.c | 2 +- drivers/ata/libata-core.c | 36 +++++++++++++++++++++++++++++------- include/linux/ata.h | 4 ++++ include/linux/libata.h | 2 ++ 4 files changed, 36 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index fe3eba5d6b3e..7a3124a9abc6 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -2869,7 +2869,7 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* prepare host */ if (hpriv->cap & HOST_CAP_NCQ) - pi.flags |= ATA_FLAG_NCQ; + pi.flags |= ATA_FLAG_NCQ | ATA_FLAG_FPDMA_AA; if (hpriv->cap & HOST_CAP_PMP) pi.flags |= ATA_FLAG_PMP; diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 072ba5ea138f..98af50f16e0c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2299,29 +2299,49 @@ static inline u8 ata_dev_knobble(struct ata_device *dev) return ((ap->cbl == ATA_CBL_SATA) && (!ata_id_is_sata(dev->id))); } -static void ata_dev_config_ncq(struct ata_device *dev, +static int ata_dev_config_ncq(struct ata_device *dev, char *desc, size_t desc_sz) { struct ata_port *ap = dev->link->ap; int hdepth = 0, ddepth = ata_id_queue_depth(dev->id); + unsigned int err_mask; + char *aa_desc = ""; if (!ata_id_has_ncq(dev->id)) { desc[0] = '\0'; - return; + return 0; } if (dev->horkage & ATA_HORKAGE_NONCQ) { snprintf(desc, desc_sz, "NCQ (not used)"); - return; + return 0; } if (ap->flags & ATA_FLAG_NCQ) { hdepth = min(ap->scsi_host->can_queue, ATA_MAX_QUEUE - 1); dev->flags |= ATA_DFLAG_NCQ; } + if (!(dev->horkage & ATA_HORKAGE_BROKEN_FPDMA_AA) && + (ap->flags & ATA_FLAG_FPDMA_AA) && + ata_id_has_fpdma_aa(dev->id)) { + err_mask = ata_dev_set_feature(dev, SETFEATURES_SATA_ENABLE, + SATA_FPDMA_AA); + if (err_mask) { + ata_dev_printk(dev, KERN_ERR, "failed to enable AA" + "(error_mask=0x%x)\n", err_mask); + if (err_mask != AC_ERR_DEV) { + dev->horkage |= ATA_HORKAGE_BROKEN_FPDMA_AA; + return -EIO; + } + } else + aa_desc = ", AA"; + } + if (hdepth >= ddepth) - snprintf(desc, desc_sz, "NCQ (depth %d)", ddepth); + snprintf(desc, desc_sz, "NCQ (depth %d)%s", ddepth, aa_desc); else - snprintf(desc, desc_sz, "NCQ (depth %d/%d)", hdepth, ddepth); + snprintf(desc, desc_sz, "NCQ (depth %d/%d)%s", hdepth, + ddepth, aa_desc); + return 0; } /** @@ -2461,7 +2481,7 @@ int ata_dev_configure(struct ata_device *dev) if (ata_id_has_lba(id)) { const char *lba_desc; - char ncq_desc[20]; + char ncq_desc[24]; lba_desc = "LBA"; dev->flags |= ATA_DFLAG_LBA; @@ -2475,7 +2495,9 @@ int ata_dev_configure(struct ata_device *dev) } /* config NCQ */ - ata_dev_config_ncq(dev, ncq_desc, sizeof(ncq_desc)); + rc = ata_dev_config_ncq(dev, ncq_desc, sizeof(ncq_desc)); + if (rc) + return rc; /* print device info to dmesg */ if (ata_msg_drv(ap) && print_info) { diff --git a/include/linux/ata.h b/include/linux/ata.h index 9c75921f0c16..f5494050df83 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -306,6 +306,7 @@ enum { /* SETFEATURE Sector counts for SATA features */ SATA_AN = 0x05, /* Asynchronous Notification */ SATA_DIPM = 0x03, /* Device Initiated Power Management */ + SATA_FPDMA_AA = 0x02, /* DMA Setup FIS Auto-Activate */ /* feature values for SET_MAX */ ATA_SET_MAX_ADDR = 0x00, @@ -525,6 +526,9 @@ static inline int ata_is_data(u8 prot) #define ata_id_has_atapi_AN(id) \ ( (((id)[76] != 0x0000) && ((id)[76] != 0xffff)) && \ ((id)[78] & (1 << 5)) ) +#define ata_id_has_fpdma_aa(id) \ + ( (((id)[76] != 0x0000) && ((id)[76] != 
0xffff)) && \ + ((id)[78] & (1 << 2)) ) #define ata_id_iordy_disable(id) ((id)[ATA_ID_CAPABILITY] & (1 << 10)) #define ata_id_has_iordy(id) ((id)[ATA_ID_CAPABILITY] & (1 << 11)) #define ata_id_u32(id,n) \ diff --git a/include/linux/libata.h b/include/linux/libata.h index e5b6e33c6571..d3f7cab4873e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -190,6 +190,7 @@ enum { ATA_FLAG_NO_POWEROFF_SPINDOWN = (1 << 11), /* don't spindown before poweroff */ ATA_FLAG_NO_HIBERNATE_SPINDOWN = (1 << 12), /* don't spindown before hibernation */ ATA_FLAG_DEBUGMSG = (1 << 13), + ATA_FLAG_FPDMA_AA = (1 << 14), /* driver supports Auto-Activate */ ATA_FLAG_IGN_SIMPLEX = (1 << 15), /* ignore SIMPLEX */ ATA_FLAG_NO_IORDY = (1 << 16), /* controller lacks iordy */ ATA_FLAG_ACPI_SATA = (1 << 17), /* need native SATA ACPI layout */ @@ -386,6 +387,7 @@ enum { ATA_HORKAGE_FIRMWARE_WARN = (1 << 12), /* firmware update warning */ ATA_HORKAGE_1_5_GBPS = (1 << 13), /* force 1.5 Gbps */ ATA_HORKAGE_NOSETXFER = (1 << 14), /* skip SETXFER, SATA only */ + ATA_HORKAGE_BROKEN_FPDMA_AA = (1 << 15), /* skip AA */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 6521148c6449724c3b707820b9c535c7e8b8afcd Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 14 Jul 2009 20:43:39 -0600 Subject: libata: add command name parsing for error output This patch improve libata's output for error/notification messages to allow easier comprehension and debugging: When ATAPI commands issued through the SCSI layer fail, use SCSI functions to print the CDB in human-readable form instead of just dumping out the CDB in hex. Print out the name of the failed command (as defined by the ATA specification) in error handling output along with the raw register contents. When reporting status of ACPI taskfile commands executed on resume, also output the names of the commands being executed (or not) in readable form. Since the extra data for printing command names increases kernel size slightly, a config option has been added to allow disabling command name output (as well as some of the error register parsing) for those highly sensitive to kernel text size. Signed-off-by: Robert Hancock Signed-off-by: Jeff Garzik --- drivers/ata/Kconfig | 11 ++++ drivers/ata/libata-acpi.c | 7 ++- drivers/ata/libata-eh.c | 128 +++++++++++++++++++++++++++++++++++++++++++++- drivers/ata/libata.h | 1 + include/linux/ata.h | 32 +++++++++++- 5 files changed, 174 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index b17c57f85032..8e64d3c81d53 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -26,6 +26,17 @@ config ATA_NONSTANDARD bool default n +config ATA_VERBOSE_ERROR + bool "Verbose ATA error reporting" + default y + help + This option adds parsing of ATA command descriptions and error bits + in libata kernel output, making it easier to interpret. + This option will enlarge the kernel by approx. 6KB. Disable it only + if kernel size is more important than ease of debugging. + + If unsure, say Y. 
+ config ATA_ACPI bool "ATA ACPI Support" depends on ACPI && PCI diff --git a/drivers/ata/libata-acpi.c b/drivers/ata/libata-acpi.c index ac176da1f94e..01964b6e6f6b 100644 --- a/drivers/ata/libata-acpi.c +++ b/drivers/ata/libata-acpi.c @@ -689,6 +689,7 @@ static int ata_acpi_run_tf(struct ata_device *dev, struct ata_taskfile tf, ptf, rtf; unsigned int err_mask; const char *level; + const char *descr; char msg[60]; int rc; @@ -736,11 +737,13 @@ static int ata_acpi_run_tf(struct ata_device *dev, snprintf(msg, sizeof(msg), "filtered out"); rc = 0; } + descr = ata_get_cmd_descript(tf.command); ata_dev_printk(dev, level, - "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x %s\n", + "ACPI cmd %02x/%02x:%02x:%02x:%02x:%02x:%02x (%s) %s\n", tf.command, tf.feature, tf.nsect, tf.lbal, - tf.lbam, tf.lbah, tf.device, msg); + tf.lbam, tf.lbah, tf.device, + (descr ? descr : "unknown"), msg); return rc; } diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 2c34de841e11..a04488f0de88 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "../scsi/scsi_transport_api.h" #include @@ -2111,6 +2112,116 @@ void ata_eh_autopsy(struct ata_port *ap) ata_eh_link_autopsy(&ap->link); } +/** + * ata_get_cmd_descript - get description for ATA command + * @command: ATA command code to get description for + * + * Return a textual description of the given command, or NULL if the + * command is not known. + * + * LOCKING: + * None + */ +const char *ata_get_cmd_descript(u8 command) +{ +#ifdef CONFIG_ATA_VERBOSE_ERROR + static const struct + { + u8 command; + const char *text; + } cmd_descr[] = { + { ATA_CMD_DEV_RESET, "DEVICE RESET" }, + { ATA_CMD_CHK_POWER, "CHECK POWER MODE" }, + { ATA_CMD_STANDBY, "STANDBY" }, + { ATA_CMD_IDLE, "IDLE" }, + { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" }, + { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" }, + { ATA_CMD_NOP, "NOP" }, + { ATA_CMD_FLUSH, "FLUSH CACHE" }, + { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" }, + { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" }, + { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" }, + { ATA_CMD_SERVICE, "SERVICE" }, + { ATA_CMD_READ, "READ DMA" }, + { ATA_CMD_READ_EXT, "READ DMA EXT" }, + { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" }, + { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" }, + { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" }, + { ATA_CMD_WRITE, "WRITE DMA" }, + { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" }, + { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" }, + { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" }, + { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" }, + { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" }, + { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, + { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, + { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, + { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, + { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" }, + { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" }, + { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" }, + { ATA_CMD_READ_MULTI, "READ MULTIPLE" }, + { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" }, + { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" }, + { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" }, + { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" }, + { ATA_CMD_SET_FEATURES, "SET FEATURES" }, + { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" }, + { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" }, + { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" }, + { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" }, + { ATA_CMD_STANDBYNOW1, 
"STANDBY IMMEDIATE" }, + { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" }, + { ATA_CMD_SLEEP, "SLEEP" }, + { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" }, + { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" }, + { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" }, + { ATA_CMD_SET_MAX, "SET MAX ADDRESS" }, + { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" }, + { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" }, + { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" }, + { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" }, + { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" }, + { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" }, + { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" }, + { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" }, + { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" }, + { ATA_CMD_PMP_READ, "READ BUFFER" }, + { ATA_CMD_PMP_WRITE, "WRITE BUFFER" }, + { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" }, + { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" }, + { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" }, + { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" }, + { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" }, + { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" }, + { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" }, + { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" }, + { ATA_CMD_SMART, "SMART" }, + { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" }, + { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" }, + { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" }, + { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" }, + { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" }, + { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, + { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, + { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, + { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, + { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, + { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, + { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" }, + { ATA_CMD_RESTORE, "RECALIBRATE" }, + { 0, NULL } /* terminate list */ + }; + + unsigned int i; + for (i = 0; cmd_descr[i].text; i++) + if (cmd_descr[i].command == command) + return cmd_descr[i].text; +#endif + + return NULL; +} + /** * ata_eh_link_report - report error handling to user * @link: ATA link EH is going on @@ -2177,6 +2288,7 @@ static void ata_eh_link_report(struct ata_link *link) ata_link_printk(link, KERN_ERR, "%s\n", desc); } +#ifdef CONFIG_ATA_VERBOSE_ERROR if (ehc->i.serror) ata_link_printk(link, KERN_ERR, "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", @@ -2197,6 +2309,7 @@ static void ata_eh_link_report(struct ata_link *link) ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", ehc->i.serror & SERR_DEV_XCHG ? 
"DevExch " : ""); +#endif for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); @@ -2228,14 +2341,23 @@ static void ata_eh_link_report(struct ata_link *link) dma_str[qc->dma_dir]); } - if (ata_is_atapi(qc->tf.protocol)) - snprintf(cdb_buf, sizeof(cdb_buf), + if (ata_is_atapi(qc->tf.protocol)) { + if (qc->scsicmd) + scsi_print_command(qc->scsicmd); + else + snprintf(cdb_buf, sizeof(cdb_buf), "cdb %02x %02x %02x %02x %02x %02x %02x %02x " "%02x %02x %02x %02x %02x %02x %02x %02x\n ", cdb[0], cdb[1], cdb[2], cdb[3], cdb[4], cdb[5], cdb[6], cdb[7], cdb[8], cdb[9], cdb[10], cdb[11], cdb[12], cdb[13], cdb[14], cdb[15]); + } else { + const char *descr = ata_get_cmd_descript(cmd->command); + if (descr) + ata_dev_printk(qc->dev, KERN_ERR, + "failed command: %s\n", descr); + } ata_dev_printk(qc->dev, KERN_ERR, "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " @@ -2254,6 +2376,7 @@ static void ata_eh_link_report(struct ata_link *link) res->device, qc->err_mask, ata_err_string(qc->err_mask), qc->err_mask & AC_ERR_NCQ ? " " : ""); +#ifdef CONFIG_ATA_VERBOSE_ERROR if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | ATA_ERR)) { if (res->command & ATA_BUSY) @@ -2277,6 +2400,7 @@ static void ata_eh_link_report(struct ata_link *link) res->feature & ATA_UNC ? "UNC " : "", res->feature & ATA_IDNF ? "IDNF " : "", res->feature & ATA_ABORTED ? "ABRT " : ""); +#endif } } diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index 89a1e0018e71..be8e2628f82c 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -164,6 +164,7 @@ extern void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, extern void ata_eh_done(struct ata_link *link, struct ata_device *dev, unsigned int action); extern void ata_eh_autopsy(struct ata_port *ap); +const char *ata_get_cmd_descript(u8 command); extern void ata_eh_report(struct ata_port *ap); extern int ata_eh_reset(struct ata_link *link, int classify, ata_prereset_fn_t prereset, ata_reset_fn_t softreset, diff --git a/include/linux/ata.h b/include/linux/ata.h index f5494050df83..6299a259ed19 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -210,15 +210,25 @@ enum { ATA_CMD_STANDBY = 0xE2, /* place in standby power mode */ ATA_CMD_IDLE = 0xE3, /* place in idle power mode */ ATA_CMD_EDD = 0x90, /* execute device diagnostic */ + ATA_CMD_DOWNLOAD_MICRO = 0x92, + ATA_CMD_NOP = 0x00, ATA_CMD_FLUSH = 0xE7, ATA_CMD_FLUSH_EXT = 0xEA, ATA_CMD_ID_ATA = 0xEC, ATA_CMD_ID_ATAPI = 0xA1, + ATA_CMD_SERVICE = 0xA2, ATA_CMD_READ = 0xC8, ATA_CMD_READ_EXT = 0x25, + ATA_CMD_READ_QUEUED = 0x26, + ATA_CMD_READ_STREAM_EXT = 0x2B, + ATA_CMD_READ_STREAM_DMA_EXT = 0x2A, ATA_CMD_WRITE = 0xCA, ATA_CMD_WRITE_EXT = 0x35, + ATA_CMD_WRITE_QUEUED = 0x36, + ATA_CMD_WRITE_STREAM_EXT = 0x3B, + ATA_CMD_WRITE_STREAM_DMA_EXT = 0x3A, ATA_CMD_WRITE_FUA_EXT = 0x3D, + ATA_CMD_WRITE_QUEUED_FUA_EXT = 0x3E, ATA_CMD_FPDMA_READ = 0x60, ATA_CMD_FPDMA_WRITE = 0x61, ATA_CMD_PIO_READ = 0x20, @@ -235,6 +245,7 @@ enum { ATA_CMD_PACKET = 0xA0, ATA_CMD_VERIFY = 0x40, ATA_CMD_VERIFY_EXT = 0x42, + ATA_CMD_WRITE_UNCORR_EXT = 0x45, ATA_CMD_STANDBYNOW1 = 0xE0, ATA_CMD_IDLEIMMEDIATE = 0xE1, ATA_CMD_SLEEP = 0xE6, @@ -243,15 +254,34 @@ enum { ATA_CMD_READ_NATIVE_MAX_EXT = 0x27, ATA_CMD_SET_MAX = 0xF9, ATA_CMD_SET_MAX_EXT = 0x37, - ATA_CMD_READ_LOG_EXT = 0x2f, + ATA_CMD_READ_LOG_EXT = 0x2F, + ATA_CMD_WRITE_LOG_EXT = 0x3F, + ATA_CMD_READ_LOG_DMA_EXT = 0x47, + ATA_CMD_WRITE_LOG_DMA_EXT = 0x57, + ATA_CMD_TRUSTED_RCV = 0x5C, + 
ATA_CMD_TRUSTED_RCV_DMA = 0x5D, + ATA_CMD_TRUSTED_SND = 0x5E, + ATA_CMD_TRUSTED_SND_DMA = 0x5F, ATA_CMD_PMP_READ = 0xE4, ATA_CMD_PMP_WRITE = 0xE8, ATA_CMD_CONF_OVERLAY = 0xB1, + ATA_CMD_SEC_SET_PASS = 0xF1, + ATA_CMD_SEC_UNLOCK = 0xF2, + ATA_CMD_SEC_ERASE_PREP = 0xF3, + ATA_CMD_SEC_ERASE_UNIT = 0xF4, ATA_CMD_SEC_FREEZE_LOCK = 0xF5, + ATA_CMD_SEC_DISABLE_PASS = 0xF6, + ATA_CMD_CONFIG_STREAM = 0x51, ATA_CMD_SMART = 0xB0, ATA_CMD_MEDIA_LOCK = 0xDE, ATA_CMD_MEDIA_UNLOCK = 0xDF, ATA_CMD_DSM = 0x06, + ATA_CMD_CHK_MED_CRD_TYP = 0xD1, + ATA_CMD_CFA_REQ_EXT_ERR = 0x03, + ATA_CMD_CFA_WRITE_NE = 0x38, + ATA_CMD_CFA_TRANS_SECT = 0x87, + ATA_CMD_CFA_ERASE = 0xC0, + ATA_CMD_CFA_WRITE_MULT_NE = 0xCD, /* marked obsolete in the ATA/ATAPI-7 spec */ ATA_CMD_RESTORE = 0x10, -- cgit v1.2.3 From 051d9fbdd1d1ec85ea18ba20581234cf23f1c217 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Jul 2009 11:46:12 +0900 Subject: libata: remove spindown skipping and warning This was a hack to give userland shutdown tools time to drop manual spindown. All popular distros updated quite some time ago and the due is well passed. Drop it. Signed-off-by: Tejun Heo Cc: Jaswinder Singh Rajput Signed-off-by: Jeff Garzik --- Documentation/feature-removal-schedule.txt | 18 ----------- drivers/ata/libata-scsi.c | 51 ------------------------------ include/linux/libata.h | 1 - 3 files changed, 70 deletions(-) (limited to 'include') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 09e031c55887..b3b62903f08c 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -206,24 +206,6 @@ Who: Len Brown --------------------------- -What: libata spindown skipping and warning -When: Dec 2008 -Why: Some halt(8) implementations synchronize caches for and spin - down libata disks because libata didn't use to spin down disk on - system halt (only synchronized caches). - Spin down on system halt is now implemented. sysfs node - /sys/class/scsi_disk/h:c:i:l/manage_start_stop is present if - spin down support is available. - Because issuing spin down command to an already spun down disk - makes some disks spin up just to spin down again, libata tracks - device spindown status to skip the extra spindown command and - warn about it. - This is to give userspace tools the time to get updated and will - be removed after userspace is reasonably updated. -Who: Tejun Heo - ---------------------------- - What: i386/x86_64 bzImage symlinks When: April 2010 diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d0dfeef55db5..de3a0050760a 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1257,23 +1257,6 @@ int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth) return queue_depth; } -/* XXX: for spindown warning */ -static void ata_delayed_done_timerfn(unsigned long arg) -{ - struct scsi_cmnd *scmd = (void *)arg; - - scmd->scsi_done(scmd); -} - -/* XXX: for spindown warning */ -static void ata_delayed_done(struct scsi_cmnd *scmd) -{ - static struct timer_list timer; - - setup_timer(&timer, ata_delayed_done_timerfn, (unsigned long)scmd); - mod_timer(&timer, jiffies + 5 * HZ); -} - /** * ata_scsi_start_stop_xlat - Translate SCSI START STOP UNIT command * @qc: Storage for translated ATA taskfile @@ -1338,32 +1321,6 @@ static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc) system_entering_hibernation()) goto skip; - /* XXX: This is for backward compatibility, will be - * removed. 
Read Documentation/feature-removal-schedule.txt - * for more info. - */ - if ((qc->dev->flags & ATA_DFLAG_SPUNDOWN) && - (system_state == SYSTEM_HALT || - system_state == SYSTEM_POWER_OFF)) { - static unsigned long warned; - - if (!test_and_set_bit(0, &warned)) { - ata_dev_printk(qc->dev, KERN_WARNING, - "DISK MIGHT NOT BE SPUN DOWN PROPERLY. " - "UPDATE SHUTDOWN UTILITY\n"); - ata_dev_printk(qc->dev, KERN_WARNING, - "For more info, visit " - "http://linux-ata.org/shutdown.html\n"); - - /* ->scsi_done is not used, use it for - * delayed completion. - */ - scmd->scsi_done = qc->scsidone; - qc->scsidone = ata_delayed_done; - } - goto skip; - } - /* Issue ATA STANDBY IMMEDIATE command */ tf->command = ATA_CMD_STANDBYNOW1; } @@ -1764,14 +1721,6 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) } } - /* XXX: track spindown state for spindown skipping and warning */ - if (unlikely(qc->tf.command == ATA_CMD_STANDBY || - qc->tf.command == ATA_CMD_STANDBYNOW1)) - qc->dev->flags |= ATA_DFLAG_SPUNDOWN; - else if (likely(system_state != SYSTEM_HALT && - system_state != SYSTEM_POWER_OFF)) - qc->dev->flags &= ~ATA_DFLAG_SPUNDOWN; - if (need_sense && !ap->ops->error_handler) ata_dump_status(ap->print_id, &qc->result_tf); diff --git a/include/linux/libata.h b/include/linux/libata.h index d3f7cab4873e..76319bf03e37 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -143,7 +143,6 @@ enum { ATA_DFLAG_PIO = (1 << 12), /* device limited to PIO mode */ ATA_DFLAG_NCQ_OFF = (1 << 13), /* device limited to non-NCQ mode */ - ATA_DFLAG_SPUNDOWN = (1 << 14), /* XXX: for spindown_compat */ ATA_DFLAG_SLEEPING = (1 << 15), /* device is sleeping */ ATA_DFLAG_DUBIOUS_XFER = (1 << 16), /* data transfer not verified */ ATA_DFLAG_NO_UNLOAD = (1 << 17), /* device doesn't support unload */ -- cgit v1.2.3 From 86393e52c3f1e2f6be18383f6ecdbcdc5727d545 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 29 Aug 2009 01:34:49 +0000 Subject: netns: embed ip6_dst_ops directly struct net::ipv6.ip6_dst_ops is separatedly dynamically allocated, but there is no fundamental reason for it. Embed it directly into struct netns_ipv6. For that: * move struct dst_ops into separate header to fix circular dependencies I honestly tried not to, it's pretty impossible to do other way * drop dynamical allocation, allocate together with netns For a change, remove struct dst_ops::dst_net, it's deducible by using container_of() given dst_ops pointer. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/net/dst.h | 23 +---------------------- include/net/dst_ops.h | 28 ++++++++++++++++++++++++++++ include/net/netns/ipv6.h | 3 ++- net/ipv6/route.c | 34 +++++++++++++--------------------- 4 files changed, 44 insertions(+), 44 deletions(-) create mode 100644 include/net/dst_ops.h (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 7fc409c19b37..5a900ddcf10d 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -8,6 +8,7 @@ #ifndef _NET_DST_H #define _NET_DST_H +#include #include #include #include @@ -102,28 +103,6 @@ struct dst_entry }; }; - -struct dst_ops -{ - unsigned short family; - __be16 protocol; - unsigned gc_thresh; - - int (*gc)(struct dst_ops *ops); - struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); - void (*destroy)(struct dst_entry *); - void (*ifdown)(struct dst_entry *, - struct net_device *dev, int how); - struct dst_entry * (*negative_advice)(struct dst_entry *); - void (*link_failure)(struct sk_buff *); - void (*update_pmtu)(struct dst_entry *dst, u32 mtu); - int (*local_out)(struct sk_buff *skb); - - atomic_t entries; - struct kmem_cache *kmem_cachep; - struct net *dst_net; -}; - #ifdef __KERNEL__ static inline u32 diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h new file mode 100644 index 000000000000..d1ff9b7e99b8 --- /dev/null +++ b/include/net/dst_ops.h @@ -0,0 +1,28 @@ +#ifndef _NET_DST_OPS_H +#define _NET_DST_OPS_H +#include + +struct dst_entry; +struct kmem_cachep; +struct net_device; +struct sk_buff; + +struct dst_ops { + unsigned short family; + __be16 protocol; + unsigned gc_thresh; + + int (*gc)(struct dst_ops *ops); + struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); + void (*destroy)(struct dst_entry *); + void (*ifdown)(struct dst_entry *, + struct net_device *dev, int how); + struct dst_entry * (*negative_advice)(struct dst_entry *); + void (*link_failure)(struct sk_buff *); + void (*update_pmtu)(struct dst_entry *dst, u32 mtu); + int (*local_out)(struct sk_buff *skb); + + atomic_t entries; + struct kmem_cache *kmem_cachep; +}; +#endif diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index afab4e4cbac7..dfeb2d7c425b 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -6,6 +6,7 @@ #ifndef __NETNS_IPV6_H__ #define __NETNS_IPV6_H__ +#include struct ctl_table_header; @@ -42,7 +43,7 @@ struct netns_ipv6 { struct timer_list ip6_fib_timer; struct hlist_head *fib_table_hash; struct fib6_table *fib6_main_tbl; - struct dst_ops *ip6_dst_ops; + struct dst_ops ip6_dst_ops; unsigned int ip6_rt_gc_expire; unsigned long ip6_rt_last_gc; #ifdef CONFIG_IPV6_MULTIPLE_TABLES diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1473ee0a1f51..9ccfef345560 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -665,7 +665,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad net->ipv6.sysctl.ip6_rt_gc_elasticity = 1; net->ipv6.sysctl.ip6_rt_gc_min_interval = 0; - ip6_dst_gc(net->ipv6.ip6_dst_ops); + ip6_dst_gc(&net->ipv6.ip6_dst_ops); net->ipv6.sysctl.ip6_rt_gc_elasticity = saved_rt_elasticity; @@ -970,7 +970,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, if (unlikely(idev == NULL)) return NULL; - rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (unlikely(rt == NULL)) { in6_dev_put(idev); goto out; @@ -1060,7 +1060,7 @@ static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), static int ip6_dst_gc(struct dst_ops *ops) { unsigned long now = jiffies; - 
struct net *net = ops->dst_net; + struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; @@ -1154,7 +1154,7 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); + rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (rt == NULL) { err = -ENOMEM; @@ -1643,7 +1643,7 @@ out: static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) { struct net *net = dev_net(ort->rt6i_dev); - struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); + struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); if (rt) { rt->u.dst.input = ort->u.dst.input; @@ -1923,7 +1923,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, int anycast) { struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net->ipv6.ip6_dst_ops); + struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops); struct neighbour *neigh; if (rt == NULL) @@ -2501,7 +2501,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) net->ipv6.rt6_stats->fib_rt_alloc, net->ipv6.rt6_stats->fib_rt_entries, net->ipv6.rt6_stats->fib_rt_cache, - atomic_read(&net->ipv6.ip6_dst_ops->entries), + atomic_read(&net->ipv6.ip6_dst_ops.entries), net->ipv6.rt6_stats->fib_discarded_routes); return 0; @@ -2637,7 +2637,7 @@ struct ctl_table *ipv6_route_sysctl_init(struct net *net) if (table) { table[0].data = &net->ipv6.sysctl.flush_delay; - table[1].data = &net->ipv6.ip6_dst_ops->gc_thresh; + table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh; table[2].data = &net->ipv6.sysctl.ip6_rt_max_size; table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval; table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout; @@ -2655,12 +2655,8 @@ static int ip6_route_net_init(struct net *net) { int ret = -ENOMEM; - net->ipv6.ip6_dst_ops = kmemdup(&ip6_dst_ops_template, - sizeof(*net->ipv6.ip6_dst_ops), - GFP_KERNEL); - if (!net->ipv6.ip6_dst_ops) - goto out; - net->ipv6.ip6_dst_ops->dst_net = hold_net(net); + memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, + sizeof(net->ipv6.ip6_dst_ops)); net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, sizeof(*net->ipv6.ip6_null_entry), @@ -2669,7 +2665,7 @@ static int ip6_route_net_init(struct net *net) goto out_ip6_dst_ops; net->ipv6.ip6_null_entry->u.dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; - net->ipv6.ip6_null_entry->u.dst.ops = net->ipv6.ip6_dst_ops; + net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; #ifdef CONFIG_IPV6_MULTIPLE_TABLES net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, @@ -2679,7 +2675,7 @@ static int ip6_route_net_init(struct net *net) goto out_ip6_null_entry; net->ipv6.ip6_prohibit_entry->u.dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; - net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops; + net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), @@ -2688,7 +2684,7 @@ static int ip6_route_net_init(struct net *net) goto out_ip6_prohibit_entry; net->ipv6.ip6_blk_hole_entry->u.dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; - net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops; + net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops; #endif net->ipv6.sysctl.flush_delay = 0; @@ -2717,8 +2713,6 @@ out_ip6_null_entry: 
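The ip6_dst_gc() hunk earlier in this diff is the heart of the commit: because ip6_dst_ops is now embedded in struct netns_ipv6 instead of being a separately allocated object, the owning struct net can be recovered from a dst_ops pointer with container_of(), which is why the dst_net back-pointer can be dropped. A minimal userspace sketch of that recovery, using hypothetical toy types rather than the real kernel definitions:

#include <stddef.h>
#include <stdio.h>

/* Same idea as the kernel's container_of(): walk back from a member's
 * address to the enclosing structure. */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct dst_ops_demo    { unsigned int gc_thresh; };
struct netns_ipv6_demo { struct dst_ops_demo ip6_dst_ops; };
struct net_demo        { int ns_id; struct netns_ipv6_demo ipv6; };

/* Analogue of ip6_dst_gc(): only the embedded ops pointer is passed in,
 * yet the enclosing namespace is still reachable. */
static void gc_demo(struct dst_ops_demo *ops)
{
	struct net_demo *net =
		container_of(ops, struct net_demo, ipv6.ip6_dst_ops);

	printf("gc in netns %d, gc_thresh %u\n", net->ns_id, ops->gc_thresh);
}

int main(void)
{
	struct net_demo net = { .ns_id = 1, .ipv6.ip6_dst_ops.gc_thresh = 1024 };

	gc_demo(&net.ipv6.ip6_dst_ops);
	return 0;
}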
kfree(net->ipv6.ip6_null_entry); #endif out_ip6_dst_ops: - release_net(net->ipv6.ip6_dst_ops->dst_net); - kfree(net->ipv6.ip6_dst_ops); goto out; } @@ -2733,8 +2727,6 @@ static void ip6_route_net_exit(struct net *net) kfree(net->ipv6.ip6_prohibit_entry); kfree(net->ipv6.ip6_blk_hole_entry); #endif - release_net(net->ipv6.ip6_dst_ops->dst_net); - kfree(net->ipv6.ip6_dst_ops); } static struct pernet_operations ip6_route_net_ops = { -- cgit v1.2.3 From 5152fc7de3ae31b46692022ea63ce0501280f5b1 Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Tue, 1 Sep 2009 10:24:00 +0000 Subject: RTO connection timeout: coding style fixes and comments This patch affects the retransmits_timed_out() function. Changes: 1) Variables have more meaningful names 2) retransmits_timed_out() has an introductionary comment. 3) Small coding style changes. Signed-off-by: Damian Lukowski Signed-off-by: David S. Miller --- include/net/tcp.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index e5319495f15e..df50bc40b5fd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1252,22 +1252,27 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) +/* This function calculates a "timeout" which is equivalent to the timeout of a + * TCP connection after "boundary" unsucessful, exponentially backed-off + * retransmissions with an initial RTO of TCP_RTO_MIN. + */ static inline bool retransmits_timed_out(const struct sock *sk, unsigned int boundary) { - int limit, K; + unsigned int timeout, linear_backoff_thresh; + if (!inet_csk(sk)->icsk_retransmits) return false; - K = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); + linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); - if (boundary <= K) - limit = ((2 << boundary) - 1) * TCP_RTO_MIN; + if (boundary <= linear_backoff_thresh) + timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; else - limit = ((2 << K) - 1) * TCP_RTO_MIN + - (boundary - K) * TCP_RTO_MAX; + timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; - return (tcp_time_stamp - tcp_sk(sk)->retrans_stamp) >= limit; + return (tcp_time_stamp - tcp_sk(sk)->retrans_stamp) >= timeout; } static inline struct sk_buff *tcp_send_head(struct sock *sk) -- cgit v1.2.3 From 89d69d2b75a8f7e258f4b634cd985374cfd3202e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 1 Sep 2009 11:13:19 +0000 Subject: net: make neigh_ops constant These tables are never modified at runtime. Move to read-only section. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/net/arp.h | 2 +- include/net/neighbour.h | 2 +- net/atm/clip.c | 2 +- net/decnet/dn_neigh.c | 6 +++--- net/ipv4/arp.c | 8 ++++---- net/ipv6/ndisc.c | 6 +++--- 6 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/arp.h b/include/net/arp.h index c236270ec95e..716f43c5c98e 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -26,6 +26,6 @@ extern struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, const unsigned char *target_hw); extern void arp_xmit(struct sk_buff *skb); -extern struct neigh_ops arp_broken_ops; +extern const struct neigh_ops arp_broken_ops; #endif /* _ARP_H */ diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 18b69b6cecaf..3817fda82a80 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -118,7 +118,7 @@ struct neighbour int (*output)(struct sk_buff *skb); struct sk_buff_head arp_queue; struct timer_list timer; - struct neigh_ops *ops; + const struct neigh_ops *ops; u8 primary_key[0]; }; diff --git a/net/atm/clip.c b/net/atm/clip.c index 27f6852ce190..64629c354343 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -267,7 +267,7 @@ static void clip_neigh_error(struct neighbour *neigh, struct sk_buff *skb) kfree_skb(skb); } -static struct neigh_ops clip_neigh_ops = { +static const struct neigh_ops clip_neigh_ops = { .family = AF_INET, .solicit = clip_neigh_solicit, .error_report = clip_neigh_error, diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 923786bd6d01..794b5bf95af1 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -59,7 +59,7 @@ static int dn_phase3_output(struct sk_buff *); /* * For talking to broadcast devices: Ethernet & PPP */ -static struct neigh_ops dn_long_ops = { +static const struct neigh_ops dn_long_ops = { .family = AF_DECnet, .error_report = dn_long_error_report, .output = dn_long_output, @@ -71,7 +71,7 @@ static struct neigh_ops dn_long_ops = { /* * For talking to pointopoint and multidrop devices: DDCMP and X.25 */ -static struct neigh_ops dn_short_ops = { +static const struct neigh_ops dn_short_ops = { .family = AF_DECnet, .error_report = dn_short_error_report, .output = dn_short_output, @@ -83,7 +83,7 @@ static struct neigh_ops dn_short_ops = { /* * For talking to DECnet phase III nodes */ -static struct neigh_ops dn_phase3_ops = { +static const struct neigh_ops dn_phase3_ops = { .family = AF_DECnet, .error_report = dn_short_error_report, /* Can use short version here */ .output = dn_phase3_output, diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 090e9991ac2a..4e80f336c0cf 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -130,7 +130,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); static void parp_redo(struct sk_buff *skb); -static struct neigh_ops arp_generic_ops = { +static const struct neigh_ops arp_generic_ops = { .family = AF_INET, .solicit = arp_solicit, .error_report = arp_error_report, @@ -140,7 +140,7 @@ static struct neigh_ops arp_generic_ops = { .queue_xmit = dev_queue_xmit, }; -static struct neigh_ops arp_hh_ops = { +static const struct neigh_ops arp_hh_ops = { .family = AF_INET, .solicit = arp_solicit, .error_report = arp_error_report, @@ -150,7 +150,7 @@ static struct neigh_ops arp_hh_ops = { .queue_xmit = dev_queue_xmit, }; -static struct neigh_ops arp_direct_ops = { +static const struct neigh_ops arp_direct_ops = { .family = AF_INET, .output = dev_queue_xmit, .connected_output = 
dev_queue_xmit, @@ -158,7 +158,7 @@ static struct neigh_ops arp_direct_ops = { .queue_xmit = dev_queue_xmit, }; -struct neigh_ops arp_broken_ops = { +const struct neigh_ops arp_broken_ops = { .family = AF_INET, .solicit = arp_solicit, .error_report = arp_error_report, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 44b4c87e5cce..7015478797f6 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -98,7 +98,7 @@ static int pndisc_constructor(struct pneigh_entry *n); static void pndisc_destructor(struct pneigh_entry *n); static void pndisc_redo(struct sk_buff *skb); -static struct neigh_ops ndisc_generic_ops = { +static const struct neigh_ops ndisc_generic_ops = { .family = AF_INET6, .solicit = ndisc_solicit, .error_report = ndisc_error_report, @@ -108,7 +108,7 @@ static struct neigh_ops ndisc_generic_ops = { .queue_xmit = dev_queue_xmit, }; -static struct neigh_ops ndisc_hh_ops = { +static const struct neigh_ops ndisc_hh_ops = { .family = AF_INET6, .solicit = ndisc_solicit, .error_report = ndisc_error_report, @@ -119,7 +119,7 @@ static struct neigh_ops ndisc_hh_ops = { }; -static struct neigh_ops ndisc_direct_ops = { +static const struct neigh_ops ndisc_direct_ops = { .family = AF_INET6, .output = dev_queue_xmit, .connected_output = dev_queue_xmit, -- cgit v1.2.3 From 6d703a81ad5fdd102334751ddacb053ecc6ff046 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 1 Sep 2009 17:52:57 -0700 Subject: ide: convert to ->proc_fops ->read_proc, ->write_proc are going away, ->proc_fops should be used instead. The only tricky place is IDENTIFY handling: if for some reason taskfile_lib_get_identify() fails, buffer _is_ changed and at least first byte is overwritten. Emulate old behaviour with returning that first byte to userspace and reporting length=1 despite overall -E. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- drivers/ide/ide-cd.c | 28 +++- drivers/ide/ide-disk_proc.c | 129 +++++++++++------ drivers/ide/ide-floppy_proc.c | 30 ++-- drivers/ide/ide-proc.c | 330 +++++++++++++++++++++++++++--------------- drivers/ide/ide-tape.c | 31 ++-- include/linux/ide.h | 24 +-- 6 files changed, 365 insertions(+), 207 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index ad0ab0c0a493..b79ca419d8d9 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -1389,19 +1390,30 @@ static sector_t ide_cdrom_capacity(ide_drive_t *drive) return capacity * sectors_per_frame; } -static int proc_idecd_read_capacity(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int idecd_capacity_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = data; - int len; + ide_drive_t *drive = m->private; - len = sprintf(page, "%llu\n", (long long)ide_cdrom_capacity(drive)); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%llu\n", (long long)ide_cdrom_capacity(drive)); + return 0; +} + +static int idecd_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idecd_capacity_proc_show, PDE(inode)->data); } +static const struct file_operations idecd_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = idecd_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static ide_proc_entry_t idecd_proc[] = { - { "capacity", S_IFREG|S_IRUGO, proc_idecd_read_capacity, NULL }, - { NULL, 0, NULL, NULL } + { "capacity", S_IFREG|S_IRUGO, &idecd_capacity_proc_fops }, + {} }; static ide_proc_entry_t *ide_cd_proc_entries(ide_drive_t *drive) diff --git a/drivers/ide/ide-disk_proc.c b/drivers/ide/ide-disk_proc.c index 19f263bf0a9e..60b0590ccc9c 100644 --- a/drivers/ide/ide-disk_proc.c +++ b/drivers/ide/ide-disk_proc.c @@ -1,5 +1,6 @@ #include #include +#include #include "ide-disk.h" @@ -37,77 +38,117 @@ static int get_smart_data(ide_drive_t *drive, u8 *buf, u8 sub_cmd) return ide_raw_taskfile(drive, &cmd, buf, 1); } -static int proc_idedisk_read_cache - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int idedisk_cache_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; - char *out = page; - int len; + ide_drive_t *drive = (ide_drive_t *) m->private; if (drive->dev_flags & IDE_DFLAG_ID_READ) - len = sprintf(out, "%i\n", drive->id[ATA_ID_BUF_SIZE] / 2); + seq_printf(m, "%i\n", drive->id[ATA_ID_BUF_SIZE] / 2); else - len = sprintf(out, "(none)\n"); + seq_printf(m, "(none)\n"); + return 0; +} - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int idedisk_cache_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idedisk_cache_proc_show, PDE(inode)->data); } -static int proc_idedisk_read_capacity - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations idedisk_cache_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_cache_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int idedisk_capacity_proc_show(struct seq_file *m, void *v) { - ide_drive_t*drive = (ide_drive_t *)data; - int len; + ide_drive_t*drive = (ide_drive_t *)m->private; - len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive)); + seq_printf(m, "%llu\n", (long long)ide_gd_capacity(drive)); + return 0; 
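For reference, the conversion recipe repeated across these drivers/ide files boils down to the pattern below. This is a condensed sketch, not code from the patch: the foo_* names and struct foo are placeholders, while single_open(), seq_read(), single_release(), PDE() and proc_create_data() are the 2009-era procfs/seq_file helpers the diff itself uses.

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

struct foo { int value; };	/* placeholder for the per-entry object */

/* The old read_proc body becomes a seq_file "show" callback; the former
 * 'data' cookie arrives as m->private. */
static int foo_proc_show(struct seq_file *m, void *v)
{
	struct foo *foo = m->private;

	seq_printf(m, "%d\n", foo->value);
	return 0;
}

static int foo_proc_open(struct inode *inode, struct file *file)
{
	/* PDE(inode)->data still carries the cookie set at creation time. */
	return single_open(file, foo_proc_show, PDE(inode)->data);
}

static const struct file_operations foo_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = foo_proc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

/* Registration replaces create_proc_entry() + ->read_proc assignment:
 *	proc_create_data("foo", S_IFREG | S_IRUGO, parent_dir,
 *			 &foo_proc_fops, foo_instance);
 */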
+} - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int idedisk_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idedisk_capacity_proc_show, PDE(inode)->data); } -static int proc_idedisk_read_smart(char *page, char **start, off_t off, - int count, int *eof, void *data, u8 sub_cmd) +static const struct file_operations idedisk_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __idedisk_proc_show(struct seq_file *m, ide_drive_t *drive, u8 sub_cmd) { - ide_drive_t *drive = (ide_drive_t *)data; - int len = 0, i = 0; + u8 *buf; + + buf = kmalloc(SECTOR_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; (void)smart_enable(drive); - if (get_smart_data(drive, page, sub_cmd) == 0) { - unsigned short *val = (unsigned short *) page; - char *out = (char *)val + SECTOR_SIZE; - - page = out; - do { - out += sprintf(out, "%04x%c", le16_to_cpu(*val), - (++i & 7) ? ' ' : '\n'); - val += 1; - } while (i < SECTOR_SIZE / 2); - len = out - page; + if (get_smart_data(drive, buf, sub_cmd) == 0) { + __le16 *val = (__le16 *)buf; + int i; + + for (i = 0; i < SECTOR_SIZE / 2; i++) { + seq_printf(m, "%04x%c", le16_to_cpu(val[i]), + (i % 8) == 7 ? '\n' : ' '); + } } + kfree(buf); + return 0; +} - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int idedisk_sv_proc_show(struct seq_file *m, void *v) +{ + return __idedisk_proc_show(m, m->private, ATA_SMART_READ_VALUES); } -static int proc_idedisk_read_sv - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int idedisk_sv_proc_open(struct inode *inode, struct file *file) { - return proc_idedisk_read_smart(page, start, off, count, eof, data, - ATA_SMART_READ_VALUES); + return single_open(file, idedisk_sv_proc_show, PDE(inode)->data); } -static int proc_idedisk_read_st - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations idedisk_sv_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_sv_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int idedisk_st_proc_show(struct seq_file *m, void *v) { - return proc_idedisk_read_smart(page, start, off, count, eof, data, - ATA_SMART_READ_THRESHOLDS); + return __idedisk_proc_show(m, m->private, ATA_SMART_READ_THRESHOLDS); } +static int idedisk_st_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idedisk_st_proc_show, PDE(inode)->data); +} + +static const struct file_operations idedisk_st_proc_fops = { + .owner = THIS_MODULE, + .open = idedisk_st_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + ide_proc_entry_t ide_disk_proc[] = { - { "cache", S_IFREG|S_IRUGO, proc_idedisk_read_cache, NULL }, - { "capacity", S_IFREG|S_IRUGO, proc_idedisk_read_capacity, NULL }, - { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL }, - { "smart_values", S_IFREG|S_IRUSR, proc_idedisk_read_sv, NULL }, - { "smart_thresholds", S_IFREG|S_IRUSR, proc_idedisk_read_st, NULL }, - { NULL, 0, NULL, NULL } + { "cache", S_IFREG|S_IRUGO, &idedisk_cache_proc_fops }, + { "capacity", S_IFREG|S_IRUGO, &idedisk_capacity_proc_fops }, + { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops }, + { "smart_values", S_IFREG|S_IRUSR, &idedisk_sv_proc_fops }, + { "smart_thresholds", S_IFREG|S_IRUSR, &idedisk_st_proc_fops }, + {} }; ide_devset_rw_field(bios_cyl, bios_cyl); diff 
--git a/drivers/ide/ide-floppy_proc.c b/drivers/ide/ide-floppy_proc.c index fcd4d8153df5..d711d9b883de 100644 --- a/drivers/ide/ide-floppy_proc.c +++ b/drivers/ide/ide-floppy_proc.c @@ -1,22 +1,34 @@ #include #include +#include #include "ide-floppy.h" -static int proc_idefloppy_read_capacity(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int idefloppy_capacity_proc_show(struct seq_file *m, void *v) { - ide_drive_t*drive = (ide_drive_t *)data; - int len; + ide_drive_t*drive = (ide_drive_t *)m->private; - len = sprintf(page, "%llu\n", (long long)ide_gd_capacity(drive)); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%llu\n", (long long)ide_gd_capacity(drive)); + return 0; } +static int idefloppy_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idefloppy_capacity_proc_show, PDE(inode)->data); +} + +static const struct file_operations idefloppy_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = idefloppy_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + ide_proc_entry_t ide_floppy_proc[] = { - { "capacity", S_IFREG|S_IRUGO, proc_idefloppy_read_capacity, NULL }, - { "geometry", S_IFREG|S_IRUGO, proc_ide_read_geometry, NULL }, - { NULL, 0, NULL, NULL } + { "capacity", S_IFREG|S_IRUGO, &idefloppy_capacity_proc_fops }, + { "geometry", S_IFREG|S_IRUGO, &ide_geometry_proc_fops }, + {} }; ide_devset_rw_field(bios_cyl, bios_cyl); diff --git a/drivers/ide/ide-proc.c b/drivers/ide/ide-proc.c index 021de41655e6..28d09a5d8450 100644 --- a/drivers/ide/ide-proc.c +++ b/drivers/ide/ide-proc.c @@ -30,11 +30,9 @@ static struct proc_dir_entry *proc_ide_root; -static int proc_ide_read_imodel - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_imodel_proc_show(struct seq_file *m, void *v) { - ide_hwif_t *hwif = (ide_hwif_t *) data; - int len; + ide_hwif_t *hwif = (ide_hwif_t *) m->private; const char *name; switch (hwif->chipset) { @@ -53,63 +51,108 @@ static int proc_ide_read_imodel case ide_acorn: name = "acorn"; break; default: name = "(unknown)"; break; } - len = sprintf(page, "%s\n", name); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%s\n", name); + return 0; } -static int proc_ide_read_mate - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_imodel_proc_open(struct inode *inode, struct file *file) { - ide_hwif_t *hwif = (ide_hwif_t *) data; - int len; + return single_open(file, ide_imodel_proc_show, PDE(inode)->data); +} + +static const struct file_operations ide_imodel_proc_fops = { + .owner = THIS_MODULE, + .open = ide_imodel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ide_mate_proc_show(struct seq_file *m, void *v) +{ + ide_hwif_t *hwif = (ide_hwif_t *) m->private; if (hwif && hwif->mate) - len = sprintf(page, "%s\n", hwif->mate->name); + seq_printf(m, "%s\n", hwif->mate->name); else - len = sprintf(page, "(none)\n"); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "(none)\n"); + return 0; +} + +static int ide_mate_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_mate_proc_show, PDE(inode)->data); } -static int proc_ide_read_channel - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_mate_proc_fops = { + .owner = THIS_MODULE, + .open = ide_mate_proc_open, + .read = 
seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ide_channel_proc_show(struct seq_file *m, void *v) { - ide_hwif_t *hwif = (ide_hwif_t *) data; - int len; + ide_hwif_t *hwif = (ide_hwif_t *) m->private; - page[0] = hwif->channel ? '1' : '0'; - page[1] = '\n'; - len = 2; - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%c\n", hwif->channel ? '1' : '0'); + return 0; } -static int proc_ide_read_identify - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_channel_proc_open(struct inode *inode, struct file *file) { - ide_drive_t *drive = (ide_drive_t *)data; - int len = 0, i = 0; - int err = 0; + return single_open(file, ide_channel_proc_show, PDE(inode)->data); +} - len = sprintf(page, "\n"); +static const struct file_operations ide_channel_proc_fops = { + .owner = THIS_MODULE, + .open = ide_channel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; - if (drive) { - __le16 *val = (__le16 *)page; +static int ide_identify_proc_show(struct seq_file *m, void *v) +{ + ide_drive_t *drive = (ide_drive_t *)m->private; + u8 *buf; - err = taskfile_lib_get_identify(drive, page); - if (!err) { - char *out = (char *)page + SECTOR_SIZE; + if (!drive) { + seq_putc(m, '\n'); + return 0; + } - page = out; - do { - out += sprintf(out, "%04x%c", - le16_to_cpup(val), (++i & 7) ? ' ' : '\n'); - val += 1; - } while (i < SECTOR_SIZE / 2); - len = out - page; + buf = kmalloc(SECTOR_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + if (taskfile_lib_get_identify(drive, buf) == 0) { + __le16 *val = (__le16 *)buf; + int i; + + for (i = 0; i < SECTOR_SIZE / 2; i++) { + seq_printf(m, "%04x%c", le16_to_cpu(val[i]), + (i % 8) == 7 ? '\n' : ' '); } - } - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + } else + seq_putc(m, buf[0]); + kfree(buf); + return 0; +} + +static int ide_identify_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_identify_proc_show, PDE(inode)->data); } +static const struct file_operations ide_identify_proc_fops = { + .owner = THIS_MODULE, + .open = ide_identify_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /** * ide_find_setting - find a specific setting * @st: setting table pointer @@ -240,22 +283,20 @@ static void proc_ide_settings_warn(void) warned = 1; } -static int proc_ide_read_settings - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_settings_proc_show(struct seq_file *m, void *v) { const struct ide_proc_devset *setting, *g, *d; const struct ide_devset *ds; - ide_drive_t *drive = (ide_drive_t *) data; - char *out = page; - int len, rc, mul_factor, div_factor; + ide_drive_t *drive = (ide_drive_t *) m->private; + int rc, mul_factor, div_factor; proc_ide_settings_warn(); mutex_lock(&ide_setting_mtx); g = ide_generic_settings; d = drive->settings; - out += sprintf(out, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); - out += sprintf(out, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); + seq_printf(m, "name\t\t\tvalue\t\tmin\t\tmax\t\tmode\n"); + seq_printf(m, "----\t\t\t-----\t\t---\t\t---\t\t----\n"); while (g->name || (d && d->name)) { /* read settings in the alphabetical order */ if (g->name && d && d->name) { @@ -269,31 +310,35 @@ static int proc_ide_read_settings setting = g++; mul_factor = setting->mulf ? setting->mulf(drive) : 1; div_factor = setting->divf ? 
setting->divf(drive) : 1; - out += sprintf(out, "%-24s", setting->name); + seq_printf(m, "%-24s", setting->name); rc = ide_read_setting(drive, setting); if (rc >= 0) - out += sprintf(out, "%-16d", rc * mul_factor / div_factor); + seq_printf(m, "%-16d", rc * mul_factor / div_factor); else - out += sprintf(out, "%-16s", "write-only"); - out += sprintf(out, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor); + seq_printf(m, "%-16s", "write-only"); + seq_printf(m, "%-16d%-16d", (setting->min * mul_factor + div_factor - 1) / div_factor, setting->max * mul_factor / div_factor); ds = setting->setting; if (ds->get) - out += sprintf(out, "r"); + seq_printf(m, "r"); if (ds->set) - out += sprintf(out, "w"); - out += sprintf(out, "\n"); + seq_printf(m, "w"); + seq_printf(m, "\n"); } - len = out - page; mutex_unlock(&ide_setting_mtx); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + return 0; +} + +static int ide_settings_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_settings_proc_show, PDE(inode)->data); } #define MAX_LEN 30 -static int proc_ide_write_settings(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static ssize_t ide_settings_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) PDE(file->f_path.dentry->d_inode)->data; char name[MAX_LEN + 1]; int for_real = 0, mul_factor, div_factor; unsigned long n; @@ -388,63 +433,104 @@ static int proc_ide_write_settings(struct file *file, const char __user *buffer, return count; parse_error: free_page((unsigned long)buf); - printk("proc_ide_write_settings(): parse error\n"); + printk("%s(): parse error\n", __func__); return -EINVAL; } -int proc_ide_read_capacity - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_settings_proc_fops = { + .owner = THIS_MODULE, + .open = ide_settings_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = ide_settings_proc_write, +}; + +static int ide_capacity_proc_show(struct seq_file *m, void *v) { - int len = sprintf(page, "%llu\n", (long long)0x7fffffff); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%llu\n", (long long)0x7fffffff); + return 0; } -EXPORT_SYMBOL_GPL(proc_ide_read_capacity); +static int ide_capacity_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_capacity_proc_show, NULL); +} -int proc_ide_read_geometry - (char *page, char **start, off_t off, int count, int *eof, void *data) +const struct file_operations ide_capacity_proc_fops = { + .owner = THIS_MODULE, + .open = ide_capacity_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +EXPORT_SYMBOL_GPL(ide_capacity_proc_fops); + +static int ide_geometry_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; - char *out = page; - int len; + ide_drive_t *drive = (ide_drive_t *) m->private; - out += sprintf(out, "physical %d/%d/%d\n", + seq_printf(m, "physical %d/%d/%d\n", drive->cyl, drive->head, drive->sect); - out += sprintf(out, "logical %d/%d/%d\n", + seq_printf(m, "logical %d/%d/%d\n", drive->bios_cyl, drive->bios_head, drive->bios_sect); + return 0; +} - len = out - page; - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); +static int ide_geometry_proc_open(struct 
inode *inode, struct file *file) +{ + return single_open(file, ide_geometry_proc_show, PDE(inode)->data); } -EXPORT_SYMBOL(proc_ide_read_geometry); +const struct file_operations ide_geometry_proc_fops = { + .owner = THIS_MODULE, + .open = ide_geometry_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +EXPORT_SYMBOL(ide_geometry_proc_fops); -static int proc_ide_read_dmodel - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int ide_dmodel_proc_show(struct seq_file *seq, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) seq->private; char *m = (char *)&drive->id[ATA_ID_PROD]; - int len; - len = sprintf(page, "%.40s\n", m[0] ? m : "(none)"); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(seq, "%.40s\n", m[0] ? m : "(none)"); + return 0; +} + +static int ide_dmodel_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_dmodel_proc_show, PDE(inode)->data); } -static int proc_ide_read_driver - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_dmodel_proc_fops = { + .owner = THIS_MODULE, + .open = ide_dmodel_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int ide_driver_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *)data; + ide_drive_t *drive = (ide_drive_t *)m->private; struct device *dev = &drive->gendev; struct ide_driver *ide_drv; - int len; if (dev->driver) { ide_drv = to_ide_driver(dev->driver); - len = sprintf(page, "%s version %s\n", + seq_printf(m, "%s version %s\n", dev->driver->name, ide_drv->version); } else - len = sprintf(page, "ide-default version 0.9.newide\n"); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "ide-default version 0.9.newide\n"); + return 0; +} + +static int ide_driver_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_driver_proc_show, PDE(inode)->data); } static int ide_replace_subdriver(ide_drive_t *drive, const char *driver) @@ -474,10 +560,10 @@ static int ide_replace_subdriver(ide_drive_t *drive, const char *driver) return ret; } -static int proc_ide_write_driver - (struct file *file, const char __user *buffer, unsigned long count, void *data) +static ssize_t ide_driver_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) PDE(file->f_path.dentry->d_inode)->data; char name[32]; if (!capable(CAP_SYS_ADMIN)) @@ -492,12 +578,19 @@ static int proc_ide_write_driver return count; } -static int proc_ide_read_media - (char *page, char **start, off_t off, int count, int *eof, void *data) +static const struct file_operations ide_driver_proc_fops = { + .owner = THIS_MODULE, + .open = ide_driver_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = ide_driver_proc_write, +}; + +static int ide_media_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) m->private; const char *media; - int len; switch (drive->media) { case ide_disk: media = "disk\n"; break; @@ -507,20 +600,30 @@ static int proc_ide_read_media case ide_optical: media = "optical\n"; break; default: media = "UNKNOWN\n"; break; } - strcpy(page, media); - len = strlen(media); - PROC_IDE_READ_RETURN(page, start, off, count, 
eof, len); + seq_puts(m, media); + return 0; +} + +static int ide_media_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ide_media_proc_show, PDE(inode)->data); } +static const struct file_operations ide_media_proc_fops = { + .owner = THIS_MODULE, + .open = ide_media_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static ide_proc_entry_t generic_drive_entries[] = { - { "driver", S_IFREG|S_IRUGO, proc_ide_read_driver, - proc_ide_write_driver }, - { "identify", S_IFREG|S_IRUSR, proc_ide_read_identify, NULL }, - { "media", S_IFREG|S_IRUGO, proc_ide_read_media, NULL }, - { "model", S_IFREG|S_IRUGO, proc_ide_read_dmodel, NULL }, - { "settings", S_IFREG|S_IRUSR|S_IWUSR, proc_ide_read_settings, - proc_ide_write_settings }, - { NULL, 0, NULL, NULL } + { "driver", S_IFREG|S_IRUGO, &ide_driver_proc_fops }, + { "identify", S_IFREG|S_IRUSR, &ide_identify_proc_fops}, + { "media", S_IFREG|S_IRUGO, &ide_media_proc_fops }, + { "model", S_IFREG|S_IRUGO, &ide_dmodel_proc_fops }, + { "settings", S_IFREG|S_IRUSR|S_IWUSR, &ide_settings_proc_fops}, + {} }; static void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p, void *data) @@ -530,11 +633,8 @@ static void ide_add_proc_entries(struct proc_dir_entry *dir, ide_proc_entry_t *p if (!dir || !p) return; while (p->name != NULL) { - ent = create_proc_entry(p->name, p->mode, dir); + ent = proc_create_data(p->name, p->mode, dir, p->proc_fops, data); if (!ent) return; - ent->data = data; - ent->read_proc = p->read_proc; - ent->write_proc = p->write_proc; p++; } } @@ -617,10 +717,10 @@ void ide_proc_unregister_device(ide_drive_t *drive) } static ide_proc_entry_t hwif_entries[] = { - { "channel", S_IFREG|S_IRUGO, proc_ide_read_channel, NULL }, - { "mate", S_IFREG|S_IRUGO, proc_ide_read_mate, NULL }, - { "model", S_IFREG|S_IRUGO, proc_ide_read_imodel, NULL }, - { NULL, 0, NULL, NULL } + { "channel", S_IFREG|S_IRUGO, &ide_channel_proc_fops }, + { "mate", S_IFREG|S_IRUGO, &ide_mate_proc_fops }, + { "model", S_IFREG|S_IRUGO, &ide_imodel_proc_fops }, + {} }; void ide_proc_register_port(ide_hwif_t *hwif) diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 7b2032bc357b..9d6f62baac27 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -1816,22 +1817,32 @@ static void ide_tape_release(struct device *dev) } #ifdef CONFIG_IDE_PROC_FS -static int proc_idetape_read_name - (char *page, char **start, off_t off, int count, int *eof, void *data) +static int idetape_name_proc_show(struct seq_file *m, void *v) { - ide_drive_t *drive = (ide_drive_t *) data; + ide_drive_t *drive = (ide_drive_t *) m->private; idetape_tape_t *tape = drive->driver_data; - char *out = page; - int len; - len = sprintf(out, "%s\n", tape->name); - PROC_IDE_READ_RETURN(page, start, off, count, eof, len); + seq_printf(m, "%s\n", tape->name); + return 0; +} + +static int idetape_name_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, idetape_name_proc_show, PDE(inode)->data); } +static const struct file_operations idetape_name_proc_fops = { + .owner = THIS_MODULE, + .open = idetape_name_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static ide_proc_entry_t idetape_proc[] = { - { "capacity", S_IFREG|S_IRUGO, proc_ide_read_capacity, NULL }, - { "name", S_IFREG|S_IRUGO, proc_idetape_read_name, NULL }, - { NULL, 0, NULL, NULL } + { "capacity", 
S_IFREG|S_IRUGO, &ide_capacity_proc_fops }, + { "name", S_IFREG|S_IRUGO, &idetape_name_proc_fops }, + {} }; static ide_proc_entry_t *ide_tape_proc_entries(ide_drive_t *drive) diff --git a/include/linux/ide.h b/include/linux/ide.h index 803c1ae31237..e4135d6e0556 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -919,8 +919,7 @@ __IDE_PROC_DEVSET(_name, _min, _max, NULL, NULL) typedef struct { const char *name; mode_t mode; - read_proc_t *read_proc; - write_proc_t *write_proc; + const struct file_operations *proc_fops; } ide_proc_entry_t; void proc_ide_create(void); @@ -932,24 +931,8 @@ void ide_proc_unregister_port(ide_hwif_t *); void ide_proc_register_driver(ide_drive_t *, struct ide_driver *); void ide_proc_unregister_driver(ide_drive_t *, struct ide_driver *); -read_proc_t proc_ide_read_capacity; -read_proc_t proc_ide_read_geometry; - -/* - * Standard exit stuff: - */ -#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) \ -{ \ - len -= off; \ - if (len < count) { \ - *eof = 1; \ - if (len <= 0) \ - return 0; \ - } else \ - len = count; \ - *start = page + off; \ - return len; \ -} +extern const struct file_operations ide_capacity_proc_fops; +extern const struct file_operations ide_geometry_proc_fops; #else static inline void proc_ide_create(void) { ; } static inline void proc_ide_destroy(void) { ; } @@ -961,7 +944,6 @@ static inline void ide_proc_register_driver(ide_drive_t *drive, struct ide_driver *driver) { ; } static inline void ide_proc_unregister_driver(ide_drive_t *drive, struct ide_driver *driver) { ; } -#define PROC_IDE_READ_RETURN(page,start,off,count,eof,len) return 0; #endif enum { -- cgit v1.2.3 From 2fbd3da3877ad8d923b055e5996f80b4d4a6daf4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 1 Sep 2009 17:59:25 -0700 Subject: pkt_sched: Revert tasklet_hrtimer changes. These are full of unresolved problems, mainly that conversions don't work 1-1 from hrtimers to tasklet_hrtimers because unlike hrtimers tasklets can't be killed from softirq context. And when a qdisc gets reset, that's exactly what we need to do here. We'll work this out in the net-next-2.6 tree and if warranted we'll backport that work to -stable. This reverts the following 3 changesets: a2cb6a4dd470d7a64255a10b843b0d188416b78f ("pkt_sched: Fix bogon in tasklet_hrtimer changes.") 38acce2d7983632100a9ff3fd20295f6e34074a8 ("pkt_sched: Convert CBQ to tasklet_hrtimer.") ee5f9757ea17759e1ce5503bdae2b07e48e32af9 ("pkt_sched: Convert qdisc_watchdog to tasklet_hrtimer") Signed-off-by: David S. 
Miller --- include/net/pkt_sched.h | 4 ++-- net/sched/sch_api.c | 10 +++++----- net/sched/sch_cbq.c | 25 +++++++++++-------------- 3 files changed, 18 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 7eafb8d54470..82a3191375f5 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -61,8 +61,8 @@ psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) } struct qdisc_watchdog { - struct tasklet_hrtimer timer; - struct Qdisc *qdisc; + struct hrtimer timer; + struct Qdisc *qdisc; }; extern void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 92e6f3a52c13..24d17ce9c294 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -458,7 +458,7 @@ EXPORT_SYMBOL(qdisc_warn_nonwc); static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, - timer.timer); + timer); wd->qdisc->flags &= ~TCQ_F_THROTTLED; __netif_schedule(qdisc_root(wd->qdisc)); @@ -468,8 +468,8 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) { - tasklet_hrtimer_init(&wd->timer, qdisc_watchdog, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + wd->timer.function = qdisc_watchdog; wd->qdisc = qdisc; } EXPORT_SYMBOL(qdisc_watchdog_init); @@ -485,13 +485,13 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) wd->qdisc->flags |= TCQ_F_THROTTLED; time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); - tasklet_hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); + hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); } EXPORT_SYMBOL(qdisc_watchdog_schedule); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { - tasklet_hrtimer_cancel(&wd->timer); + hrtimer_cancel(&wd->timer); wd->qdisc->flags &= ~TCQ_F_THROTTLED; } EXPORT_SYMBOL(qdisc_watchdog_cancel); diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 149b0405c5ec..d5798e17a832 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -163,7 +163,7 @@ struct cbq_sched_data psched_time_t now_rt; /* Cached real time */ unsigned pmask; - struct tasklet_hrtimer delay_timer; + struct hrtimer delay_timer; struct qdisc_watchdog watchdog; /* Watchdog timer, started when CBQ has backlog, but cannot @@ -503,8 +503,6 @@ static void cbq_ovl_delay(struct cbq_class *cl) cl->undertime = q->now + delay; if (delay > 0) { - struct hrtimer *ht; - sched += delay + cl->penalty; cl->penalized = sched; cl->cpriority = TC_CBQ_MAXPRIO; @@ -512,12 +510,12 @@ static void cbq_ovl_delay(struct cbq_class *cl) expires = ktime_set(0, 0); expires = ktime_add_ns(expires, PSCHED_TICKS2NS(sched)); - ht = &q->delay_timer.timer; - if (hrtimer_try_to_cancel(ht) && - ktime_to_ns(ktime_sub(hrtimer_get_expires(ht), - expires)) > 0) - hrtimer_set_expires(ht, expires); - hrtimer_restart(ht); + if (hrtimer_try_to_cancel(&q->delay_timer) && + ktime_to_ns(ktime_sub( + hrtimer_get_expires(&q->delay_timer), + expires)) > 0) + hrtimer_set_expires(&q->delay_timer, expires); + hrtimer_restart(&q->delay_timer); cl->delayed = 1; cl->xstats.overactions++; return; @@ -593,7 +591,7 @@ static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio, static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) { struct cbq_sched_data *q = 
container_of(timer, struct cbq_sched_data, - delay_timer.timer); + delay_timer); struct Qdisc *sch = q->watchdog.qdisc; psched_time_t now; psched_tdiff_t delay = 0; @@ -623,7 +621,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay)); - tasklet_hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); + hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); } sch->flags &= ~TCQ_F_THROTTLED; @@ -1216,7 +1214,7 @@ cbq_reset(struct Qdisc* sch) q->tx_class = NULL; q->tx_borrowed = NULL; qdisc_watchdog_cancel(&q->watchdog); - tasklet_hrtimer_cancel(&q->delay_timer); + hrtimer_cancel(&q->delay_timer); q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); q->now_rt = q->now; @@ -1399,8 +1397,7 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.minidle = -0x7FFFFFFF; qdisc_watchdog_init(&q->watchdog, sch); - tasklet_hrtimer_init(&q->delay_timer, cbq_undelay, - CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); q->delay_timer.function = cbq_undelay; q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); -- cgit v1.2.3 From 69575d388603365f2afbf4166df93152df59b165 Mon Sep 17 00:00:00 2001 From: Shane Wang Date: Tue, 1 Sep 2009 18:25:07 -0700 Subject: x86, intel_txt: clean up the impact on generic code, unbreak non-x86 Move tboot.h from asm to linux to fix the build errors of intel_txt patch on non-X86 platforms. Remove the tboot code from generic code init/main.c and kernel/cpu.c. Signed-off-by: Shane Wang Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 4 + arch/x86/include/asm/tboot.h | 197 ------------------------------------------ arch/x86/kernel/reboot.c | 3 +- arch/x86/kernel/setup.c | 3 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/kernel/tboot.c | 58 ++++++++++--- drivers/acpi/acpica/hwsleep.c | 2 +- drivers/pci/dmar.c | 2 +- drivers/pci/intel-iommu.c | 2 +- include/linux/tboot.h | 162 ++++++++++++++++++++++++++++++++++ init/main.c | 3 - kernel/cpu.c | 6 +- security/Kconfig | 2 +- 13 files changed, 221 insertions(+), 225 deletions(-) delete mode 100644 arch/x86/include/asm/tboot.h create mode 100644 include/linux/tboot.h (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 738bdc6b0f8b..b66f2102c35d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -178,6 +178,10 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config HAVE_INTEL_TXT + def_bool y + depends on EXPERIMENTAL && DMAR && ACPI + # Use the generic interrupt handling code in kernel/irq/: config GENERIC_HARDIRQS bool diff --git a/arch/x86/include/asm/tboot.h b/arch/x86/include/asm/tboot.h deleted file mode 100644 index b13929d4e5f4..000000000000 --- a/arch/x86/include/asm/tboot.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * tboot.h: shared data structure with tboot and kernel and functions - * used by kernel for runtime support of Intel(R) Trusted - * Execution Technology - * - * Copyright (c) 2006-2009, Intel Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. - * - */ - -#ifndef _ASM_TBOOT_H -#define _ASM_TBOOT_H - -#include - -/* these must have the values from 0-5 in this order */ -enum { - TB_SHUTDOWN_REBOOT = 0, - TB_SHUTDOWN_S5, - TB_SHUTDOWN_S4, - TB_SHUTDOWN_S3, - TB_SHUTDOWN_HALT, - TB_SHUTDOWN_WFS -}; - -#ifdef CONFIG_INTEL_TXT - -/* used to communicate between tboot and the launched kernel */ - -#define TB_KEY_SIZE 64 /* 512 bits */ - -#define MAX_TB_MAC_REGIONS 32 - -struct tboot_mac_region { - u64 start; /* must be 64 byte -aligned */ - u32 size; /* must be 64 byte -granular */ -} __packed; - -/* GAS - Generic Address Structure (ACPI 2.0+) */ -struct tboot_acpi_generic_address { - u8 space_id; - u8 bit_width; - u8 bit_offset; - u8 access_width; - u64 address; -} __packed; - -/* - * combines Sx info from FADT and FACS tables per ACPI 2.0+ spec - * (http://www.acpi.info/) - */ -struct tboot_acpi_sleep_info { - struct tboot_acpi_generic_address pm1a_cnt_blk; - struct tboot_acpi_generic_address pm1b_cnt_blk; - struct tboot_acpi_generic_address pm1a_evt_blk; - struct tboot_acpi_generic_address pm1b_evt_blk; - u16 pm1a_cnt_val; - u16 pm1b_cnt_val; - u64 wakeup_vector; - u32 vector_width; - u64 kernel_s3_resume_vector; -} __packed; - -/* - * shared memory page used for communication between tboot and kernel - */ -struct tboot { - /* - * version 3+ fields: - */ - - /* TBOOT_UUID */ - u8 uuid[16]; - - /* version number: 5 is current */ - u32 version; - - /* physical addr of tb_log_t log */ - u32 log_addr; - - /* - * physical addr of entry point for tboot shutdown and - * type of shutdown (TB_SHUTDOWN_*) being requested - */ - u32 shutdown_entry; - u32 shutdown_type; - - /* kernel-specified ACPI info for Sx shutdown */ - struct tboot_acpi_sleep_info acpi_sinfo; - - /* tboot location in memory (physical) */ - u32 tboot_base; - u32 tboot_size; - - /* memory regions (phys addrs) for tboot to MAC on S3 */ - u8 num_mac_regions; - struct tboot_mac_region mac_regions[MAX_TB_MAC_REGIONS]; - - - /* - * version 4+ fields: - */ - - /* symmetric key for use by kernel; will be encrypted on S3 */ - u8 s3_key[TB_KEY_SIZE]; - - - /* - * version 5+ fields: - */ - - /* used to 4byte-align num_in_wfs */ - u8 reserved_align[3]; - - /* number of processors in wait-for-SIPI */ - u32 num_in_wfs; -} __packed; - -/* - * UUID for tboot data struct to facilitate matching - * defined as {663C8DFF-E8B3-4b82-AABF-19EA4D057A08} by tboot, which is - * represented as {} in the char array used here - */ -#define TBOOT_UUID {0xff, 0x8d, 0x3c, 0x66, 0xb3, 0xe8, 0x82, 0x4b, 0xbf,\ - 0xaa, 0x19, 0xea, 0x4d, 0x5, 0x7a, 0x8} - -extern struct tboot *tboot; - -static inline int tboot_enabled(void) -{ - return tboot != NULL; -} - -extern void tboot_probe(void); -extern void tboot_create_trampoline(void); -extern void tboot_shutdown(u32 shutdown_type); -extern void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); -extern int tboot_wait_for_aps(int num_aps); -extern struct acpi_table_header *tboot_get_dmar_table( - struct acpi_table_header *dmar_tbl); -extern int tboot_force_iommu(void); - -#else /* CONFIG_INTEL_TXT */ - -static inline int tboot_enabled(void) -{ - return 0; -} - -static inline void tboot_probe(void) -{ -} - -static inline void tboot_create_trampoline(void) -{ -} - -static inline void tboot_shutdown(u32 shutdown_type) -{ -} - -static inline 
void tboot_sleep(u8 sleep_state, u32 pm1a_control, - u32 pm1b_control) -{ -} - -static inline int tboot_wait_for_aps(int num_aps) -{ - return 0; -} - -static inline struct acpi_table_header *tboot_get_dmar_table( - struct acpi_table_header *dmar_tbl) -{ - return dmar_tbl; -} - -static inline int tboot_force_iommu(void) -{ - return 0; -} - -#endif /* !CONFIG_INTEL_TXT */ - -#endif /* _ASM_TBOOT_H */ diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 9de01c5d9794..18ce5c04242a 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -24,8 +25,6 @@ # include #endif -#include - /* * Power off function, if any */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 80d6e9e32483..6ce0d6f38f7f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -66,6 +66,7 @@ #include #include +#include #include